VERSION = 4
PATCHLEVEL = 4
- SUBLEVEL = 206
- EXTRAVERSION = -rc1
+ SUBLEVEL = 207
+ EXTRAVERSION =
NAME = Blurry Fish Butt
# *DOCUMENTATION*
# Most importantly: sub-Makefiles should only ever modify files in
# their own directory. If in some directory we have a dependency on
# a file in another dir (which doesn't happen often, but it's often
-# unavoidable when linking the built-in.o targets which finally
+# unavoidable when linking the built-in.a targets which finally
# turn into vmlinux), we will call a sub make in that other dir, and
# after that we are sure that everything which is in that other dir
# is now up to date.
$(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make
@:
-sub-make: FORCE
+sub-make:
$(Q)$(MAKE) -C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR) \
-f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS))
HOSTCC = gcc
HOSTCXX = g++
-HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
+HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89 -pipe
HOSTCXXFLAGS = -O2
# Decide whether to build built-in, modular, or both.
# Make variables (CC, etc...)
AS = $(CROSS_COMPILE)as
LD = $(CROSS_COMPILE)ld
+LDLLD = ld.lld
CC = $(CROSS_COMPILE)gcc
CPP = $(CC) -E
AR = $(CROSS_COMPILE)ar
CFLAGS_KERNEL =
AFLAGS_KERNEL =
CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_KCOV = -fsanitize-coverage=trace-pc
# Use USERINCLUDE when you must reference the UAPI directories only.
KBUILD_CPPFLAGS := -D__KERNEL__
-KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -pipe \
-fno-strict-aliasing -fno-common \
-Werror-implicit-function-declaration \
-Wno-format-security \
-std=gnu89 $(call cc-option,-fno-PIE)
-
+ifeq ($(TARGET_BOARD_TYPE),auto)
+KBUILD_CFLAGS += -DCONFIG_PLATFORM_AUTO
+endif
KBUILD_AFLAGS_KERNEL :=
KBUILD_CFLAGS_KERNEL :=
KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE)
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV
-export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE
+export CFLAGS_KASAN CFLAGS_UBSAN CFLAGS_KASAN_NOSANITIZE
+export CFLAGS_KCOV
export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
ifeq ($(cc-name),clang)
ifneq ($(CROSS_COMPILE),)
-CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%))
+CLANG_TRIPLE ?= $(CROSS_COMPILE)
+CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%))
+ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_TARGET)), y)
+$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?")
+endif
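+# CLANG_TRIPLE should resolve to a non-Android GNU triple so the check above
+# passes; an illustrative (not mandated) invocation is:
+#   make CC=clang CLANG_TRIPLE=aarch64-linux-gnu- CROSS_COMPILE=aarch64-linux-android-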
GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR)
GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..)
KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
endif
+# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
+# ar/cc/ld-* macros return correct values.
+ifdef CONFIG_LTO_CLANG
+# use LLVM linker LLD for LTO linking and vmlinux_link
+LD := $(LDLLD)
+# use llvm-ar for building symbol tables from IR files, and llvm-nm instead
+# of objdump for processing symbol versions and exports
+LLVM_AR := llvm-ar
+LLVM_NM := llvm-nm
+export LLVM_AR LLVM_NM
+endif
+
+ifeq ($(cc-name),clang)
+ifeq ($(ld-name),lld)
+KBUILD_CFLAGS += -fuse-ld=lld
+LDFLAGS += -O2
+endif
+KBUILD_CPPFLAGS += -Qunused-arguments
+endif
+
# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
# values of the respective KBUILD_* variables
ARCH_CPPFLAGS :=
ARCH_CFLAGS :=
include arch/$(SRCARCH)/Makefile
+ifeq ($(cc-name),clang)
+KBUILD_CFLAGS += -O3
+KBUILD_CFLAGS += $(call cc-option, -mllvm -polly) \
+ $(call cc-option, -mllvm -polly-run-dce) \
+ $(call cc-option, -mllvm -polly-run-inliner) \
+ $(call cc-option, -mllvm -polly-opt-fusion=max) \
+ $(call cc-option, -mllvm -polly-ast-use-context) \
+ $(call cc-option, -mllvm -polly-detect-keep-going) \
+ $(call cc-option, -mllvm -polly-vectorizer=stripmine) \
+ $(call cc-option, -mllvm -polly-invariant-load-hoisting)
+else
+KBUILD_CFLAGS += -O2
+endif
+
+ifeq ($(cc-name),gcc)
+KBUILD_CFLAGS += -mcpu=cortex-a73.cortex-a53
+KBUILD_AFLAGS += -mcpu=cortex-a73.cortex-a53
+endif
+ifeq ($(cc-name),clang)
+KBUILD_CFLAGS += -mcpu=cortex-a53
+KBUILD_AFLAGS += -mcpu=cortex-a53
+endif
+
KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,)
KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,)
KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
-ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS += -Os
-else
-ifdef CONFIG_PROFILE_ALL_BRANCHES
-KBUILD_CFLAGS += -O2
-else
-KBUILD_CFLAGS += -O2
-endif
+ifdef CONFIG_CC_WERROR
+KBUILD_CFLAGS += -Werror
endif
# Tell gcc to never replace conditional load with a non-conditional one
endif
KBUILD_CFLAGS += $(stackp-flag)
+ifdef CONFIG_KCOV
+ ifeq ($(call cc-option, $(CFLAGS_KCOV)),)
+ $(warning Cannot use CONFIG_KCOV: \
+ -fsanitize-coverage=trace-pc is not supported by compiler)
+ CFLAGS_KCOV =
+ endif
+endif
+
ifeq ($(cc-name),clang)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, duplicate-decl-specifier)
# Quiet clang warning: comparison of unsigned expression < 0 is always false
KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
# Clang uses _MergedGlobals as an optimization, but this breaks modpost, as the
# source of a reference will be _MergedGlobals and not one of the whitelisted names.
# See modpost pattern 2
KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
else
# These warnings generated too much noise in a regular build.
endif
endif
+# Initialize all stack variables with a pattern, if desired.
+ifdef CONFIG_INIT_STACK_ALL
+KBUILD_CFLAGS += $(call cc-option, -ftrivial-auto-var-init=pattern)
+endif
+
KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments)
ifdef CONFIG_DEBUG_INFO
KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
endif
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS_KERNEL += $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdata-sections,)
+endif
+
+ifdef CONFIG_LTO_CLANG
+ifdef CONFIG_THINLTO
+lto-clang-flags := -flto=thin
+LDFLAGS += --thinlto-cache-dir=.thinlto-cache
+else
+lto-clang-flags := -flto
+endif
+lto-clang-flags += -fvisibility=hidden
+
+# allow disabling only clang LTO where needed
+DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
+export DISABLE_LTO_CLANG
+endif
+
+ifdef CONFIG_LTO
+LTO_CFLAGS := $(lto-clang-flags)
+KBUILD_CFLAGS += $(LTO_CFLAGS)
+
+DISABLE_LTO := $(DISABLE_LTO_CLANG)
+export LTO_CFLAGS DISABLE_LTO
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
CHECKFLAGS += $(NOSTDINC_FLAGS)
include scripts/Makefile.kasan
include scripts/Makefile.extrawarn
+include scripts/Makefile.ubsan
# Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the
# last assignments
KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+LDFLAGS_vmlinux += $(call ld-option, --gc-sections,)
+endif
+
ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
LDFLAGS_vmlinux += $(call ld-option, -X,)
endif
vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
$(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))
-init-y := $(patsubst %/, %/built-in.o, $(init-y))
-core-y := $(patsubst %/, %/built-in.o, $(core-y))
-drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y))
-net-y := $(patsubst %/, %/built-in.o, $(net-y))
+init-y := $(patsubst %/, %/built-in.a, $(init-y))
+core-y := $(patsubst %/, %/built-in.a, $(core-y))
+drivers-y := $(patsubst %/, %/built-in.a, $(drivers-y))
+net-y := $(patsubst %/, %/built-in.a, $(net-y))
libs-y1 := $(patsubst %/, %/lib.a, $(libs-y))
-libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y))
-libs-y := $(libs-y1) $(libs-y2)
-virt-y := $(patsubst %/, %/built-in.o, $(virt-y))
+libs-y2 := $(patsubst %/, %/built-in.a, $(filter-out %.a, $(libs-y)))
+virt-y := $(patsubst %/, %/built-in.a, $(virt-y))
# Externally visible symbols (used by link-vmlinux.sh)
export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y2) $(drivers-y) $(net-y) $(virt-y)
+export KBUILD_VMLINUX_LIBS := $(libs-y1)
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux
# used by scripts/package/Makefile
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
-vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
+vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) $(KBUILD_VMLINUX_LIBS)
# Final link of vmlinux
cmd_link-vmlinux = $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux)
PHONY += $(vmlinux-dirs)
$(vmlinux-dirs): prepare scripts
- $(Q)$(MAKE) $(build)=$@
+ $(Q)$(MAKE) $(build)=$@ need-builtin=1
define filechk_kernel.release
echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
archprepare: archheaders archscripts prepare1 scripts_basic
-prepare0: archprepare FORCE
+prepare0: archprepare
$(Q)$(MAKE) $(build)=.
# All the preparing..
prepare: prepare0
+# Make sure we're using a supported toolchain with LTO_CLANG
+ifdef CONFIG_LTO_CLANG
+ ifneq ($(call clang-ifversion, -ge, 0800, y), y)
+ @echo Cannot use CONFIG_LTO_CLANG: requires clang 8.0 or later >&2 && exit 1
+ endif
+ ifneq ($(ld-name),lld)
+ @echo Cannot use CONFIG_LTO_CLANG: requires LLD >&2 && exit 1
+ endif
+endif
+# Make sure compiler supports LTO flags
+ifdef LTO_CFLAGS
+  ifeq ($(call cc-option, $(LTO_CFLAGS)),)
+	@echo Cannot use CONFIG_LTO: $(LTO_CFLAGS) not supported by compiler \
+ >&2 && exit 1
+ endif
+endif
+
# Generate some files
# ---------------------------------------------------------------------------
export INSTALL_FW_PATH
PHONY += firmware_install
-firmware_install: FORCE
+firmware_install:
@mkdir -p $(objtree)/firmware
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_install
archscripts:
PHONY += __headers
-__headers: $(version_h) scripts_basic asm-generic archheaders archscripts FORCE
+__headers: $(version_h) scripts_basic asm-generic archheaders archscripts
$(Q)$(MAKE) $(build)=scripts build_unifdef
PHONY += headers_install_all
# We are always building modules
KBUILD_MODULES := 1
- PHONY += crmodverdir
- crmodverdir:
- $(cmd_crmodverdir)
PHONY += $(objtree)/Module.symvers
$(objtree)/Module.symvers:
module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
PHONY += $(module-dirs) modules
- $(module-dirs): crmodverdir $(objtree)/Module.symvers
+ $(module-dirs): prepare $(objtree)/Module.symvers
$(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
modules: $(module-dirs)
# Dummies...
PHONY += prepare scripts
- prepare: ;
+ prepare:
+ $(cmd_crmodverdir)
scripts: ;
endif # KBUILD_EXTMOD
-o -name '*.symtypes' -o -name 'modules.order' \
-o -name modules.builtin -o -name '.tmp_*.o.*' \
-o -name '*.ll' \
- -o -name '*.gcno' \) -type f -print | xargs rm -f
+ -o -name '*.gcno' \
+ -o -name '*.*.symversions' \) -type f -print | xargs rm -f
# Generate tags for editors
# ---------------------------------------------------------------------------
# Modules
/: prepare scripts FORCE
- $(cmd_crmodverdir)
$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
$(build)=$(build-dir)
# Make sure the latest headers are built for Documentation
Documentation/: headers_install
%/: prepare scripts FORCE
- $(cmd_crmodverdir)
$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
$(build)=$(build-dir)
%.ko: prepare scripts FORCE
- $(cmd_crmodverdir)
$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
$(build)=$(build-dir) $(@:.ko=.o)
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
#define __get_user_asm_byte(x, addr, err) \
__get_user_asm(x, addr, err, ldrb)
+ #if __LINUX_ARM_ARCH__ >= 6
+
+ #define __get_user_asm_half(x, addr, err) \
+ __get_user_asm(x, addr, err, ldrh)
+
+ #else
+
#ifndef __ARMEB__
#define __get_user_asm_half(x, __gu_addr, err) \
({ \
})
#endif
+ #endif /* __LINUX_ARM_ARCH__ >= 6 */
+
#define __get_user_asm_word(x, addr, err) \
__get_user_asm(x, addr, err, ldr)
#endif
#define __put_user_asm_byte(x, __pu_addr, err) \
__put_user_asm(x, __pu_addr, err, strb)
+ #if __LINUX_ARM_ARCH__ >= 6
+
+ #define __put_user_asm_half(x, __pu_addr, err) \
+ __put_user_asm(x, __pu_addr, err, strh)
+
+ #else
+
#ifndef __ARMEB__
#define __put_user_asm_half(x, __pu_addr, err) \
({ \
})
#endif
+ #endif /* __LINUX_ARM_ARCH__ >= 6 */
+
#define __put_user_asm_word(x, __pu_addr, err) \
__put_user_asm(x, __pu_addr, err, str)
static inline unsigned long __must_check
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
- unsigned int __ua_flags = uaccess_save_and_enable();
+ unsigned int __ua_flags;
+
+ check_object_size(to, n, false);
+ __ua_flags = uaccess_save_and_enable();
n = arm_copy_from_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
#ifndef CONFIG_UACCESS_WITH_MEMCPY
- unsigned int __ua_flags = uaccess_save_and_enable();
+ unsigned int __ua_flags;
+
+ check_object_size(from, n, true);
+ __ua_flags = uaccess_save_and_enable();
n = arm_copy_to_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
#else
+ check_object_size(from, n, true);
return arm_copy_to_user(to, from, n);
#endif
}
select HAVE_IRQ_TIME_ACCOUNTING
select GENERIC_TIME_VSYSCALL
select ARCH_CLOCKSOURCE_DATA
+ select HANDLE_DOMAIN_IRQ
+ select HAVE_EXIT_THREAD
menu "Machine selection"
prompt "System type"
default SGI_IP22
+config MIPS_GENERIC
+ bool "Generic board-agnostic MIPS kernel"
+ select BOOT_RAW
+ select BUILTIN_DTB
+ select CEVT_R4K
+ select CLKSRC_MIPS_GIC
+ select COMMON_CLK
+ select CPU_MIPSR2_IRQ_VI
+ select CPU_MIPSR2_IRQ_EI
+ select CSRC_R4K
+ select DMA_PERDEV_COHERENT
+ select HW_HAS_PCI
+ select IRQ_MIPS_CPU
+ select LIBFDT
+ select MIPS_CPU_SCACHE
+ select MIPS_GIC
+ select MIPS_L1_CACHE_SHIFT_7
+ select NO_EXCEPT_FILL
+ select PCI_DRIVERS_GENERIC
+ select PINCTRL
+ select SMP_UP if SMP
+ select SYS_HAS_CPU_MIPS32_R1
+ select SYS_HAS_CPU_MIPS32_R2
+ select SYS_HAS_CPU_MIPS32_R6
+ select SYS_HAS_CPU_MIPS64_R1
+ select SYS_HAS_CPU_MIPS64_R2
+ select SYS_HAS_CPU_MIPS64_R6
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+ select SYS_SUPPORTS_HIGHMEM
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select SYS_SUPPORTS_MICROMIPS
+ select SYS_SUPPORTS_MIPS_CPS
+ select SYS_SUPPORTS_MIPS16
+ select SYS_SUPPORTS_MULTITHREADING
+ select SYS_SUPPORTS_RELOCATABLE
+ select SYS_SUPPORTS_SMARTMIPS
+ select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
+ select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+ select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
+ select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+ select USE_OF
+ help
+ Select this to build a kernel which aims to support multiple boards,
+ generally using a flattened device tree passed from the bootloader
+ using the boot protocol defined in the UHI (Unified Hosting
+ Interface) specification.
+
config MIPS_ALCHEMY
bool "Alchemy processor based machines"
select ARCH_PHYS_ADDR_T_64BIT
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_HIGHMEM
select SYS_SUPPORTS_LITTLE_ENDIAN
+ select ZONE_DMA32 if 64BIT
config SIBYTE_SENTOSA
bool "Sibyte BCM91250E-Sentosa"
source "arch/mips/bcm47xx/Kconfig"
source "arch/mips/bcm63xx/Kconfig"
source "arch/mips/bmips/Kconfig"
+source "arch/mips/generic/Kconfig"
source "arch/mips/jazz/Kconfig"
source "arch/mips/jz4740/Kconfig"
source "arch/mips/lantiq/Kconfig"
select DMA_NONCOHERENT
bool
+config DMA_PERDEV_COHERENT
+ bool
+ select DMA_MAYBE_COHERENT
+
config DMA_COHERENT
bool
bool
config MIPS_PGD_C0_CONTEXT
bool
- default y if 64BIT && CPU_MIPSR2 && !CPU_XLP
+ default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP
#
# Set to y for ptrace access to watch registers.
config MIPS_CPS
bool "MIPS Coherent Processing System support"
- depends on SYS_SUPPORTS_MIPS_CPS && !CPU_MIPSR6
+ depends on SYS_SUPPORTS_MIPS_CPS
select MIPS_CM
select MIPS_CPC
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
+ select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6
select SYS_SUPPORTS_SMP
select WEAK_ORDERING
help
if (flags & AR5523_CMD_FLAG_MAGIC)
hdr->magic = cpu_to_be32(1 << 24);
- memcpy(hdr + 1, idata, ilen);
+ if (ilen)
+ memcpy(hdr + 1, idata, ilen);
cmd->odata = odata;
cmd->olen = olen;
memcpy(ar->channels, ar5523_channels, sizeof(ar5523_channels));
memcpy(ar->rates, ar5523_rates, sizeof(ar5523_rates));
- ar->band.band = IEEE80211_BAND_2GHZ;
+ ar->band.band = NL80211_BAND_2GHZ;
ar->band.channels = ar->channels;
ar->band.n_channels = ARRAY_SIZE(ar5523_channels);
ar->band.bitrates = ar->rates;
ar->band.n_bitrates = ARRAY_SIZE(ar5523_rates);
- ar->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &ar->band;
+ ar->hw->wiphy->bands[NL80211_BAND_2GHZ] = &ar->band;
return 0;
}
else
mvm->max_scans = IWL_MVM_MAX_LMAC_SCANS;
- if (mvm->nvm_data->bands[IEEE80211_BAND_2GHZ].n_channels)
- hw->wiphy->bands[IEEE80211_BAND_2GHZ] =
- &mvm->nvm_data->bands[IEEE80211_BAND_2GHZ];
- if (mvm->nvm_data->bands[IEEE80211_BAND_5GHZ].n_channels) {
- hw->wiphy->bands[IEEE80211_BAND_5GHZ] =
- &mvm->nvm_data->bands[IEEE80211_BAND_5GHZ];
+ if (mvm->nvm_data->bands[NL80211_BAND_2GHZ].n_channels)
+ hw->wiphy->bands[NL80211_BAND_2GHZ] =
+ &mvm->nvm_data->bands[NL80211_BAND_2GHZ];
+ if (mvm->nvm_data->bands[NL80211_BAND_5GHZ].n_channels) {
+ hw->wiphy->bands[NL80211_BAND_5GHZ] =
+ &mvm->nvm_data->bands[NL80211_BAND_5GHZ];
if (fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_BEAMFORMER) &&
fw_has_api(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_API_LQ_SS_PARAMS))
- hw->wiphy->bands[IEEE80211_BAND_5GHZ]->vht_cap.cap |=
+ hw->wiphy->bands[NL80211_BAND_5GHZ]->vht_cap.cap |=
IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE;
}
!ieee80211_is_action(hdr->frame_control)))
sta = NULL;
+	/* If there is no sta and it's not offchannel, send it via the AP station */
+ if (info->control.vif->type == NL80211_IFTYPE_STATION &&
+ info->hw_queue != IWL_MVM_OFFCHANNEL_QUEUE && !sta) {
+ struct iwl_mvm_vif *mvmvif =
+ iwl_mvm_vif_from_mac80211(info->control.vif);
+ u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id);
+
+ if (ap_sta_id < IWL_MVM_STATION_COUNT) {
+ /* mac80211 holds rcu read lock */
+ sta = rcu_dereference(mvm->fw_id_to_mac_id[ap_sta_id]);
+ if (IS_ERR_OR_NULL(sta))
+ goto drop;
+ }
+ }
+
if (sta) {
if (iwl_mvm_defer_tx(mvm, sta, skb))
return;
cpu_to_le32(FW_CMD_ID_AND_COLOR(MAC_INDEX_AUX, 0)),
.sta_id_and_color = cpu_to_le32(mvm->aux_sta.sta_id),
/* Set the channel info data */
- .channel_info.band = (channel->band == IEEE80211_BAND_2GHZ) ?
+ .channel_info.band = (channel->band == NL80211_BAND_2GHZ) ?
PHY_BAND_24 : PHY_BAND_5,
.channel_info.channel = channel->hw_value,
.channel_info.width = PHY_VHT_CHANNEL_MODE20,
/*
* Copyright (c) 2015, Sony Mobile Communications AB.
- * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2013, 2018 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <linux/of_device.h>
+#include <linux/of_irq.h>
#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
banks |= BIT(0);
break;
case PM8XXX_QCOM_DRIVE_STRENGH:
- if (arg > PMIC_GPIO_STRENGTH_LOW) {
+ if (arg > PM8921_GPIO_STRENGTH_LOW) {
dev_err(pctrl->dev, "invalid drive strength\n");
return -EINVAL;
}
}
static const struct of_device_id pm8xxx_gpio_of_match[] = {
- { .compatible = "qcom,pm8018-gpio", .data = (void *)6 },
- { .compatible = "qcom,pm8038-gpio", .data = (void *)12 },
- { .compatible = "qcom,pm8058-gpio", .data = (void *)40 },
- { .compatible = "qcom,pm8917-gpio", .data = (void *)38 },
- { .compatible = "qcom,pm8921-gpio", .data = (void *)44 },
+ { .compatible = "qcom,pm8018-gpio" },
+ { .compatible = "qcom,pm8038-gpio" },
+ { .compatible = "qcom,pm8058-gpio" },
+ { .compatible = "qcom,pm8917-gpio" },
+ { .compatible = "qcom,pm8921-gpio" },
+ { .compatible = "qcom,ssbi-gpio" },
{ },
};
MODULE_DEVICE_TABLE(of, pm8xxx_gpio_of_match);
struct pinctrl_pin_desc *pins;
struct pm8xxx_gpio *pctrl;
int ret;
- int i;
+ int i, npins;
pctrl = devm_kzalloc(&pdev->dev, sizeof(*pctrl), GFP_KERNEL);
if (!pctrl)
return -ENOMEM;
pctrl->dev = &pdev->dev;
- pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
+ npins = platform_irq_count(pdev);
+ if (!npins)
+ return -EINVAL;
+ if (npins < 0)
+ return npins;
+ pctrl->npins = npins;
pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
if (!pctrl->regmap) {
goto unregister_pinctrl;
}
- ret = gpiochip_add_pin_range(&pctrl->chip,
- dev_name(pctrl->dev),
- 0, 0, pctrl->chip.ngpio);
- if (ret) {
- dev_err(pctrl->dev, "failed to add pin range\n");
- goto unregister_gpiochip;
+ /*
+ * For DeviceTree-supported systems, the gpio core checks the
+ * pinctrl's device node for the "gpio-ranges" property.
+ * If it is present, it takes care of adding the pin ranges
+ * for the driver. In this case the driver can skip ahead.
+ *
+ * In order to remain compatible with older, existing DeviceTree
+ * files which don't set the "gpio-ranges" property or systems that
+ * utilize ACPI the driver has to call gpiochip_add_pin_range().
+ */
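+	/*
+	 * Illustrative (not normative) device tree fragment that satisfies
+	 * the check below:
+	 *   gpio-ranges = <&pm8921_gpio 0 0 44>;
+	 */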
+ if (!of_property_read_bool(pctrl->dev->of_node, "gpio-ranges")) {
+ ret = gpiochip_add_pin_range(&pctrl->chip, dev_name(pctrl->dev),
+ 0, 0, pctrl->chip.ngpio);
+ if (ret) {
+ dev_err(pctrl->dev, "failed to add pin range\n");
+ goto unregister_gpiochip;
+ }
}
platform_set_drvdata(pdev, pctrl);
* Copyright (C) 2008 Intel Corp
* Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
* Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ * Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/of.h>
+#include <linux/kthread.h>
#include <net/netlink.h>
#include <net/genetlink.h>
-#include <linux/suspend.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thermal.h>
#include "thermal_core.h"
#include "thermal_hwmon.h"
+#define THERMAL_UEVENT_DATA "type"
+
MODULE_AUTHOR("Zhang Rui");
MODULE_DESCRIPTION("Generic thermal management sysfs support");
MODULE_LICENSE("GPL v2");
+#define THERMAL_MAX_ACTIVE 16
+
static DEFINE_IDR(thermal_tz_idr);
static DEFINE_IDR(thermal_cdev_idr);
static DEFINE_MUTEX(thermal_idr_lock);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
-static atomic_t in_suspend;
-
static struct thermal_governor *def_governor;
+static struct workqueue_struct *thermal_passive_wq;
+
static struct thermal_governor *__find_governor(const char *name)
{
struct thermal_governor *pos;
return;
}
+static LIST_HEAD(sensor_info_list);
+static DEFINE_MUTEX(sensor_list_lock);
+
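+/* Look up the sensor_info registered for @sensor_id (RCU-protected walk). */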
+static struct sensor_info *get_sensor(uint32_t sensor_id)
+{
+ struct sensor_info *pos = NULL, *matching_sensor = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &sensor_info_list, sensor_list) {
+ if (pos->sensor_id == sensor_id) {
+ matching_sensor = pos;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return matching_sensor;
+}
+
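+/*
+ * Return the sensor id registered for the thermal zone whose type matches
+ * @name, or -ENODEV if no such zone has been registered.
+ */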
+int sensor_get_id(char *name)
+{
+ struct sensor_info *pos = NULL;
+ int matching_id = -ENODEV;
+
+ if (!name)
+ return matching_id;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &sensor_info_list, sensor_list) {
+ if (!strcmp(pos->tz->type, name)) {
+ matching_id = pos->sensor_id;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return matching_id;
+}
+EXPORT_SYMBOL(sensor_get_id);
+
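+/*
+ * Locate the CONFIGURABLE_HI/CONFIGURABLE_LOW trip indices for this sensor
+ * and cache their current trip temperatures as the initial thresholds.
+ */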
+static void init_sensor_trip(struct sensor_info *sensor)
+{
+ int ret = 0, i = 0;
+ enum thermal_trip_type type;
+
+ for (i = 0; ((sensor->max_idx == -1) ||
+ (sensor->min_idx == -1)) &&
+ (sensor->tz->ops->get_trip_type) &&
+ (i < sensor->tz->trips); i++) {
+
+ sensor->tz->ops->get_trip_type(sensor->tz, i, &type);
+ if (type == THERMAL_TRIP_CONFIGURABLE_HI)
+ sensor->max_idx = i;
+ if (type == THERMAL_TRIP_CONFIGURABLE_LOW)
+ sensor->min_idx = i;
+ type = 0;
+ }
+
+ ret = sensor->tz->ops->get_trip_temp(sensor->tz,
+ sensor->min_idx, &sensor->threshold_min);
+ if (ret)
+ pr_err("Unable to get MIN trip temp. sensor:%d err:%d\n",
+ sensor->sensor_id, ret);
+
+ ret = sensor->tz->ops->get_trip_temp(sensor->tz,
+ sensor->max_idx, &sensor->threshold_max);
+ if (ret)
+ pr_err("Unable to get MAX trip temp. sensor:%d err:%d\n",
+ sensor->sensor_id, ret);
+}
+
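+/*
+ * Program the hardware trips from the registered thresholds: the high trip
+ * is set to the lowest active high threshold and the low trip to the highest
+ * active low threshold. Caller must hold sensor->lock.
+ */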
+static int __update_sensor_thresholds(struct sensor_info *sensor)
+{
+ long max_of_low_thresh = LONG_MIN;
+ long min_of_high_thresh = LONG_MAX;
+ struct sensor_threshold *pos = NULL;
+ int ret = 0;
+
+ if (!sensor->tz->ops->set_trip_temp ||
+ !sensor->tz->ops->activate_trip_type ||
+ !sensor->tz->ops->get_trip_type ||
+ !sensor->tz->ops->get_trip_temp) {
+ ret = -ENODEV;
+ goto update_done;
+ }
+
+ if ((sensor->max_idx == -1) || (sensor->min_idx == -1))
+ init_sensor_trip(sensor);
+
+ list_for_each_entry(pos, &sensor->threshold_list, list) {
+ if (!pos->active)
+ continue;
+ if (pos->trip == THERMAL_TRIP_CONFIGURABLE_LOW) {
+ if (pos->temp > max_of_low_thresh)
+ max_of_low_thresh = pos->temp;
+ }
+ if (pos->trip == THERMAL_TRIP_CONFIGURABLE_HI) {
+ if (pos->temp < min_of_high_thresh)
+ min_of_high_thresh = pos->temp;
+ }
+ }
+
+ pr_debug("sensor %d: Thresholds: max of low: %ld min of high: %ld\n",
+ sensor->sensor_id, max_of_low_thresh,
+ min_of_high_thresh);
+
+ if (min_of_high_thresh != LONG_MAX) {
+ ret = sensor->tz->ops->set_trip_temp(sensor->tz,
+ sensor->max_idx, min_of_high_thresh);
+ if (ret) {
+ pr_err("sensor %d: Unable to set high threshold %d",
+ sensor->sensor_id, ret);
+ goto update_done;
+ }
+ sensor->threshold_max = min_of_high_thresh;
+ }
+ ret = sensor->tz->ops->activate_trip_type(sensor->tz,
+ sensor->max_idx,
+ (min_of_high_thresh == LONG_MAX) ?
+ THERMAL_TRIP_ACTIVATION_DISABLED :
+ THERMAL_TRIP_ACTIVATION_ENABLED);
+ if (ret) {
+ pr_err("sensor %d: Unable to activate high threshold %d",
+ sensor->sensor_id, ret);
+ goto update_done;
+ }
+
+ if (max_of_low_thresh != LONG_MIN) {
+ ret = sensor->tz->ops->set_trip_temp(sensor->tz,
+ sensor->min_idx, max_of_low_thresh);
+ if (ret) {
+ pr_err("sensor %d: Unable to set low threshold %d",
+ sensor->sensor_id, ret);
+ goto update_done;
+ }
+ sensor->threshold_min = max_of_low_thresh;
+ }
+ ret = sensor->tz->ops->activate_trip_type(sensor->tz,
+ sensor->min_idx,
+ (max_of_low_thresh == LONG_MIN) ?
+ THERMAL_TRIP_ACTIVATION_DISABLED :
+ THERMAL_TRIP_ACTIVATION_ENABLED);
+ if (ret) {
+ pr_err("sensor %d: Unable to activate low threshold %d",
+ sensor->sensor_id, ret);
+ goto update_done;
+ }
+
+ pr_debug("sensor %d: low: %d high: %d\n",
+ sensor->sensor_id,
+ sensor->threshold_min, sensor->threshold_max);
+
+update_done:
+ return ret;
+}
+
+static void sensor_update_work(struct work_struct *work)
+{
+ struct sensor_info *sensor = container_of(work, struct sensor_info,
+ work);
+ int ret = 0;
+ mutex_lock(&sensor->lock);
+ ret = __update_sensor_thresholds(sensor);
+ if (ret)
+ pr_err("sensor %d: Error %d setting threshold\n",
+ sensor->sensor_id, ret);
+ mutex_unlock(&sensor->lock);
+}
+
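+/*
+ * Per-zone kthread: waits on sysfs_notify_complete and then emits a sysfs
+ * notification on the zone's "type" attribute so userspace pollers see the
+ * trip event.
+ */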
+static __ref int sensor_sysfs_notify(void *data)
+{
+ int ret = 0;
+ struct sensor_info *sensor = (struct sensor_info *)data;
+
+ while (!kthread_should_stop()) {
+ if (wait_for_completion_interruptible(
+ &sensor->sysfs_notify_complete) != 0)
+ continue;
+ if (sensor->deregister_active)
+ return ret;
+ reinit_completion(&sensor->sysfs_notify_complete);
+ sysfs_notify(&sensor->tz->device.kobj, NULL,
+ THERMAL_UEVENT_DATA);
+ }
+ return ret;
+}
+
+/*
+ * May be called in an interrupt context.
+ * Do NOT call sensor_set_trip from this function.
+ */
+int thermal_sensor_trip(struct thermal_zone_device *tz,
+ enum thermal_trip_type trip, long temp)
+{
+ struct sensor_threshold *pos = NULL;
+ int ret = -ENODEV;
+
+ if (trip != THERMAL_TRIP_CONFIGURABLE_HI &&
+ trip != THERMAL_TRIP_CONFIGURABLE_LOW)
+ return 0;
+
+ if (list_empty(&tz->sensor.threshold_list))
+ return 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &tz->sensor.threshold_list, list) {
+ if ((pos->trip != trip) || (!pos->active))
+ continue;
+ if (((trip == THERMAL_TRIP_CONFIGURABLE_LOW) &&
+ (pos->temp <= tz->sensor.threshold_min) &&
+ (pos->temp >= temp)) ||
+ ((trip == THERMAL_TRIP_CONFIGURABLE_HI) &&
+ (pos->temp >= tz->sensor.threshold_max) &&
+ (pos->temp <= temp))) {
+ if ((pos == &tz->tz_threshold[0])
+ || (pos == &tz->tz_threshold[1]))
+ complete(&tz->sensor.sysfs_notify_complete);
+ pos->active = 0;
+ pos->notify(trip, temp, pos->data);
+ }
+ }
+ rcu_read_unlock();
+
+ schedule_work(&tz->sensor.work);
+
+ return ret;
+}
+EXPORT_SYMBOL(thermal_sensor_trip);
+
+int sensor_get_temp(uint32_t sensor_id, int *temp)
+{
+ struct sensor_info *sensor = get_sensor(sensor_id);
+ int ret = 0;
+
+ if (!sensor)
+ return -ENODEV;
+
+ ret = sensor->tz->ops->get_temp(sensor->tz, temp);
+
+ return ret;
+}
+EXPORT_SYMBOL(sensor_get_temp);
+
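+/*
+ * Enable or disable a previously registered threshold and reprogram the
+ * hardware trip points accordingly.
+ */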
+int sensor_activate_trip(uint32_t sensor_id,
+ struct sensor_threshold *threshold, bool enable)
+{
+ struct sensor_info *sensor = get_sensor(sensor_id);
+ int ret = 0;
+
+ if (!sensor || !threshold) {
+ pr_err("%s: uninitialized data\n",
+ KBUILD_MODNAME);
+ ret = -ENODEV;
+ goto activate_trip_exit;
+ }
+
+ mutex_lock(&sensor->lock);
+ threshold->active = (enable) ? 1 : 0;
+ ret = __update_sensor_thresholds(sensor);
+ mutex_unlock(&sensor->lock);
+
+activate_trip_exit:
+ return ret;
+}
+EXPORT_SYMBOL(sensor_activate_trip);
+
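+/*
+ * Register @threshold on the sensor's threshold list. The threshold is added
+ * inactive; callers enable it with sensor_activate_trip().
+ */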
+int sensor_set_trip(uint32_t sensor_id, struct sensor_threshold *threshold)
+{
+ struct sensor_threshold *pos = NULL;
+ struct sensor_info *sensor = get_sensor(sensor_id);
+
+ if (!sensor)
+ return -ENODEV;
+
+ if (!threshold || !threshold->notify)
+ return -EFAULT;
+
+ mutex_lock(&sensor->lock);
+ list_for_each_entry(pos, &sensor->threshold_list, list) {
+ if (pos == threshold)
+ break;
+ }
+
+ if (pos != threshold) {
+ INIT_LIST_HEAD(&threshold->list);
+ list_add_rcu(&threshold->list, &sensor->threshold_list);
+ }
+ threshold->active = 0; /* Do not allow active threshold right away */
+
+ mutex_unlock(&sensor->lock);
+
+ return 0;
+
+}
+EXPORT_SYMBOL(sensor_set_trip);
+
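+/*
+ * Remove a registered threshold from the sensor and reprogram the hardware
+ * trip points without it.
+ */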
+int sensor_cancel_trip(uint32_t sensor_id, struct sensor_threshold *threshold)
+{
+ struct sensor_threshold *pos = NULL, *var = NULL;
+ struct sensor_info *sensor = get_sensor(sensor_id);
+ int ret = 0;
+
+ if (!sensor)
+ return -ENODEV;
+
+ mutex_lock(&sensor->lock);
+ list_for_each_entry_safe(pos, var, &sensor->threshold_list, list) {
+ if (pos == threshold) {
+ pos->active = 0;
+ list_del_rcu(&pos->list);
+ break;
+ }
+ }
+
+ ret = __update_sensor_thresholds(sensor);
+ mutex_unlock(&sensor->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(sensor_cancel_trip);
+
+static int tz_notify_trip(enum thermal_trip_type type, int temp, void *data)
+{
+ struct thermal_zone_device *tz = (struct thermal_zone_device *)data;
+
+ pr_debug("sensor %d tripped: type %d temp %d\n",
+ tz->sensor.sensor_id, type, temp);
+
+ return 0;
+}
+
+static void get_trip_threshold(struct thermal_zone_device *tz, int trip,
+ struct sensor_threshold **threshold)
+{
+ enum thermal_trip_type type;
+
+ tz->ops->get_trip_type(tz, trip, &type);
+
+ if (type == THERMAL_TRIP_CONFIGURABLE_HI)
+ *threshold = &tz->tz_threshold[0];
+ else if (type == THERMAL_TRIP_CONFIGURABLE_LOW)
+ *threshold = &tz->tz_threshold[1];
+ else
+ *threshold = NULL;
+}
+
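+/*
+ * Route trip temperature updates for configurable trips through the sensor
+ * framework; all other trips fall back to the zone's set_trip_temp callback.
+ */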
+int sensor_set_trip_temp(struct thermal_zone_device *tz,
+ int trip, long temp)
+{
+ int ret = 0;
+ struct sensor_threshold *threshold = NULL;
+
+ if (!tz->ops->get_trip_type)
+ return -EPERM;
+
+ get_trip_threshold(tz, trip, &threshold);
+ if (threshold) {
+ threshold->temp = temp;
+ ret = sensor_set_trip(tz->sensor.sensor_id, threshold);
+ } else {
+ ret = tz->ops->set_trip_temp(tz, trip, temp);
+ }
+
+ return ret;
+}
+
+int sensor_init(struct thermal_zone_device *tz)
+{
+ struct sensor_info *sensor = &tz->sensor;
+
+ sensor->sensor_id = tz->id;
+ sensor->tz = tz;
+ sensor->threshold_min = INT_MIN;
+ sensor->threshold_max = INT_MAX;
+ sensor->max_idx = -1;
+ sensor->min_idx = -1;
+ sensor->deregister_active = false;
+ mutex_init(&sensor->lock);
+ INIT_LIST_HEAD_RCU(&sensor->sensor_list);
+ INIT_LIST_HEAD_RCU(&sensor->threshold_list);
+ INIT_LIST_HEAD(&tz->tz_threshold[0].list);
+ INIT_LIST_HEAD(&tz->tz_threshold[1].list);
+ tz->tz_threshold[0].notify = tz_notify_trip;
+ tz->tz_threshold[0].data = tz;
+ tz->tz_threshold[0].trip = THERMAL_TRIP_CONFIGURABLE_HI;
+ tz->tz_threshold[1].notify = tz_notify_trip;
+ tz->tz_threshold[1].data = tz;
+ tz->tz_threshold[1].trip = THERMAL_TRIP_CONFIGURABLE_LOW;
+ list_add_rcu(&sensor->sensor_list, &sensor_info_list);
+ INIT_WORK(&sensor->work, sensor_update_work);
+ init_completion(&sensor->sysfs_notify_complete);
+ sensor->sysfs_notify_thread = kthread_run(sensor_sysfs_notify,
+ &tz->sensor,
+ "therm_core:notify%d",
+ tz->id);
+ if (IS_ERR(sensor->sysfs_notify_thread))
+ pr_err("Failed to create notify thread %d", tz->id);
+
+
+ return 0;
+}
+
static int get_idr(struct idr *idr, struct mutex *lock, int *id)
{
int ret;
mutex_unlock(&thermal_list_lock);
}
-static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
+static void thermal_zone_device_set_polling(struct workqueue_struct *queue,
+ struct thermal_zone_device *tz,
int delay)
{
if (delay > 1000)
- mod_delayed_work(system_freezable_wq, &tz->poll_queue,
+ mod_delayed_work(queue, &tz->poll_queue,
round_jiffies(msecs_to_jiffies(delay)));
else if (delay)
- mod_delayed_work(system_freezable_wq, &tz->poll_queue,
+ mod_delayed_work(queue, &tz->poll_queue,
msecs_to_jiffies(delay));
else
- cancel_delayed_work_sync(&tz->poll_queue);
+ cancel_delayed_work(&tz->poll_queue);
}
static void monitor_thermal_zone(struct thermal_zone_device *tz)
mutex_lock(&tz->lock);
if (tz->passive)
- thermal_zone_device_set_polling(tz, tz->passive_delay);
+ thermal_zone_device_set_polling(thermal_passive_wq,
+ tz, tz->passive_delay);
else if (tz->polling_delay)
- thermal_zone_device_set_polling(tz, tz->polling_delay);
+ thermal_zone_device_set_polling(
+ system_freezable_power_efficient_wq,
+ tz, tz->polling_delay);
else
- thermal_zone_device_set_polling(tz, 0);
+ thermal_zone_device_set_polling(NULL, tz, 0);
mutex_unlock(&tz->lock);
}
tz->ops->get_trip_temp(tz, trip, &trip_temp);
/* If we have not crossed the trip_temp, we do not care. */
- if (trip_temp <= 0 || tz->temperature < trip_temp)
- return;
-
- trace_thermal_zone_trip(tz, trip, trip_type);
+ if (trip_type != THERMAL_TRIP_CRITICAL_LOW &&
+ trip_type != THERMAL_TRIP_CONFIGURABLE_LOW) {
+ if (tz->temperature < trip_temp)
+ return;
+	} else {
+		if (tz->temperature >= trip_temp)
+			return;
+	}
if (tz->ops->notify)
tz->ops->notify(tz, trip, trip_type);
- if (trip_type == THERMAL_TRIP_CRITICAL) {
+ if (trip_type == THERMAL_TRIP_CRITICAL ||
+ trip_type == THERMAL_TRIP_CRITICAL_LOW) {
dev_emerg(&tz->device,
"critical temperature reached(%d C),shutting down\n",
tz->temperature / 1000);
tz->ops->get_trip_type(tz, trip, &type);
- if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
+ if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT ||
+ type == THERMAL_TRIP_CONFIGURABLE_HI ||
+ type == THERMAL_TRIP_CONFIGURABLE_LOW ||
+ type == THERMAL_TRIP_CRITICAL_LOW)
handle_critical_trips(tz, trip, type);
else
handle_non_critical_trips(tz, trip, type);
{
int count;
- if (atomic_read(&in_suspend))
- return;
-
if (!tz->ops->get_temp)
return;
return sprintf(buf, "critical\n");
case THERMAL_TRIP_HOT:
return sprintf(buf, "hot\n");
+ case THERMAL_TRIP_CONFIGURABLE_HI:
+ return sprintf(buf, "configurable_hi\n");
+ case THERMAL_TRIP_CONFIGURABLE_LOW:
+ return sprintf(buf, "configurable_low\n");
+ case THERMAL_TRIP_CRITICAL_LOW:
+ return sprintf(buf, "critical_low\n");
case THERMAL_TRIP_PASSIVE:
return sprintf(buf, "passive\n");
case THERMAL_TRIP_ACTIVE:
}
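+/*
+ * sysfs store for trip_point_N_type: writing "enabled" or "disabled"
+ * activates or deactivates the corresponding configurable trip.
+ */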
static ssize_t
+trip_point_type_activate(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct thermal_zone_device *tz = to_thermal_zone(dev);
+ int trip, result = 0;
+ bool activate;
+ struct sensor_threshold *threshold = NULL;
+
+ if (!tz->ops->get_trip_type ||
+ !tz->ops->activate_trip_type) {
+ result = -EPERM;
+ goto trip_activate_exit;
+ }
+
+ if (!sscanf(attr->attr.name, "trip_point_%d_type", &trip)) {
+ result = -EINVAL;
+ goto trip_activate_exit;
+ }
+
+ if (!strcmp(buf, "enabled")) {
+ activate = true;
+ } else if (!strcmp(buf, "disabled")) {
+ activate = false;
+ } else {
+ result = -EINVAL;
+ goto trip_activate_exit;
+ }
+
+ get_trip_threshold(tz, trip, &threshold);
+ if (threshold)
+ result = sensor_activate_trip(tz->sensor.sensor_id,
+ threshold, activate);
+ else
+ result = tz->ops->activate_trip_type(tz, trip,
+ activate ? THERMAL_TRIP_ACTIVATION_ENABLED :
+ THERMAL_TRIP_ACTIVATION_DISABLED);
+
+trip_activate_exit:
+ if (result)
+ return result;
+
+ return count;
+}
+
+static ssize_t
trip_point_temp_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct thermal_zone_device *tz = to_thermal_zone(dev);
int trip, ret;
- unsigned long temperature;
+ long temperature;
if (!tz->ops->set_trip_temp)
return -EPERM;
if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip))
return -EINVAL;
- if (kstrtoul(buf, 10, &temperature))
+ if (kstrtol(buf, 10, &temperature))
return -EINVAL;
- ret = tz->ops->set_trip_temp(tz, trip, temperature);
+ ret = sensor_set_trip_temp(tz, trip, temperature);
return ret ? ret : count;
}
return -EINVAL;
ret = tz->ops->get_trip_temp(tz, trip, &temperature);
-
if (ret)
return ret;
sysfs_attr_init(&tz->trip_type_attrs[indx].attr.attr);
tz->trip_type_attrs[indx].attr.attr.name =
tz->trip_type_attrs[indx].name;
- tz->trip_type_attrs[indx].attr.attr.mode = S_IRUGO;
+ tz->trip_type_attrs[indx].attr.attr.mode = S_IRUGO | S_IWUSR;
tz->trip_type_attrs[indx].attr.show = trip_point_type_show;
+ tz->trip_type_attrs[indx].attr.store = trip_point_type_activate;
device_create_file(&tz->device,
&tz->trip_type_attrs[indx].attr);
}
mutex_lock(&thermal_list_lock);
- list_add_tail(&tz->node, &thermal_tz_list);
+ list_add_tail_rcu(&tz->node, &thermal_tz_list);
+ sensor_init(tz);
mutex_unlock(&thermal_list_lock);
/* Bind cooling devices for this zone */
bind_tz(tz);
- INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
+ INIT_DEFERRABLE_WORK(&(tz->poll_queue), thermal_zone_device_check);
thermal_zone_device_reset(tz);
/* Update the new thermal zone and mark it as already updated. */
mutex_unlock(&thermal_list_lock);
return;
}
- list_del(&tz->node);
+ list_del_rcu(&tz->node);
/* Unbind all cdevs associated with 'this' thermal zone */
list_for_each_entry(cdev, &thermal_cdev_list, node) {
mutex_unlock(&thermal_list_lock);
- thermal_zone_device_set_polling(NULL, tz, 0);
+ cancel_delayed_work_sync(&tz->poll_queue);
if (tz->type[0])
device_remove_file(&tz->device, &dev_attr_type);
thermal_set_governor(tz, NULL);
thermal_remove_hwmon_sysfs(tz);
+ flush_work(&tz->sensor.work);
+ tz->sensor.deregister_active = true;
+ complete(&tz->sensor.sysfs_notify_complete);
+ kthread_stop(tz->sensor.sysfs_notify_thread);
+ mutex_lock(&thermal_list_lock);
+ list_del_rcu(&tz->sensor.sensor_list);
+ mutex_unlock(&thermal_list_lock);
release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
idr_destroy(&tz->idr);
mutex_destroy(&tz->lock);
if (!name)
goto exit;
- mutex_lock(&thermal_list_lock);
- list_for_each_entry(pos, &thermal_tz_list, node)
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &thermal_tz_list, node)
if (!strncasecmp(name, pos->type, THERMAL_NAME_LENGTH)) {
found++;
ref = pos;
}
- mutex_unlock(&thermal_list_lock);
+ rcu_read_unlock();
/* nothing has been found, thus an error code for it */
if (found == 0)
thermal_gov_power_allocator_unregister();
}
-static int thermal_pm_notify(struct notifier_block *nb,
- unsigned long mode, void *_unused)
-{
- struct thermal_zone_device *tz;
-
- switch (mode) {
- case PM_HIBERNATION_PREPARE:
- case PM_RESTORE_PREPARE:
- case PM_SUSPEND_PREPARE:
- atomic_set(&in_suspend, 1);
- break;
- case PM_POST_HIBERNATION:
- case PM_POST_RESTORE:
- case PM_POST_SUSPEND:
- atomic_set(&in_suspend, 0);
- list_for_each_entry(tz, &thermal_tz_list, node) {
- thermal_zone_device_reset(tz);
- thermal_zone_device_update(tz);
- }
- break;
- default:
- break;
- }
- return 0;
-}
-
-static struct notifier_block thermal_pm_nb = {
- .notifier_call = thermal_pm_notify,
-};
-
static int __init thermal_init(void)
{
int result;
+ thermal_passive_wq = alloc_workqueue("thermal_passive_wq",
+ WQ_HIGHPRI | WQ_UNBOUND
+ | WQ_FREEZABLE,
+ THERMAL_MAX_ACTIVE);
+ if (!thermal_passive_wq) {
+ result = -ENOMEM;
+ goto error;
+ }
+
result = thermal_register_governors();
if (result)
- goto error;
+ goto destroy_wq;
result = class_register(&thermal_class);
if (result)
if (result)
goto exit_netlink;
- result = register_pm_notifier(&thermal_pm_nb);
- if (result)
- pr_warn("Thermal: Can not register suspend notifier, return %d\n",
- result);
-
return 0;
exit_netlink:
class_unregister(&thermal_class);
unregister_governors:
thermal_unregister_governors();
+destroy_wq:
+ destroy_workqueue(thermal_passive_wq);
error:
idr_destroy(&thermal_tz_idr);
idr_destroy(&thermal_cdev_idr);
static void __exit thermal_exit(void)
{
- unregister_pm_notifier(&thermal_pm_nb);
of_thermal_destroy_zones();
+ destroy_workqueue(thermal_passive_wq);
genetlink_exit();
class_unregister(&thermal_class);
thermal_unregister_governors();
# define SUPPORT_SYSRQ
#endif
+#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
-#include <linux/hrtimer.h>
#include <linux/module.h>
#include <linux/io.h>
#include <linux/ioport.h>
-#include <linux/irq.h>
+#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>
#include <linux/serial_core.h>
-#include <linux/serial.h>
#include <linux/slab.h>
#include <linux/clk.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/of_device.h>
-
-#include "msm_serial.h"
-
-#define UARTDM_BURST_SIZE 16 /* in bytes */
-#define UARTDM_TX_AIGN(x) ((x) & ~0x3) /* valid for > 1p3 */
-#define UARTDM_TX_MAX 256 /* in bytes, valid for <= 1p3 */
-#define UARTDM_RX_SIZE (UART_XMIT_SIZE / 4)
+#include <linux/wait.h>
+
+#define UART_MR1 0x0000
+
+#define UART_MR1_AUTO_RFR_LEVEL0 0x3F
+#define UART_MR1_AUTO_RFR_LEVEL1 0x3FF00
+#define UART_DM_MR1_AUTO_RFR_LEVEL1 0xFFFFFF00
+#define UART_MR1_RX_RDY_CTL BIT(7)
+#define UART_MR1_CTS_CTL BIT(6)
+
+#define UART_MR2 0x0004
+#define UART_MR2_ERROR_MODE BIT(6)
+#define UART_MR2_BITS_PER_CHAR 0x30
+#define UART_MR2_BITS_PER_CHAR_5 (0x0 << 4)
+#define UART_MR2_BITS_PER_CHAR_6 (0x1 << 4)
+#define UART_MR2_BITS_PER_CHAR_7 (0x2 << 4)
+#define UART_MR2_BITS_PER_CHAR_8 (0x3 << 4)
+#define UART_MR2_STOP_BIT_LEN_ONE (0x1 << 2)
+#define UART_MR2_STOP_BIT_LEN_TWO (0x3 << 2)
+#define UART_MR2_PARITY_MODE_NONE 0x0
+#define UART_MR2_PARITY_MODE_ODD 0x1
+#define UART_MR2_PARITY_MODE_EVEN 0x2
+#define UART_MR2_PARITY_MODE_SPACE 0x3
+#define UART_MR2_PARITY_MODE 0x3
+
+#define UART_CSR 0x0008
+
+#define UART_TF 0x000C
+#define UARTDM_TF 0x0070
+
+#define UART_CR 0x0010
+#define UART_CR_CMD_NULL (0 << 4)
+#define UART_CR_CMD_RESET_RX (1 << 4)
+#define UART_CR_CMD_RESET_TX (2 << 4)
+#define UART_CR_CMD_RESET_ERR (3 << 4)
+#define UART_CR_CMD_RESET_BREAK_INT (4 << 4)
+#define UART_CR_CMD_START_BREAK (5 << 4)
+#define UART_CR_CMD_STOP_BREAK (6 << 4)
+#define UART_CR_CMD_RESET_CTS (7 << 4)
+#define UART_CR_CMD_RESET_STALE_INT (8 << 4)
+#define UART_CR_CMD_PACKET_MODE (9 << 4)
+#define UART_CR_CMD_MODE_RESET (12 << 4)
+#define UART_CR_CMD_SET_RFR (13 << 4)
+#define UART_CR_CMD_RESET_RFR (14 << 4)
+#define UART_CR_CMD_PROTECTION_EN (16 << 4)
+#define UART_CR_CMD_STALE_EVENT_DISABLE (6 << 8)
+#define UART_CR_CMD_STALE_EVENT_ENABLE (80 << 4)
+#define UART_CR_CMD_FORCE_STALE (4 << 8)
+#define UART_CR_CMD_RESET_TX_READY (3 << 8)
+#define UART_CR_TX_DISABLE BIT(3)
+#define UART_CR_TX_ENABLE BIT(2)
+#define UART_CR_RX_DISABLE BIT(1)
+#define UART_CR_RX_ENABLE BIT(0)
+#define UART_CR_CMD_RESET_RXBREAK_START ((1 << 11) | (2 << 4))
+
+#define UART_IMR 0x0014
+#define UART_IMR_TXLEV BIT(0)
+#define UART_IMR_RXSTALE BIT(3)
+#define UART_IMR_RXLEV BIT(4)
+#define UART_IMR_DELTA_CTS BIT(5)
+#define UART_IMR_CURRENT_CTS BIT(6)
+#define UART_IMR_RXBREAK_START BIT(10)
+
+#define UART_IPR_RXSTALE_LAST 0x20
+#define UART_IPR_STALE_LSB 0x1F
+#define UART_IPR_STALE_TIMEOUT_MSB 0x3FF80
+#define UART_DM_IPR_STALE_TIMEOUT_MSB 0xFFFFFF80
+
+#define UART_IPR 0x0018
+#define UART_TFWR 0x001C
+#define UART_RFWR 0x0020
+#define UART_HCR 0x0024
+
+#define UART_MREG 0x0028
+#define UART_NREG 0x002C
+#define UART_DREG 0x0030
+#define UART_MNDREG 0x0034
+#define UART_IRDA 0x0038
+#define UART_MISR_MODE 0x0040
+#define UART_MISR_RESET 0x0044
+#define UART_MISR_EXPORT 0x0048
+#define UART_MISR_VAL 0x004C
+#define UART_TEST_CTRL 0x0050
+
+#define UART_SR 0x0008
+#define UART_SR_HUNT_CHAR BIT(7)
+#define UART_SR_RX_BREAK BIT(6)
+#define UART_SR_PAR_FRAME_ERR BIT(5)
+#define UART_SR_OVERRUN BIT(4)
+#define UART_SR_TX_EMPTY BIT(3)
+#define UART_SR_TX_READY BIT(2)
+#define UART_SR_RX_FULL BIT(1)
+#define UART_SR_RX_READY BIT(0)
+
+#define UART_RF 0x000C
+#define UARTDM_RF 0x0070
+#define UART_MISR 0x0010
+#define UART_ISR 0x0014
+#define UART_ISR_TX_READY BIT(7)
+
+#define UARTDM_RXFS 0x50
+#define UARTDM_RXFS_BUF_SHIFT 0x7
+#define UARTDM_RXFS_BUF_MASK 0x7
+
+#define UARTDM_DMEN 0x3C
+#define UARTDM_DMEN_RX_SC_ENABLE BIT(5)
+#define UARTDM_DMEN_TX_SC_ENABLE BIT(4)
+
+#define UARTDM_DMEN_TX_BAM_ENABLE BIT(2) /* UARTDM_1P4 */
+#define UARTDM_DMEN_TX_DM_ENABLE BIT(0) /* < UARTDM_1P4 */
+
+#define UARTDM_DMEN_RX_BAM_ENABLE BIT(3) /* UARTDM_1P4 */
+#define UARTDM_DMEN_RX_DM_ENABLE BIT(1) /* < UARTDM_1P4 */
+
+#define UARTDM_DMRX 0x34
+#define UARTDM_NCF_TX 0x40
+#define UARTDM_RX_TOTAL_SNAP 0x38
+
+#define UARTDM_BURST_SIZE 16 /* in bytes */
+#define UARTDM_TX_AIGN(x) ((x) & ~0x3) /* valid for > 1p3 */
+#define UARTDM_TX_MAX 256 /* in bytes, valid for <= 1p3 */
+#define UARTDM_RX_SIZE (UART_XMIT_SIZE / 4)
enum {
UARTDM_1P1 = 1,
struct msm_dma rx_dma;
};
+#define UART_TO_MSM(uart_port) container_of(uart_port, struct msm_port, uart)
+
+static
+void msm_write(struct uart_port *port, unsigned int val, unsigned int off)
+{
+ writel_relaxed_no_log(val, port->membase + off);
+}
+
+static
+unsigned int msm_read(struct uart_port *port, unsigned int off)
+{
+ return readl_relaxed_no_log(port->membase + off);
+}
+
+/*
+ * Setup the MND registers to use the TCXO clock.
+ */
+static void msm_serial_set_mnd_regs_tcxo(struct uart_port *port)
+{
+ msm_write(port, 0x06, UART_MREG);
+ msm_write(port, 0xF1, UART_NREG);
+ msm_write(port, 0x0F, UART_DREG);
+ msm_write(port, 0x1A, UART_MNDREG);
+ port->uartclk = 1843200;
+}
+
+/*
+ * Setup the MND registers to use the TCXO clock divided by 4.
+ */
+static void msm_serial_set_mnd_regs_tcxoby4(struct uart_port *port)
+{
+ msm_write(port, 0x18, UART_MREG);
+ msm_write(port, 0xF6, UART_NREG);
+ msm_write(port, 0x0F, UART_DREG);
+ msm_write(port, 0x0A, UART_MNDREG);
+ port->uartclk = 1843200;
+}
+
+static void msm_serial_set_mnd_regs(struct uart_port *port)
+{
+ struct msm_port *msm_port = UART_TO_MSM(port);
+
+ /*
+ * These registers don't exist so we change the clk input rate
+ * on uartdm hardware instead
+ */
+ if (msm_port->is_uartdm)
+ return;
+
+ if (port->uartclk == 19200000)
+ msm_serial_set_mnd_regs_tcxo(port);
+ else if (port->uartclk == 4800000)
+ msm_serial_set_mnd_regs_tcxoby4(port);
+}
+
static void msm_handle_tx(struct uart_port *port);
static void msm_start_rx_dma(struct msm_port *msm_port);
-void msm_stop_dma(struct uart_port *port, struct msm_dma *dma)
+static void msm_stop_dma(struct uart_port *port, struct msm_dma *dma)
{
struct device *dev = port->dev;
unsigned int mapped;
struct device *dev = msm_port->uart.dev;
struct dma_slave_config conf;
struct msm_dma *dma;
+ struct dma_chan *dma_chan;
u32 crci = 0;
int ret;
dma = &msm_port->tx_dma;
/* allocate DMA resources, if available */
- dma->chan = dma_request_slave_channel_reason(dev, "tx");
- if (IS_ERR(dma->chan))
+ dma_chan = dma_request_slave_channel_reason(dev, "tx");
+ if (IS_ERR(dma_chan))
goto no_tx;
+ dma->chan = dma_chan;
of_property_read_u32(dev->of_node, "qcom,tx-crci", &crci);
struct device *dev = msm_port->uart.dev;
struct dma_slave_config conf;
struct msm_dma *dma;
+ struct dma_chan *dma_chan;
u32 crci = 0;
int ret;
dma = &msm_port->rx_dma;
/* allocate DMA resources, if available */
- dma->chan = dma_request_slave_channel_reason(dev, "rx");
- if (IS_ERR(dma->chan))
+ dma_chan = dma_request_slave_channel_reason(dev, "rx");
+ if (IS_ERR(dma_chan))
goto no_rx;
+ dma->chan = dma_chan;
of_property_read_u32(dev->of_node, "qcom,rx-crci", &crci);
val &= ~dma->enable_bit;
msm_write(port, val, UARTDM_DMEN);
- /* Restore interrupts */
- msm_port->imr |= UART_IMR_RXLEV | UART_IMR_RXSTALE;
- msm_write(port, msm_port->imr, UART_IMR);
-
if (msm_read(port, UART_SR) & UART_SR_OVERRUN) {
port->icount.overrun++;
tty_insert_flip_char(tport, 0, TTY_OVERRUN);
static void msm_reset(struct uart_port *port)
{
struct msm_port *msm_port = UART_TO_MSM(port);
+ unsigned int mr;
/* reset everything */
msm_write(port, UART_CR_CMD_RESET_RX, UART_CR);
msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR);
msm_write(port, UART_CR_CMD_RESET_BREAK_INT, UART_CR);
msm_write(port, UART_CR_CMD_RESET_CTS, UART_CR);
- msm_write(port, UART_CR_CMD_SET_RFR, UART_CR);
+ msm_write(port, UART_CR_CMD_RESET_RFR, UART_CR);
+ mr = msm_read(port, UART_MR1);
+ mr &= ~UART_MR1_RX_RDY_CTL;
+ msm_write(port, mr, UART_MR1);
/* Disable DM modes */
if (msm_port->is_uartdm)
};
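+/*
+ * Walk the divisor table against successively higher clk_round_rate()
+ * candidates and pick the entry whose resulting baud rate is closest to the
+ * request; the chosen source clock rate is returned through *rate.
+ */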
static const struct msm_baud_map *
-msm_find_best_baud(struct uart_port *port, unsigned int baud)
+msm_find_best_baud(struct uart_port *port, unsigned int baud,
+ unsigned long *rate)
{
- unsigned int i, divisor;
- const struct msm_baud_map *entry;
+ struct msm_port *msm_port = UART_TO_MSM(port);
+ unsigned int divisor, result;
+ unsigned long target, old, best_rate = 0, diff, best_diff = ULONG_MAX;
+ const struct msm_baud_map *entry, *end, *best;
static const struct msm_baud_map table[] = {
- { 1536, 0x00, 1 },
- { 768, 0x11, 1 },
- { 384, 0x22, 1 },
- { 192, 0x33, 1 },
- { 96, 0x44, 1 },
- { 48, 0x55, 1 },
- { 32, 0x66, 1 },
- { 24, 0x77, 1 },
- { 16, 0x88, 1 },
- { 12, 0x99, 6 },
- { 8, 0xaa, 6 },
- { 6, 0xbb, 6 },
- { 4, 0xcc, 6 },
- { 3, 0xdd, 8 },
- { 2, 0xee, 16 },
{ 1, 0xff, 31 },
- { 0, 0xff, 31 },
+ { 2, 0xee, 16 },
+ { 3, 0xdd, 8 },
+ { 4, 0xcc, 6 },
+ { 6, 0xbb, 6 },
+ { 8, 0xaa, 6 },
+ { 12, 0x99, 6 },
+ { 16, 0x88, 1 },
+ { 24, 0x77, 1 },
+ { 32, 0x66, 1 },
+ { 48, 0x55, 1 },
+ { 96, 0x44, 1 },
+ { 192, 0x33, 1 },
+ { 384, 0x22, 1 },
+ { 768, 0x11, 1 },
+ { 1536, 0x00, 1 },
};
- divisor = uart_get_divisor(port, baud);
+ best = table; /* Default to smallest divider */
+ target = clk_round_rate(msm_port->clk, 16 * baud);
+ divisor = DIV_ROUND_CLOSEST(target, 16 * baud);
+
+ end = table + ARRAY_SIZE(table);
+ entry = table;
+ while (entry < end) {
+ if (entry->divisor <= divisor) {
+ result = target / entry->divisor / 16;
+ diff = abs(result - baud);
+
+ /* Keep track of best entry */
+ if (diff < best_diff) {
+ best_diff = diff;
+ best = entry;
+ best_rate = target;
+ }
- for (i = 0, entry = table; i < ARRAY_SIZE(table); i++, entry++)
- if (entry->divisor <= divisor)
- break;
+ if (result == baud)
+ break;
+ } else if (entry->divisor > divisor) {
+ old = target;
+ target = clk_round_rate(msm_port->clk, old + 1);
+ /*
+ * The rate didn't get any faster so we can't do
+ * better at dividing it down
+ */
+ if (target == old)
+ break;
+
+ /* Start the divisor search over at this new rate */
+ entry = table;
+ divisor = DIV_ROUND_CLOSEST(target, 16 * baud);
+ continue;
+ }
+ entry++;
+ }
- return entry; /* Default to smallest divider */
+ *rate = best_rate;
+ return best;
}
static int msm_set_baud_rate(struct uart_port *port, unsigned int baud,
unsigned int rxstale, watermark, mask;
struct msm_port *msm_port = UART_TO_MSM(port);
const struct msm_baud_map *entry;
- unsigned long flags;
-
- entry = msm_find_best_baud(port, baud);
-
- msm_write(port, entry->code, UART_CSR);
-
- if (baud > 460800)
- port->uartclk = baud * 16;
+ unsigned long flags, rate;
flags = *saved_flags;
spin_unlock_irqrestore(&port->lock, flags);
- clk_set_rate(msm_port->clk, port->uartclk);
+ entry = msm_find_best_baud(port, baud, &rate);
+ clk_set_rate(msm_port->clk, rate);
+ baud = rate / 16 / entry->divisor;
spin_lock_irqsave(&port->lock, flags);
*saved_flags = flags;
+ port->uartclk = rate;
+
+ msm_write(port, entry->code, UART_CSR);
/* RX stale watermark */
rxstale = entry->rxstale;
return baud;
}
-static void msm_init_clock(struct uart_port *port)
-{
- struct msm_port *msm_port = UART_TO_MSM(port);
-
- clk_prepare_enable(msm_port->clk);
- clk_prepare_enable(msm_port->pclk);
- msm_serial_set_mnd_regs(port);
-}
-
static int msm_startup(struct uart_port *port)
{
struct msm_port *msm_port = UART_TO_MSM(port);
snprintf(msm_port->name, sizeof(msm_port->name),
"msm_serial%d", port->line);
- ret = request_irq(port->irq, msm_uart_irq, IRQF_TRIGGER_HIGH,
- msm_port->name, port);
- if (unlikely(ret))
+ /*
+ * UART clk must be kept enabled to
+	 * avoid losing received characters
+ */
+ ret = clk_prepare_enable(msm_port->clk);
+ if (ret)
return ret;
- msm_init_clock(port);
+ ret = clk_prepare_enable(msm_port->pclk);
+ if (ret)
+ goto err_pclk;
+
+ msm_serial_set_mnd_regs(port);
if (likely(port->fifosize > 12))
rfr_level = port->fifosize - 12;
msm_request_rx_dma(msm_port, msm_port->uart.mapbase);
}
+ ret = request_irq(port->irq, msm_uart_irq, IRQF_TRIGGER_HIGH,
+ msm_port->name, port);
+ if (unlikely(ret))
+ goto err_irq;
+
return 0;
+
+err_irq:
+ if (msm_port->is_uartdm)
+ msm_release_dma(msm_port);
+
+ clk_disable_unprepare(msm_port->pclk);
+
+err_pclk:
+ clk_disable_unprepare(msm_port->clk);
+
+ return ret;
}
static void msm_shutdown(struct uart_port *port)
if (msm_port->is_uartdm)
msm_release_dma(msm_port);
+ clk_disable_unprepare(msm_port->pclk);
clk_disable_unprepare(msm_port->clk);
free_irq(port->irq, port);
switch (state) {
case 0:
- clk_prepare_enable(msm_port->clk);
- clk_prepare_enable(msm_port->pclk);
+ /*
+ * UART clk must be kept enabled to
+	 * avoid losing received characters
+ */
+ if (clk_prepare_enable(msm_port->clk))
+ return;
+ if (clk_prepare_enable(msm_port->pclk)) {
+ clk_disable_unprepare(msm_port->clk);
+ return;
+ }
break;
case 3:
clk_disable_unprepare(msm_port->clk);
int j;
unsigned int num_chars;
char buf[4] = { 0 };
+ const u32 *buffer;
if (is_uartdm)
num_chars = min(count - i, (unsigned int)sizeof(buf));
while (!(msm_read(port, UART_SR) & UART_SR_TX_READY))
cpu_relax();
- iowrite32_rep(tf, buf, 1);
+ buffer = (const u32 *)buf;
+ writel_relaxed_no_log(*buffer, tf);
i += num_chars;
}
spin_unlock(&port->lock);
if (unlikely(!port->membase))
return -ENXIO;
- msm_init_clock(port);
+ msm_serial_set_mnd_regs(port);
if (options)
uart_parse_options(options, &baud, &parity, &bits, &flow);
device->con->write = msm_serial_early_write;
return 0;
}
-EARLYCON_DECLARE(msm_serial, msm_serial_early_console_setup);
OF_EARLYCON_DECLARE(msm_serial, "qcom,msm-uart",
msm_serial_early_console_setup);
device->con->write = msm_serial_early_write_dm;
return 0;
}
-EARLYCON_DECLARE(msm_serial_dm, msm_serial_early_console_setup_dm);
OF_EARLYCON_DECLARE(msm_serial_dm, "qcom,msm-uartdm",
msm_serial_early_console_setup_dm);
msm_port->pclk = devm_clk_get(&pdev->dev, "iface");
if (IS_ERR(msm_port->pclk))
return PTR_ERR(msm_port->pclk);
-
- clk_set_rate(msm_port->clk, 1843200);
}
port->uartclk = clk_get_rate(msm_port->clk);
};
MODULE_DEVICE_TABLE(of, msm_match_table);
+#ifdef CONFIG_PM_SLEEP
+static int msm_serial_suspend(struct device *dev)
+{
+ struct uart_port *port = dev_get_drvdata(dev);
+
+ uart_suspend_port(&msm_uart_driver, port);
+
+ return 0;
+}
+
+static int msm_serial_resume(struct device *dev)
+{
+ struct uart_port *port = dev_get_drvdata(dev);
+
+ uart_resume_port(&msm_uart_driver, port);
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops msm_serial_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(msm_serial_suspend, msm_serial_resume)
+};
+
static struct platform_driver msm_platform_driver = {
.remove = msm_serial_remove,
.probe = msm_serial_probe,
.driver = {
.name = "msm_serial",
.of_match_table = msm_match_table,
+ .pm = &msm_serial_pm_ops,
},
};
struct uart_state *state = tty->driver_data;
struct uart_port *port = state->uart_port;
+ if (port->ops->wake_peer)
+ port->ops->wake_peer(port);
+
if (!uart_tx_stopped(port))
port->ops->start_tx(port);
}
mutex_lock(&port->mutex);
- if (uport->type != PORT_UNKNOWN)
+ if (uport->type != PORT_UNKNOWN && uport->ops->break_ctl)
uport->ops->break_ctl(uport, break_state);
mutex_unlock(&port->mutex);
#define USB_VENDOR_GENESYS_LOGIC 0x05e3
#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01
+extern int deny_new_usb;
+
/* Protect struct usb_device->state and ->children members
* Note: Both are also protected by ->dev.sem, except that ->state can
* change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */
/* synchronize hub-port add/remove and peering operations */
DEFINE_MUTEX(usb_port_peer_mutex);
+static bool skip_extended_resume_delay = true;
+module_param(skip_extended_resume_delay, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(skip_extended_resume_delay,
+ "removes extra delay added to finish bus resume");
+
/* cycle leds on hubs that aren't blinking for attention */
static bool blinkenlights = 0;
module_param(blinkenlights, bool, S_IRUGO);
static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1,
u16 portstatus);
+#define USB_VENDOR_XIAOMI 0x2717
+#define USB_PRODUCT_XIAOMI_HEADSET 0x3801
+
+bool is_xiaomi_headset = false;
+
static inline char *portspeed(struct usb_hub *hub, int portstatus)
{
if (hub_is_superspeed(hub->hdev))
kick_hub_wq(hub);
}
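+/*
+ * Flush any pending hub events; exported so other drivers can wait for
+ * port status changes queued on hub_wq to be processed.
+ */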
+void usb_flush_hub_wq(void)
+{
+ flush_workqueue(hub_wq);
+}
+EXPORT_SYMBOL(usb_flush_hub_wq);
+
/*
* Let the USB core know that a USB 3.0 device has sent a Function Wake Device
* Notification, which indicates it had initiated remote wakeup.
hdev = interface_to_usbdev(intf);
/*
- * Set default autosuspend delay as 0 to speedup bus suspend,
- * based on the below considerations:
- *
- * - Unlike other drivers, the hub driver does not rely on the
- * autosuspend delay to provide enough time to handle a wakeup
- * event, and the submitted status URB is just to check future
- * change on hub downstream ports, so it is safe to do it.
- *
- * - The patch might cause one or more auto supend/resume for
- * below very rare devices when they are plugged into hub
- * first time:
- *
- * devices having trouble initializing, and disconnect
- * themselves from the bus and then reconnect a second
- * or so later
- *
- * devices just for downloading firmware, and disconnects
- * themselves after completing it
- *
- * For these quite rare devices, their drivers may change the
- * autosuspend delay of their parent hub in the probe() to one
- * appropriate value to avoid the subtle problem if someone
- * does care it.
- *
- * - The patch may cause one or more auto suspend/resume on
- * hub during running 'lsusb', but it is probably too
- * infrequent to worry about.
- *
- * - Change autosuspend delay of hub can avoid unnecessary auto
- * suspend timer for hub, also may decrease power consumption
- * of USB bus.
- *
- * - If user has indicated to prevent autosuspend by passing
- * usbcore.autosuspend = -1 then keep autosuspend disabled.
- */
-#ifdef CONFIG_PM
- if (hdev->dev.power.autosuspend_delay >= 0)
- pm_runtime_set_autosuspend_delay(&hdev->dev, 0);
-#endif
-
- /*
* Hubs have proper suspend/resume support, except for root hubs
* where the controller driver doesn't have bus_suspend and
* bus_resume methods.
dev_info(&udev->dev, "USB disconnect, device number %d\n",
udev->devnum);
+ if (is_xiaomi_headset) {
+ dev_info(&udev->dev, "xiaomi headset removed, devnum %d\n", udev->devnum);
+ is_xiaomi_headset = false;
+ }
+
/*
* Ensure that the pm runtime code knows that the USB device
* is in the process of being disconnected.
udev->dev.devt = MKDEV(USB_DEVICE_MAJOR,
(((udev->bus->busnum-1) * 128) + (udev->devnum-1)));
+ if (le16_to_cpu(udev->descriptor.idVendor) == USB_VENDOR_XIAOMI &&
+     le16_to_cpu(udev->descriptor.idProduct) == USB_PRODUCT_XIAOMI_HEADSET) {
+ dev_info(&udev->dev, "xiaomi headset identified, devnum %d\n", udev->devnum);
+ is_xiaomi_headset = true;
+ }
+
/* Tell the world! */
announce_device(udev);
/* drive resume for USB_RESUME_TIMEOUT msec */
dev_dbg(&udev->dev, "usb %sresume\n",
(PMSG_IS_AUTO(msg) ? "auto-" : ""));
- msleep(USB_RESUME_TIMEOUT);
+ if (!skip_extended_resume_delay)
+ usleep_range(USB_RESUME_TIMEOUT * 1000,
+ (USB_RESUME_TIMEOUT + 1) * 1000);
/* Virtual root hubs can trigger on GET_PORT_STATUS to
* stop resume signaling. Then finish the resume
status = hub_port_status(hub, port1, &portstatus, &portchange);
/* TRSMRCY = 10 msec */
- msleep(10);
+ usleep_range(10000, 10500);
}
SuspendCleared:
enum usb_device_speed oldspeed = udev->speed;
const char *speed;
int devnum = udev->devnum;
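+ /* uevent payload sent to the parent hub when a descriptor read fails */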
+ char *error_event[] = {
+ "USB_DEVICE_ERROR=Device_No_Response", NULL };
/* root hub ports have a slightly longer reset period
* (from USB 2.0 spec, section 7.1.7.5)
if (r != -ENODEV)
dev_err(&udev->dev, "device descriptor read/64, error %d\n",
r);
+ kobject_uevent_env(&udev->parent->dev.kobj,
+ KOBJ_CHANGE, error_event);
retval = -EMSGSIZE;
continue;
}
dev_err(&udev->dev,
"device descriptor read/8, error %d\n",
retval);
+ kobject_uevent_env(&udev->parent->dev.kobj,
+ KOBJ_CHANGE, error_event);
if (retval >= 0)
retval = -EMSGSIZE;
} else {
goto done;
return;
}
+
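+ /* Honor the global deny_new_usb toggle: refuse to enumerate newly attached devices while it is set. */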
+ if (deny_new_usb) {
+ dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1);
+ goto done;
+ }
+
if (hub_is_superspeed(hub->hdev))
unit_load = 150;
else
/**
* usb_reset_device - warn interface drivers and perform a USB port reset
- * @udev: device to reset (not in SUSPENDED or NOTATTACHED state)
+ * @udev: device to reset (not in NOTATTACHED state)
*
* Warns all drivers bound to registered interfaces (using their pre_reset
* method), performs the port reset, and then lets the drivers know that
struct usb_host_config *config = udev->actconfig;
struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent);
- if (udev->state == USB_STATE_NOTATTACHED ||
- udev->state == USB_STATE_SUSPENDED) {
+ if (udev->state == USB_STATE_NOTATTACHED) {
dev_dbg(&udev->dev, "device reset not allowed in state %d\n",
udev->state);
return -EINVAL;
#include "configfs.h"
#include "u_f.h"
#include "u_os_desc.h"
+#include "debug.h"
+
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+#include <linux/platform_device.h>
+#include <linux/kdev_t.h>
+#include <linux/usb/ch9.h>
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+extern int acc_ctrlrequest(struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *ctrl);
+void acc_disconnect(void);
+#endif
+static struct class *android_class;
+static struct device *android_device;
+static int index;
+static int gadget_index;
+
+struct device *create_function_device(char *name)
+{
+ if (android_device && !IS_ERR(android_device))
+ return device_create(android_class, android_device,
+ MKDEV(0, index++), NULL, name);
+ else
+ return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(create_function_device);
+#endif
int check_user_usb_string(const char *name,
struct usb_gadget_strings *stringtab_dev)
struct usb_composite_driver composite;
struct usb_composite_dev cdev;
bool use_os_desc;
+ bool unbinding;
char b_vendor_code;
char qw_sign[OS_STRING_QW_SIGN_LEN];
spinlock_t spinlock;
bool unbind;
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+ bool connected;
+ bool sw_connected;
+ struct work_struct work;
+ struct device *dev;
+#endif
};
static inline struct gadget_info *to_gadget_info(struct config_item *item)
struct list_head list;
};
+#define MAX_USB_STRING_LEN 126
+#define MAX_USB_STRING_WITH_NULL_LEN (MAX_USB_STRING_LEN+1)
+
static int usb_string_copy(const char *s, char **s_copy)
{
int ret;
char *str;
char *copy = *s_copy;
ret = strlen(s);
- if (ret > 126)
+ if (ret > MAX_USB_STRING_LEN)
return -EOVERFLOW;
- str = kstrdup(s, GFP_KERNEL);
- if (!str)
- return -ENOMEM;
+ if (copy) {
+ str = copy;
+ } else {
+ str = kmalloc(MAX_USB_STRING_WITH_NULL_LEN, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
+ }
+ strlcpy(str, s, MAX_USB_STRING_WITH_NULL_LEN);
if (str[ret - 1] == '\n')
str[ret - 1] = '\0';
- kfree(copy);
*s_copy = str;
return 0;
}
if (!gi->udc_name)
return -ENODEV;
+ gi->unbinding = true;
ret = usb_gadget_unregister_driver(&gi->composite.gadget_driver);
if (ret)
return ret;
+ gi->unbinding = false;
kfree(gi->udc_name);
gi->udc_name = NULL;
return 0;
mutex_lock(&gi->lock);
- if (!strlen(name)) {
+ if (!strlen(name) || strcmp(name, "none") == 0) {
ret = unregister_gadget(gi);
if (ret)
goto err;
cfg = container_of(c, struct config_usb_cfg, c);
- list_for_each_entry_safe(f, tmp, &c->functions, list) {
+ list_for_each_entry_safe_reverse(f, tmp, &c->functions, list) {
- list_move_tail(&f->list, &cfg->func_list);
+ list_move(&f->list, &cfg->func_list);
if (f->unbind) {
dev_err(&gi->cdev.gadget->dev, "unbind function"
- " '%s'/%p\n", f->name, f);
+ " '%s'/%pK\n", f->name, f);
f->unbind(c, f);
}
}
int ret;
/* the gi->lock is hold by the caller */
- gi->unbind = 0;
cdev->gadget = gadget;
set_gadget_data(gadget, cdev);
ret = composite_dev_prepare(composite, cdev);
return ret;
}
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
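+/*
+ * Deferred uevent worker: compare the connect/configure state captured under
+ * cdev->lock with what was last reported and send the matching
+ * USB_STATE=CONNECTED/CONFIGURED/DISCONNECTED change uevents.
+ */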
+static void android_work(struct work_struct *data)
+{
+ struct gadget_info *gi = container_of(data, struct gadget_info, work);
+ struct usb_composite_dev *cdev = &gi->cdev;
+ char *disconnected[2] = { "USB_STATE=DISCONNECTED", NULL };
+ char *connected[2] = { "USB_STATE=CONNECTED", NULL };
+ char *configured[2] = { "USB_STATE=CONFIGURED", NULL };
+ /* 0-connected 1-configured 2-disconnected */
+ bool status[3] = { false, false, false };
+ unsigned long flags;
+ bool uevent_sent = false;
+
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (cdev->config)
+ status[1] = true;
+
+ if (gi->connected != gi->sw_connected) {
+ if (gi->connected)
+ status[0] = true;
+ else
+ status[2] = true;
+ gi->sw_connected = gi->connected;
+ }
+ spin_unlock_irqrestore(&cdev->lock, flags);
+
+ if (status[0]) {
+ kobject_uevent_env(&gi->dev->kobj,
+ KOBJ_CHANGE, connected);
+ pr_info("%s: sent uevent %s\n", __func__, connected[0]);
+ uevent_sent = true;
+ }
+
+ if (status[1]) {
+ kobject_uevent_env(&gi->dev->kobj,
+ KOBJ_CHANGE, configured);
+ pr_info("%s: sent uevent %s\n", __func__, configured[0]);
+ uevent_sent = true;
+ }
+
+ if (status[2]) {
+ kobject_uevent_env(&gi->dev->kobj,
+ KOBJ_CHANGE, disconnected);
+ pr_info("%s: sent uevent %s\n", __func__, disconnected[0]);
+ uevent_sent = true;
+ }
+
+ if (!uevent_sent) {
+ pr_info("%s: did not send uevent (%d %d %pK)\n", __func__,
+ gi->connected, gi->sw_connected, cdev->config);
+ }
+}
+#endif
+
static void configfs_composite_unbind(struct usb_gadget *gadget)
{
struct usb_composite_dev *cdev;
struct gadget_info *gi;
- unsigned long flags;
/* the gi->lock is hold by the caller */
cdev = get_gadget_data(gadget);
gi = container_of(cdev, struct gadget_info, cdev);
- spin_lock_irqsave(&gi->spinlock, flags);
- gi->unbind = 1;
- spin_unlock_irqrestore(&gi->spinlock, flags);
kfree(otg_desc[0]);
otg_desc[0] = NULL;
purge_configs_funcs(gi);
composite_dev_cleanup(cdev);
usb_ep_autoconfig_reset(cdev->gadget);
- spin_lock_irqsave(&gi->spinlock, flags);
cdev->gadget = NULL;
set_gadget_data(gadget, NULL);
- spin_unlock_irqrestore(&gi->spinlock, flags);
}
-static int configfs_composite_setup(struct usb_gadget *gadget,
- const struct usb_ctrlrequest *ctrl)
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
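+/*
+ * Control request dispatcher used when Android uevents are enabled: let each
+ * function instance try to handle the request first, then the accessory
+ * handler, then fall back to composite_setup(); schedule state uevents on
+ * connect and on SET_CONFIGURATION.
+ */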
+static int android_setup(struct usb_gadget *gadget,
+ const struct usb_ctrlrequest *c)
{
- struct usb_composite_dev *cdev;
- struct gadget_info *gi;
+ struct usb_composite_dev *cdev = get_gadget_data(gadget);
unsigned long flags;
- int ret;
+ struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+ int value = -EOPNOTSUPP;
+ struct usb_function_instance *fi;
- cdev = get_gadget_data(gadget);
- if (!cdev)
- return 0;
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (!gi->connected) {
+ gi->connected = 1;
+ schedule_work(&gi->work);
+ }
+ spin_unlock_irqrestore(&cdev->lock, flags);
+ list_for_each_entry(fi, &gi->available_func, cfs_list) {
+ if (fi != NULL && fi->f != NULL && fi->f->setup != NULL) {
+ value = fi->f->setup(fi->f, c);
+ if (value >= 0)
+ break;
+ }
+ }
- gi = container_of(cdev, struct gadget_info, cdev);
- spin_lock_irqsave(&gi->spinlock, flags);
- cdev = get_gadget_data(gadget);
- if (!cdev || gi->unbind) {
- spin_unlock_irqrestore(&gi->spinlock, flags);
- return 0;
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+ if (value < 0)
+ value = acc_ctrlrequest(cdev, c);
+#endif
+
+ if (value < 0)
+ value = composite_setup(gadget, c);
+
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (c->bRequest == USB_REQ_SET_CONFIGURATION &&
+ cdev->config) {
+ schedule_work(&gi->work);
}
+ spin_unlock_irqrestore(&cdev->lock, flags);
- ret = composite_setup(gadget, ctrl);
- spin_unlock_irqrestore(&gi->spinlock, flags);
- return ret;
+ return value;
}
-static void configfs_composite_disconnect(struct usb_gadget *gadget)
+static void android_disconnect(struct usb_gadget *gadget)
{
- struct usb_composite_dev *cdev;
+ struct usb_composite_dev *cdev = get_gadget_data(gadget);
struct gadget_info *gi;
- unsigned long flags;
- cdev = get_gadget_data(gadget);
- if (!cdev)
+ if (!cdev) {
+ pr_err("%s: gadget is not connected\n", __func__);
return;
+ }
gi = container_of(cdev, struct gadget_info, cdev);
- spin_lock_irqsave(&gi->spinlock, flags);
- cdev = get_gadget_data(gadget);
- if (!cdev || gi->unbind) {
- spin_unlock_irqrestore(&gi->spinlock, flags);
+
+ /* FIXME: There's a race between usb_gadget_udc_stop(), which is likely
+ * to set the gadget driver to NULL in the UDC driver, and this driver's
+ * gadget disconnect fn, which likely checks whether the gadget driver
+ * is a null ptr. It happens that unbind (doing set_gadget_data(NULL))
+ * is called before the gadget driver is set to NULL and the UDC driver
+ * calls the disconnect fn, which results in cdev being a null ptr.
+ */
+ if (cdev == NULL) {
+ WARN(1, "%s: gadget driver already disconnected\n", __func__);
return;
}
+ /*
+ * Accessory HID support can be active while the accessory function is
+ * not actually enabled, so we need to inform it when we are
+ * disconnected.
+ */
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+ acc_disconnect();
+#endif
+ gi->connected = 0;
+ if (!gi->unbinding)
+ schedule_work(&gi->work);
composite_disconnect(gadget);
- spin_unlock_irqrestore(&gi->spinlock, flags);
}
+#endif
+
+static const struct usb_gadget_driver configfs_driver_template = {
+ .bind = configfs_composite_bind,
+ .unbind = configfs_composite_unbind,
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+ .setup = android_setup,
+ .reset = android_disconnect,
+ .disconnect = android_disconnect,
+#else
+ .setup = composite_setup,
+ .reset = composite_disconnect,
+ .disconnect = composite_disconnect,
+#endif
+ .suspend = composite_suspend,
+ .resume = composite_resume,
+
+ .max_speed = USB_SPEED_SUPER,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "configfs-gadget",
+ },
+};
-static void configfs_composite_suspend(struct usb_gadget *gadget)
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static ssize_t state_show(struct device *pdev, struct device_attribute *attr,
+ char *buf)
{
+ struct gadget_info *dev = dev_get_drvdata(pdev);
struct usb_composite_dev *cdev;
- struct gadget_info *gi;
+ char *state = "DISCONNECTED";
unsigned long flags;
- cdev = get_gadget_data(gadget);
- if (!cdev)
- return;
+ if (!dev)
+ goto out;
- gi = container_of(cdev, struct gadget_info, cdev);
- spin_lock_irqsave(&gi->spinlock, flags);
- cdev = get_gadget_data(gadget);
- if (!cdev || gi->unbind) {
- spin_unlock_irqrestore(&gi->spinlock, flags);
- return;
- }
+ cdev = &dev->cdev;
- composite_suspend(gadget);
- spin_unlock_irqrestore(&gi->spinlock, flags);
+ if (!cdev)
+ goto out;
+
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (cdev->config)
+ state = "CONFIGURED";
+ else if (dev->connected)
+ state = "CONNECTED";
+ spin_unlock_irqrestore(&cdev->lock, flags);
+out:
+ return sprintf(buf, "%s\n", state);
}
-static void configfs_composite_resume(struct usb_gadget *gadget)
-{
- struct usb_composite_dev *cdev;
- struct gadget_info *gi;
- unsigned long flags;
+static DEVICE_ATTR(state, S_IRUGO, state_show, NULL);
- cdev = get_gadget_data(gadget);
- if (!cdev)
- return;
+static struct device_attribute *android_usb_attributes[] = {
+ &dev_attr_state,
+ NULL
+};
- gi = container_of(cdev, struct gadget_info, cdev);
- spin_lock_irqsave(&gi->spinlock, flags);
- cdev = get_gadget_data(gadget);
- if (!cdev || gi->unbind) {
- spin_unlock_irqrestore(&gi->spinlock, flags);
- return;
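+/*
+ * Create the per-gadget "android%d" device with its "state" attribute so
+ * userspace can query whether the gadget is connected or configured.
+ */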
+static int android_device_create(struct gadget_info *gi)
+{
+ struct device_attribute **attrs;
+ struct device_attribute *attr;
+ char str[10];
+
+ INIT_WORK(&gi->work, android_work);
+ snprintf(str, sizeof(str), "android%d", gadget_index - 1);
+ pr_debug("Creating android device %s\n", str);
+ gi->dev = device_create(android_class, NULL,
+ MKDEV(0, 0), NULL, str);
+ if (IS_ERR(gi->dev))
+ return PTR_ERR(gi->dev);
+
+ dev_set_drvdata(gi->dev, gi);
+ if (gadget_index == 1)
+ android_device = gi->dev;
+
+ attrs = android_usb_attributes;
+ while ((attr = *attrs++)) {
+ int err;
+
+ err = device_create_file(gi->dev, attr);
+ if (err) {
+ device_destroy(gi->dev->class,
+ gi->dev->devt);
+ return err;
+ }
}
- composite_resume(gadget);
- spin_unlock_irqrestore(&gi->spinlock, flags);
+ return 0;
}
-static const struct usb_gadget_driver configfs_driver_template = {
- .bind = configfs_composite_bind,
- .unbind = configfs_composite_unbind,
-
- .setup = configfs_composite_setup,
- .reset = configfs_composite_disconnect,
- .disconnect = configfs_composite_disconnect,
+static void android_device_destroy(struct device *dev)
+{
+ struct device_attribute **attrs;
+ struct device_attribute *attr;
- .suspend = configfs_composite_suspend,
- .resume = configfs_composite_resume,
+ attrs = android_usb_attributes;
+ while ((attr = *attrs++))
+ device_remove_file(dev, attr);
+ device_destroy(dev->class, dev->devt);
+}
+#else
+static inline int android_device_create(struct gadget_info *gi)
+{
+ return 0;
+}
- .max_speed = USB_SPEED_SUPER,
- .driver = {
- .owner = THIS_MODULE,
- .name = "configfs-gadget",
- },
-};
+static inline void android_device_destroy(struct device *dev)
+{
+}
+#endif
static struct config_group *gadgets_make(
struct config_group *group,
gi = kzalloc(sizeof(*gi), GFP_KERNEL);
if (!gi)
return ERR_PTR(-ENOMEM);
-
gi->group.default_groups = gi->default_groups;
gi->group.default_groups[0] = &gi->functions_group;
gi->group.default_groups[1] = &gi->configs_group;
gi->composite.resume = NULL;
gi->composite.max_speed = USB_SPEED_SUPER;
+ spin_lock_init(&gi->spinlock);
mutex_init(&gi->lock);
INIT_LIST_HEAD(&gi->string_list);
INIT_LIST_HEAD(&gi->available_func);
if (!gi->composite.gadget_driver.function)
goto err;
+ gadget_index++;
+ pr_debug("Creating gadget index %d\n", gadget_index);
+ if (android_device_create(gi) < 0)
+ goto err;
+
config_group_init_type_name(&gi->group, name,
&gadget_root_type);
return &gi->group;
+
err:
kfree(gi);
return ERR_PTR(-ENOMEM);
static void gadgets_drop(struct config_group *group, struct config_item *item)
{
+ struct gadget_info *gi;
+
+ gi = container_of(to_config_group(item), struct gadget_info, group);
config_item_put(item);
+ if (gi->dev) {
+ android_device_destroy(gi->dev);
+ gi->dev = NULL;
+ }
}
static struct configfs_group_operations gadgets_ops = {
{
struct gadget_info *gi = to_gadget_info(item);
+ /* protect against a race with gadget_dev_desc_UDC_store() */
mutex_lock(&gi->lock);
unregister_gadget(gi);
mutex_unlock(&gi->lock);
config_group_init(&gadget_subsys.su_group);
+ debug_debugfs_init();
+
ret = configfs_register_subsystem(&gadget_subsys);
+
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+ android_class = class_create(THIS_MODULE, "android_usb");
+ if (IS_ERR(android_class))
+ return PTR_ERR(android_class);
+#endif
+
return ret;
}
module_init(gadget_cfs_init);
static void __exit gadget_cfs_exit(void)
{
+ debug_debugfs_exit();
configfs_unregister_subsystem(&gadget_subsys);
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+ if (!IS_ERR(android_class))
+ class_destroy(android_class);
+#endif
+
}
module_exit(gadget_cfs_exit);
* Copyright (C) 2003 Al Borchers (alborchers@steinerpoint.com)
* Copyright (C) 2008 David Brownell
* Copyright (C) 2008 by Nokia Corporation
+ * Copyright (c) 2013-2017 The Linux Foundation. All rights reserved.
*
* This code also borrows from usbserial.c, which is
* Copyright (C) 1999 - 2002 Greg Kroah-Hartman (greg@kroah.com)
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
#include "u_serial.h"
* next layer of buffering. For TX that's a circular buffer; for RX
* consider it a NOP. A third layer is provided by the TTY code.
*/
-#define QUEUE_SIZE 16
+#define TX_QUEUE_SIZE 8
+#define TX_BUF_SIZE 4096
#define WRITE_BUF_SIZE 8192 /* TX only */
+#define RX_QUEUE_SIZE 8
+#define RX_BUF_SIZE 4096
+
/* circular buffer */
struct gs_buf {
unsigned buf_size;
int read_allocated;
struct list_head read_queue;
unsigned n_read;
- struct tasklet_struct push;
+ struct work_struct push;
struct list_head write_pool;
int write_started;
/* REVISIT this state ... */
struct usb_cdc_line_coding port_line_coding; /* 8-N-1 etc */
+ unsigned long nbytes_from_host;
+ unsigned long nbytes_to_tty;
+ unsigned long nbytes_from_tty;
+ unsigned long nbytes_to_host;
};
static struct portmaster {
struct gs_port *port;
} ports[MAX_U_SERIAL_PORTS];
+static struct workqueue_struct *gserial_wq;
#define GS_CLOSE_TIMEOUT 15 /* seconds */
__acquires(&port->port_lock)
*/
{
- struct list_head *pool = &port->write_pool;
+ struct list_head *pool;
struct usb_ep *in;
int status = 0;
+ static long prev_len;
bool do_tty_wake = false;
- if (!port->port_usb)
- return status;
+ if (!port || !port->port_usb) {
+ pr_err("Error - port or port->usb is NULL.");
+ return -EIO;
+ }
- in = port->port_usb->in;
+ pool = &port->write_pool;
+ in = port->port_usb->in;
while (!port->write_busy && !list_empty(pool)) {
struct usb_request *req;
int len;
- if (port->write_started >= QUEUE_SIZE)
+ if (port->write_started >= TX_QUEUE_SIZE)
break;
req = list_entry(pool->next, struct usb_request, list);
- len = gs_send_packet(port, req->buf, in->maxpacket);
+ len = gs_send_packet(port, req->buf, TX_BUF_SIZE);
if (len == 0) {
+ /* Queue zero length packet explicitly to make it
+ * work with UDCs which don't support req->zero flag
+ */
+ if (prev_len && (prev_len % in->maxpacket == 0)) {
+ req->length = 0;
+ list_del(&req->list);
+ spin_unlock(&port->port_lock);
+ status = usb_ep_queue(in, req, GFP_ATOMIC);
+ spin_lock(&port->port_lock);
+ if (!port->port_usb) {
+ gs_free_req(in, req);
+ break;
+ }
+ if (status) {
+ printk(KERN_ERR "%s: %s err %d\n",
+ __func__, "queue", status);
+ list_add(&req->list, pool);
+ }
+ prev_len = 0;
+ }
wake_up_interruptible(&port->drain_wait);
break;
}
req->length = len;
list_del(&req->list);
- req->zero = (gs_buf_data_avail(&port->port_write_buf) == 0);
pr_vdebug("ttyGS%d: tx len=%d, 0x%02x 0x%02x 0x%02x ...\n",
port->port_num, len, *((u8 *)req->buf),
status = usb_ep_queue(in, req, GFP_ATOMIC);
spin_lock(&port->port_lock);
port->write_busy = false;
+ /*
+ * If port_usb is NULL, gserial disconnect is called
+ * while the spinlock is dropped and all requests are
+ * freed. Free the current request here.
+ */
+ if (!port->port_usb) {
+ do_tty_wake = false;
+ gs_free_req(in, req);
+ break;
+ }
if (status) {
pr_debug("%s: %s %s err %d\n",
break;
}
- port->write_started++;
+ prev_len = req->length;
+ port->nbytes_from_tty += req->length;
- /* abort immediately after disconnect */
- if (!port->port_usb)
- break;
+ port->write_started++;
}
if (do_tty_wake && port->port.tty)
__acquires(&port->port_lock)
*/
{
- struct list_head *pool = &port->read_pool;
- struct usb_ep *out = port->port_usb->out;
+ struct list_head *pool;
+ struct usb_ep *out;
+ unsigned started = 0;
+
+ if (!port || !port->port_usb) {
+ pr_err("Error - port or port->usb is NULL.");
+ return -EIO;
+ }
+
+ pool = &port->read_pool;
+ out = port->port_usb->out;
while (!list_empty(pool)) {
struct usb_request *req;
if (!tty)
break;
- if (port->read_started >= QUEUE_SIZE)
+ if (port->read_started >= RX_QUEUE_SIZE)
break;
req = list_entry(pool->next, struct usb_request, list);
list_del(&req->list);
- req->length = out->maxpacket;
+ req->length = RX_BUF_SIZE;
/* drop lock while we call out; the controller driver
* may need to call us back (e.g. for disconnect)
status = usb_ep_queue(out, req, GFP_ATOMIC);
spin_lock(&port->port_lock);
+ /*
+ * If port_usb is NULL, gserial disconnect is called
+ * while the spinlock is dropped and all requests are
+ * freed. Free the current request here.
+ */
+ if (!port->port_usb) {
+ started = 0;
+ gs_free_req(out, req);
+ break;
+ }
+
if (status) {
pr_debug("%s: %s %s err %d\n",
__func__, "queue", out->name, status);
break;
}
port->read_started++;
-
- /* abort immediately after disconnect */
- if (!port->port_usb)
- break;
}
return port->read_started;
}
* So QUEUE_SIZE packets plus however many the FIFO holds (usually two)
* can be buffered before the TTY layer's buffers (currently 64 KB).
*/
-static void gs_rx_push(unsigned long _port)
+static void gs_rx_push(struct work_struct *w)
{
- struct gs_port *port = (void *)_port;
+ struct gs_port *port = container_of(w, struct gs_port, push);
struct tty_struct *tty;
struct list_head *queue = &port->read_queue;
bool disconnect = false;
count = tty_insert_flip_string(&port->port, packet,
size);
+ port->nbytes_to_tty += count;
if (count)
do_push = true;
if (count != size) {
* this time around, there may be trouble unless there's an
* implicit tty_unthrottle() call on its way...
*
- * REVISIT we should probably add a timer to keep the tasklet
+ * REVISIT we should probably add a timer to keep the work queue
* from starving ... but it's not clear that case ever happens.
*/
if (!list_empty(queue) && tty) {
if (!test_bit(TTY_THROTTLED, &tty->flags)) {
if (do_push)
- tasklet_schedule(&port->push);
+ queue_work(gserial_wq, &port->push);
else
pr_warn("ttyGS%d: RX not scheduled?\n",
port->port_num);
static void gs_read_complete(struct usb_ep *ep, struct usb_request *req)
{
struct gs_port *port = ep->driver_data;
+ unsigned long flags;
/* Queue all received data until the tty layer is ready for it. */
- spin_lock(&port->port_lock);
+ spin_lock_irqsave(&port->port_lock, flags);
+ port->nbytes_from_host += req->actual;
list_add_tail(&req->list, &port->read_queue);
- tasklet_schedule(&port->push);
- spin_unlock(&port->port_lock);
+ queue_work(gserial_wq, &port->push);
+ spin_unlock_irqrestore(&port->port_lock, flags);
}
static void gs_write_complete(struct usb_ep *ep, struct usb_request *req)
{
struct gs_port *port = ep->driver_data;
+ unsigned long flags;
- spin_lock(&port->port_lock);
+ spin_lock_irqsave(&port->port_lock, flags);
+ port->nbytes_to_host += req->actual;
list_add(&req->list, &port->write_pool);
port->write_started--;
/* FALL THROUGH */
case 0:
/* normal completion */
- gs_start_tx(port);
+ if (port->port_usb)
+ gs_start_tx(port);
break;
case -ESHUTDOWN:
break;
}
- spin_unlock(&port->port_lock);
+ spin_unlock_irqrestore(&port->port_lock, flags);
}
static void gs_free_requests(struct usb_ep *ep, struct list_head *head,
}
static int gs_alloc_requests(struct usb_ep *ep, struct list_head *head,
+ int queue_size, int req_size,
void (*fn)(struct usb_ep *, struct usb_request *),
int *allocated)
{
int i;
struct usb_request *req;
- int n = allocated ? QUEUE_SIZE - *allocated : QUEUE_SIZE;
+ int n = allocated ? queue_size - *allocated : queue_size;
/* Pre-allocate up to QUEUE_SIZE transfers, but if we can't
* do quite that many this time, don't fail ... we just won't
* be as speedy as we might otherwise be.
*/
for (i = 0; i < n; i++) {
- req = gs_alloc_req(ep, ep->maxpacket, GFP_ATOMIC);
+ req = gs_alloc_req(ep, req_size, GFP_ATOMIC);
if (!req)
return list_empty(head) ? -ENOMEM : 0;
req->complete = fn;
*/
static int gs_start_io(struct gs_port *port)
{
- struct list_head *head = &port->read_pool;
- struct usb_ep *ep = port->port_usb->out;
+ struct list_head *head;
+ struct usb_ep *ep;
int status;
unsigned started;
+ if (!port || !port->port_usb) {
+ pr_err("Error - port or port->usb is NULL.");
+ return -EIO;
+ }
+
+ head = &port->read_pool;
+ ep = port->port_usb->out;
+
/* Allocate RX and TX I/O buffers. We can't easily do this much
* earlier (with GFP_KERNEL) because the requests are coupled to
* endpoints, as are the packet sizes we'll be using. Different
* configurations may use different endpoints with a given port;
* and high speed vs full speed changes packet sizes too.
*/
- status = gs_alloc_requests(ep, head, gs_read_complete,
- &port->read_allocated);
+ status = gs_alloc_requests(ep, head, RX_QUEUE_SIZE, RX_BUF_SIZE,
+ gs_read_complete, &port->read_allocated);
if (status)
return status;
status = gs_alloc_requests(port->port_usb->in, &port->write_pool,
+ TX_QUEUE_SIZE, TX_BUF_SIZE,
gs_write_complete, &port->write_allocated);
if (status) {
gs_free_requests(ep, head, &port->read_allocated);
port->n_read = 0;
started = gs_start_rx(port);
+ if (!port->port_usb)
+ return -EIO;
+
/* unblock any pending writes into our circular buffer */
if (started) {
tty_wakeup(port->port.tty);
spin_lock_irq(&port->port_lock);
if (status) {
- pr_debug("gs_open: ttyGS%d (%p,%p) no buffer\n",
+ pr_debug("gs_open: ttyGS%d (%pK,%pK) no buffer\n",
port->port_num, tty, file);
port->openclose = false;
goto exit_unlock_port;
gser->connect(gser);
}
- pr_debug("gs_open: ttyGS%d (%p,%p)\n", port->port_num, tty, file);
+ pr_debug("gs_open: ttyGS%d (%pK,%pK)\n", port->port_num, tty, file);
status = 0;
goto exit;
}
- pr_debug("gs_close: ttyGS%d (%p,%p) ...\n", port->port_num, tty, file);
+ pr_debug("gs_close: ttyGS%d (%pK,%pK) ...\n",
+ port->port_num, tty, file);
/* mark port as closing but in use; we can drop port lock
* and sleep if necessary
/* Iff we're disconnected, there can be no I/O in flight so it's
* ok to free the circular buffer; else just scrub it. And don't
- * let the push tasklet fire again until we're re-opened.
+ * let the push work queue fire again until we're re-opened.
*/
if (gser == NULL)
gs_buf_free(&port->port_write_buf);
port->openclose = false;
- pr_debug("gs_close: ttyGS%d (%p,%p) done!\n",
+ pr_debug("gs_close: ttyGS%d (%pK,%pK) done!\n",
port->port_num, tty, file);
wake_up(&port->close_wait);
unsigned long flags;
int status;
- pr_vdebug("gs_write: ttyGS%d (%p) writing %d bytes\n",
+ if (!port)
+ return 0;
+
+ pr_vdebug("gs_write: ttyGS%d (%pK) writing %d bytes\n",
port->port_num, tty, count);
spin_lock_irqsave(&port->port_lock, flags);
unsigned long flags;
int status;
- pr_vdebug("gs_put_char: (%d,%p) char=0x%x, called from %ps\n",
+ if (!port)
+ return 0;
+ pr_vdebug("gs_put_char: (%d,%pK) char=0x%x, called from %pKs\n",
port->port_num, tty, ch, __builtin_return_address(0));
spin_lock_irqsave(&port->port_lock, flags);
struct gs_port *port = tty->driver_data;
unsigned long flags;
- pr_vdebug("gs_flush_chars: (%d,%p)\n", port->port_num, tty);
+ if (!port)
+ return;
+ pr_vdebug("gs_flush_chars: (%d,%pK)\n", port->port_num, tty);
spin_lock_irqsave(&port->port_lock, flags);
if (port->port_usb)
unsigned long flags;
int room = 0;
+ if (!port)
+ return 0;
spin_lock_irqsave(&port->port_lock, flags);
if (port->port_usb)
room = gs_buf_space_avail(&port->port_write_buf);
spin_unlock_irqrestore(&port->port_lock, flags);
- pr_vdebug("gs_write_room: (%d,%p) room=%d\n",
+ pr_vdebug("gs_write_room: (%d,%pK) room=%d\n",
port->port_num, tty, room);
return room;
chars = gs_buf_data_avail(&port->port_write_buf);
spin_unlock_irqrestore(&port->port_lock, flags);
- pr_vdebug("gs_chars_in_buffer: (%d,%p) chars=%d\n",
+ pr_vdebug("gs_chars_in_buffer: (%d,%pK) chars=%d\n",
port->port_num, tty, chars);
return chars;
struct gs_port *port = tty->driver_data;
unsigned long flags;
+ /*
+ * tty's driver data is set to NULL during port close. Nothing
+ * to do here.
+ */
+ if (!port)
+ return;
+
spin_lock_irqsave(&port->port_lock, flags);
if (port->port_usb) {
/* Kickstart read queue processing. We don't do xon/xoff,
* rts/cts, or other handshaking with the host, but if the
* read queue backs up enough we'll be NAKing OUT packets.
*/
- tasklet_schedule(&port->push);
+ queue_work(gserial_wq, &port->push);
pr_vdebug("ttyGS%d: unthrottle\n", port->port_num);
}
spin_unlock_irqrestore(&port->port_lock, flags);
int status = 0;
struct gserial *gser;
+ if (!port)
+ return 0;
pr_vdebug("gs_break_ctl: ttyGS%d, send break (%d) \n",
port->port_num, duration);
return status;
}
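+/*
+ * Report modem control lines: DTR/RTS are read back from the function driver
+ * when it provides get_dtr()/get_rts(); CD and RI are mirrored from the
+ * locally cached serial_state bits.
+ */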
+static int gs_tiocmget(struct tty_struct *tty)
+{
+ struct gs_port *port = tty->driver_data;
+ struct gserial *gser;
+ unsigned int result = 0;
+
+ spin_lock_irq(&port->port_lock);
+ gser = port->port_usb;
+ if (!gser) {
+ result = -ENODEV;
+ goto fail;
+ }
+
+ if (gser->get_dtr)
+ result |= (gser->get_dtr(gser) ? TIOCM_DTR : 0);
+
+ if (gser->get_rts)
+ result |= (gser->get_rts(gser) ? TIOCM_RTS : 0);
+
+ if (gser->serial_state & TIOCM_CD)
+ result |= TIOCM_CD;
+
+ if (gser->serial_state & TIOCM_RI)
+ result |= TIOCM_RI;
+
+fail:
+ spin_unlock_irq(&port->port_lock);
+ return result;
+}
+
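+/*
+ * Assert or clear RI/CD toward the host: update the cached serial_state and
+ * call the function driver's send_ring_indicator()/send_carrier_detect()
+ * hooks when they exist.
+ */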
+static int gs_tiocmset(struct tty_struct *tty,
+ unsigned int set, unsigned int clear)
+{
+ struct gs_port *port = tty->driver_data;
+ struct gserial *gser;
+ int status = 0;
+
+ spin_lock_irq(&port->port_lock);
+ gser = port->port_usb;
+
+ if (!gser) {
+ status = -ENODEV;
+ goto fail;
+ }
+
+ if (set & TIOCM_RI) {
+ if (gser->send_ring_indicator) {
+ gser->serial_state |= TIOCM_RI;
+ status = gser->send_ring_indicator(gser, 1);
+ }
+ }
+
+ if (clear & TIOCM_RI) {
+ if (gser->send_ring_indicator) {
+ gser->serial_state &= ~TIOCM_RI;
+ status = gser->send_ring_indicator(gser, 0);
+ }
+ }
+
+ if (set & TIOCM_CD) {
+ if (gser->send_carrier_detect) {
+ gser->serial_state |= TIOCM_CD;
+ status = gser->send_carrier_detect(gser, 1);
+ }
+ }
+
+ if (clear & TIOCM_CD) {
+ if (gser->send_carrier_detect) {
+ gser->serial_state &= ~TIOCM_CD;
+ status = gser->send_carrier_detect(gser, 0);
+ }
+ }
+fail:
+ spin_unlock_irq(&port->port_lock);
+ return status;
+}
+
static const struct tty_operations gs_tty_ops = {
.open = gs_open,
.close = gs_close,
.chars_in_buffer = gs_chars_in_buffer,
.unthrottle = gs_unthrottle,
.break_ctl = gs_break_ctl,
+ .tiocmget = gs_tiocmget,
+ .tiocmset = gs_tiocmset,
};
/*-------------------------------------------------------------------------*/
init_waitqueue_head(&port->drain_wait);
init_waitqueue_head(&port->close_wait);
- tasklet_init(&port->push, gs_rx_push, (unsigned long) port);
+ INIT_WORK(&port->push, gs_rx_push);
INIT_LIST_HEAD(&port->read_pool);
INIT_LIST_HEAD(&port->read_queue);
return ret;
}
+#if defined(CONFIG_DEBUG_FS)
+
+#define BUF_SIZE 512
+
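+/*
+ * debugfs interface: usb_serial<N>/readstatus dumps the per-port byte
+ * counters (and DTR state when available); usb_serial<N>/reset clears them.
+ */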
+static ssize_t debug_read_status(struct file *file, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct gs_port *ui_dev = file->private_data;
+ struct tty_struct *tty;
+ struct gserial *gser;
+ char *buf;
+ unsigned long flags;
+ int i = 0;
+ int ret;
+ int result = 0;
+
+ if (!ui_dev)
+ return -EINVAL;
+
+ tty = ui_dev->port.tty;
+ gser = ui_dev->port_usb;
+
+ buf = kzalloc(BUF_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&ui_dev->port_lock, flags);
+
+ i += scnprintf(buf + i, BUF_SIZE - i,
+ "nbytes_from_host: %lu\n", ui_dev->nbytes_from_host);
+
+ i += scnprintf(buf + i, BUF_SIZE - i,
+ "nbytes_to_tty: %lu\n", ui_dev->nbytes_to_tty);
+
+ i += scnprintf(buf + i, BUF_SIZE - i, "nbytes_with_usb_OUT_txr: %lu\n",
+ (ui_dev->nbytes_from_host - ui_dev->nbytes_to_tty));
+
+ i += scnprintf(buf + i, BUF_SIZE - i,
+ "nbytes_from_tty: %lu\n", ui_dev->nbytes_from_tty);
+
+ i += scnprintf(buf + i, BUF_SIZE - i,
+ "nbytes_to_host: %lu\n", ui_dev->nbytes_to_host);
+
+ i += scnprintf(buf + i, BUF_SIZE - i, "nbytes_with_usb_IN_txr: %lu\n",
+ (ui_dev->nbytes_from_tty - ui_dev->nbytes_to_host));
+
+ if (tty)
+ i += scnprintf(buf + i, BUF_SIZE - i,
+ "tty_flags: %lu\n", tty->flags);
+
+ if (gser && gser->get_dtr) {
+ result |= (gser->get_dtr(gser) ? TIOCM_DTR : 0);
+ i += scnprintf(buf + i, BUF_SIZE - i,
+ "DTR_status: %d\n", result);
+ }
+
+ spin_unlock_irqrestore(&ui_dev->port_lock, flags);
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, i);
+ kfree(buf);
+ return ret;
+}
+
+static ssize_t debug_write_reset(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct gs_port *ui_dev = file->private_data;
+ unsigned long flags;
+
+ if (!ui_dev)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ui_dev->port_lock, flags);
+ ui_dev->nbytes_from_host = ui_dev->nbytes_to_tty =
+ ui_dev->nbytes_from_tty = ui_dev->nbytes_to_host = 0;
+ spin_unlock_irqrestore(&ui_dev->port_lock, flags);
+
+ return count;
+}
+
+static int serial_debug_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static const struct file_operations debug_rst_ops = {
+ .open = serial_debug_open,
+ .write = debug_write_reset,
+};
+
+static const struct file_operations debug_adb_ops = {
+ .open = serial_debug_open,
+ .read = debug_read_status,
+};
+
+static struct dentry *gs_dent;
+static void usb_debugfs_init(struct gs_port *ui_dev, int port_num)
+{
+ char buf[48];
+
+ if (!ui_dev)
+ return;
+
+ snprintf(buf, sizeof(buf), "usb_serial%d", port_num);
+ gs_dent = debugfs_create_dir(buf, 0);
+ if (!gs_dent || IS_ERR(gs_dent))
+ return;
+
+ debugfs_create_file("readstatus", 0444, gs_dent, ui_dev,
+ &debug_adb_ops);
+ debugfs_create_file("reset", S_IRUGO | S_IWUSR,
+ gs_dent, ui_dev, &debug_rst_ops);
+}
+
+static void usb_debugfs_remove(void)
+{
+ debugfs_remove_recursive(gs_dent);
+}
+#else
+static inline void usb_debugfs_init(struct gs_port *ui_dev, int port_num) {}
+static inline void usb_debugfs_remove(void) {}
+#endif
+
static int gs_closed(struct gs_port *port)
{
int cond;
static void gserial_free_port(struct gs_port *port)
{
- tasklet_kill(&port->push);
+ cancel_work_sync(&port->push);
/* wait for old opens to finish */
wait_event(port->close_wait, gs_closed(port));
WARN_ON(port->port_usb != NULL);
__func__, port_num, PTR_ERR(tty_dev));
ret = PTR_ERR(tty_dev);
+ mutex_lock(&ports[port_num].lock);
port = ports[port_num].port;
ports[port_num].port = NULL;
+ mutex_unlock(&ports[port_num].lock);
gserial_free_port(port);
goto err;
}
port->read_allocated = port->read_started =
port->write_allocated = port->write_started = 0;
+ port->nbytes_from_host = port->nbytes_to_tty =
+ port->nbytes_from_tty = port->nbytes_to_host = 0;
+
spin_unlock_irqrestore(&port->port_lock, flags);
}
EXPORT_SYMBOL_GPL(gserial_disconnect);
gs_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
gs_tty_driver->subtype = SERIAL_TYPE_NORMAL;
- gs_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
+ gs_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV
+ | TTY_DRIVER_RESET_TERMIOS;
gs_tty_driver->init_termios = tty_std_termios;
/* 9600-8-N-1 ... matches defaults expected by "usbser.sys" on
for (i = 0; i < MAX_U_SERIAL_PORTS; i++)
mutex_init(&ports[i].lock);
+ gserial_wq = create_singlethread_workqueue("k_gserial");
+ if (!gserial_wq) {
+ status = -ENOMEM;
+ goto fail;
+ }
+
/* export the driver ... */
status = tty_register_driver(gs_tty_driver);
if (status) {
goto fail;
}
+ for (i = 0; i < MAX_U_SERIAL_PORTS; i++)
+ usb_debugfs_init(ports[i].port, i);
+
pr_debug("%s: registered %d ttyGS* device%s\n", __func__,
MAX_U_SERIAL_PORTS,
(MAX_U_SERIAL_PORTS == 1) ? "" : "s");
return status;
fail:
put_tty_driver(gs_tty_driver);
+ if (gserial_wq)
+ destroy_workqueue(gserial_wq);
gs_tty_driver = NULL;
return status;
}
static void userial_cleanup(void)
{
+ usb_debugfs_remove();
+ destroy_workqueue(gserial_wq);
tty_unregister_driver(gs_tty_driver);
put_tty_driver(gs_tty_driver);
gs_tty_driver = NULL;
* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
+#include <linux/gfp.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
int i;
ret = 0;
- virt_dev = xhci->devs[slot_id];
- if (!virt_dev)
- return -ENODEV;
-
cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO);
if (!cmd) {
xhci_dbg(xhci, "Couldn't allocate command structure.\n");
}
spin_lock_irqsave(&xhci->lock, flags);
+ virt_dev = xhci->devs[slot_id];
+ if (!virt_dev) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_free_command(xhci, cmd);
+ return -ENODEV;
+ }
+
for (i = LAST_EP_INDEX; i > 0; i--) {
if (virt_dev->eps[i].ring && virt_dev->eps[i].ring->dequeue) {
struct xhci_command *command;
struct xhci_bus_state *bus_state,
__le32 __iomem **port_array,
u16 wIndex, u32 raw_port_status,
- unsigned long flags)
+ unsigned long *flags)
__releases(&xhci->lock)
__acquires(&xhci->lock)
{
status |= USB_PORT_STAT_C_BH_RESET << 16;
if ((raw_port_status & PORT_CEC))
status |= USB_PORT_STAT_C_CONFIG_ERROR << 16;
+
+ /* USB3 remote wake resume signaling completed */
+ if (bus_state->port_remote_wakeup & (1 << wIndex) &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RECOVERY) {
+ bus_state->port_remote_wakeup &= ~(1 << wIndex);
+ usb_hcd_end_port_resume(&hcd->self, wIndex);
+ }
}
if (hcd->speed < HCD_USB3) {
xhci_set_link_state(xhci, port_array, wIndex,
XDEV_U0);
- spin_unlock_irqrestore(&xhci->lock, flags);
+ spin_unlock_irqrestore(&xhci->lock, *flags);
time_left = wait_for_completion_timeout(
&bus_state->rexit_done[wIndex],
msecs_to_jiffies(
XHCI_MAX_REXIT_TIMEOUT_MS));
- spin_lock_irqsave(&xhci->lock, flags);
+ spin_lock_irqsave(&xhci->lock, *flags);
if (time_left) {
slot_id = xhci_find_slot_id_by_port(hcd,
return status;
}
+static void xhci_single_step_completion(struct urb *urb)
+{
+ struct completion *done = urb->context;
+
+ complete(done);
+}
+
+/*
+ * Allocate a URB and initialize its various fields.
+ * This helper is used by the single_step_set_feature test of
+ * EHSET, where the IN packet of the GetDescriptor request is
+ * sent 15 seconds after the SETUP packet.
+ * Returns NULL on failure.
+ */
+static struct urb *xhci_request_single_step_set_feature_urb(
+ struct usb_device *udev,
+ void *dr,
+ void *buf,
+ struct completion *done)
+{
+ struct urb *urb;
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+ struct usb_host_endpoint *ep;
+
+ urb = usb_alloc_urb(0, GFP_KERNEL);
+ if (!urb)
+ return NULL;
+
+ urb->pipe = usb_rcvctrlpipe(udev, 0);
+ ep = udev->ep_in[usb_pipeendpoint(urb->pipe)];
+ if (!ep) {
+ usb_free_urb(urb);
+ return NULL;
+ }
+
+ /*
+ * Initialize the various URB fields, as these are used by the HCD
+ * driver both to queue the URB and when completion happens.
+ */
+ urb->ep = ep;
+ urb->dev = udev;
+ urb->setup_packet = dr;
+ urb->transfer_buffer = buf;
+ urb->transfer_buffer_length = USB_DT_DEVICE_SIZE;
+ urb->complete = xhci_single_step_completion;
+ urb->status = -EINPROGRESS;
+ urb->actual_length = 0;
+ urb->transfer_flags = URB_DIR_IN;
+ usb_get_urb(urb);
+ atomic_inc(&urb->use_count);
+ atomic_inc(&urb->dev->urbnum);
+ usb_hcd_map_urb_for_dma(hcd, urb, GFP_KERNEL);
+ urb->context = done;
+ return urb;
+}
+
+/*
+ * This function implements the USB_PORT_FEAT_TEST handling of the
+ * SINGLE_STEP_SET_FEATURE test mode as defined in the Embedded
+ * High-Speed Electrical Test (EHSET) specification. This simply
+ * issues a GetDescriptor control transfer, with an inserted 15-second
+ * delay after the end of the SETUP stage and before the IN token of
+ * the DATA stage is set. The idea is that this gives the test operator
+ * enough time to configure the oscilloscope to perform a measurement
+ * of the response time between the DATA and ACK packets that follow.
+ */
+static int xhci_ehset_single_step_set_feature(struct usb_hcd *hcd, int port)
+{
+ int retval;
+ struct usb_ctrlrequest *dr;
+ struct urb *urb;
+ struct usb_device *udev;
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ struct usb_device_descriptor *buf;
+ unsigned long flags;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ /* Obtain udev of the rhub's child port */
+ udev = usb_hub_find_child(hcd->self.root_hub, port);
+ if (!udev) {
+ xhci_err(xhci, "No device attached to the RootHub\n");
+ return -ENODEV;
+ }
+ buf = kmalloc(USB_DT_DEVICE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL);
+ if (!dr) {
+ kfree(buf);
+ return -ENOMEM;
+ }
+
+ /* Fill Setup packet for GetDescriptor */
+ dr->bRequestType = USB_DIR_IN;
+ dr->bRequest = USB_REQ_GET_DESCRIPTOR;
+ dr->wValue = cpu_to_le16(USB_DT_DEVICE << 8);
+ dr->wIndex = 0;
+ dr->wLength = cpu_to_le16(USB_DT_DEVICE_SIZE);
+ urb = xhci_request_single_step_set_feature_urb(udev, dr, buf, &done);
+ if (!urb) {
+ retval = -ENOMEM;
+ goto cleanup;
+ }
+
+ /* Now complete just the SETUP stage */
+ spin_lock_irqsave(&xhci->lock, flags);
+ retval = xhci_submit_single_step_set_feature(hcd, urb, 1);
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ if (retval)
+ goto out1;
+
+ if (!wait_for_completion_timeout(&done, msecs_to_jiffies(2000))) {
+ usb_kill_urb(urb);
+ retval = -ETIMEDOUT;
+ xhci_err(xhci, "%s SETUP stage timed out on ep0\n", __func__);
+ goto out1;
+ }
+
+ /* Sleep for 15 seconds; HC will send SOFs during this period */
+ msleep(15 * 1000);
+
+ /* Complete remaining DATA and status stages. Re-use same URB */
+ urb->status = -EINPROGRESS;
+ usb_get_urb(urb);
+ atomic_inc(&urb->use_count);
+ atomic_inc(&urb->dev->urbnum);
+
+ spin_lock_irqsave(&xhci->lock, flags);
+ retval = xhci_submit_single_step_set_feature(hcd, urb, 0);
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ if (!retval && !wait_for_completion_timeout(&done,
+ msecs_to_jiffies(2000))) {
+ usb_kill_urb(urb);
+ retval = -ETIMEDOUT;
+ xhci_err(xhci, "%s IN stage timed out on ep0\n", __func__);
+ }
+out1:
+ usb_free_urb(urb);
+cleanup:
+ kfree(dr);
+ kfree(buf);
+ return retval;
+}
+
int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
u16 wIndex, char *buf, u16 wLength)
{
u16 link_state = 0;
u16 wake_mask = 0;
u16 timeout = 0;
+ u16 test_mode = 0;
max_ports = xhci_get_ports(hcd, &port_array);
bus_state = &xhci->bus_state[hcd_index(hcd)];
break;
}
status = xhci_get_port_status(hcd, bus_state, port_array,
- wIndex, temp, flags);
+ wIndex, temp, &flags);
if (status == 0xffffffff)
goto error;
link_state = (wIndex & 0xff00) >> 3;
if (wValue == USB_PORT_FEAT_REMOTE_WAKE_MASK)
wake_mask = wIndex & 0xff00;
- /* The MSB of wIndex is the U1/U2 timeout */
- timeout = (wIndex & 0xff00) >> 8;
+ /* The MSB of wIndex is the U1/U2 timeout or the test mode */
+ test_mode = timeout = (wIndex & 0xff00) >> 8;
wIndex &= 0xff;
if (!wIndex || wIndex > max_ports)
goto error;
temp = readl(port_array[wIndex]);
break;
}
+
+ /*
+ * For xHCI 1.1 according to section 4.19.1.2.4.1 a
+ * root hub port's transition to compliance mode upon
+ * detecting LFPS timeout may be controlled by a
+ * Compliance Transition Enabled (CTE) flag (not
+ * software visible). This flag is set by writing 0xA
+ * to PORTSC PLS field which will allow transition to
+ * compliance mode the next time LFPS timeout is
+ * encountered. A warm reset will clear it.
+ *
+ * The CTE flag is only supported if the HCCPARAMS2 CTC
+ * flag is set, otherwise, the compliance substate is
+ * automatically entered as on 1.0 and prior.
+ */
+ if (link_state == USB_SS_PORT_LS_COMP_MOD) {
+ if (!HCC2_CTC(xhci->hcc_params2)) {
+ xhci_dbg(xhci, "CTC flag is 0, port already supports entering compliance mode\n");
+ break;
+ }
+
+ if ((temp & PORT_CONNECT)) {
+ xhci_warn(xhci, "Can't set compliance mode when port is connected\n");
+ goto error;
+ }
+
+ xhci_dbg(xhci, "Enable compliance mode transition for port %d\n",
+ wIndex);
+ xhci_set_link_state(xhci, port_array, wIndex,
+ link_state);
+ temp = readl(port_array[wIndex]);
+ break;
+ }
+
/* Port must be enabled */
if (!(temp & PORT_PE)) {
retval = -ENODEV;
temp |= PORT_U2_TIMEOUT(timeout);
writel(temp, port_array[wIndex] + PORTPMSC);
break;
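+ /*
+ * Test modes 1-5 are written straight into the PORTPMSC test field
+ * after stopping the device and halting the controller; mode 6 runs
+ * the EHSET SINGLE_STEP_SET_FEATURE sequence instead.
+ */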
+ case USB_PORT_FEAT_TEST:
+ slot_id = xhci_find_slot_id_by_port(hcd, xhci,
+ wIndex + 1);
+ if (test_mode && test_mode <= 5) {
+ /* unlock to execute stop endpoint commands */
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_stop_device(xhci, slot_id, 1);
+ spin_lock_irqsave(&xhci->lock, flags);
+ xhci_halt(xhci);
+
+ temp = readl_relaxed(port_array[wIndex] +
+ PORTPMSC);
+ temp |= test_mode << 28;
+ writel_relaxed(temp, port_array[wIndex] +
+ PORTPMSC);
+ /* to make sure above write goes through */
+ mb();
+ } else if (test_mode == 6) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ retval = xhci_ehset_single_step_set_feature(hcd,
+ wIndex);
+ spin_lock_irqsave(&xhci->lock, flags);
+ } else {
+ goto error;
+ }
+ break;
default:
goto error;
}
xhci_set_link_state(xhci, port_array, wIndex,
XDEV_RESUME);
spin_unlock_irqrestore(&xhci->lock, flags);
- msleep(USB_RESUME_TIMEOUT);
+ usleep_range(21000, 21500);
spin_lock_irqsave(&xhci->lock, flags);
xhci_set_link_state(xhci, port_array, wIndex,
XDEV_U0);
if (need_usb2_u3_exit) {
spin_unlock_irqrestore(&xhci->lock, flags);
- msleep(USB_RESUME_TIMEOUT);
+ usleep_range(21000, 21500);
spin_lock_irqsave(&xhci->lock, flags);
}
/* Point to output device context in dcbaa. */
xhci->dcbaa->dev_context_ptrs[slot_id] = cpu_to_le64(dev->out_ctx->dma);
- xhci_dbg(xhci, "Set slot id %d dcbaa entry %p to 0x%llx\n",
+ xhci_dbg(xhci, "Set slot id %d dcbaa entry %pK to 0x%llx\n",
slot_id,
&xhci->dcbaa->dev_context_ptrs[slot_id],
le64_to_cpu(xhci->dcbaa->dev_context_ptrs[slot_id]));
if (udev->tt->multi)
slot_ctx->dev_info |= cpu_to_le32(DEV_MTT);
}
- xhci_dbg(xhci, "udev->tt = %p\n", udev->tt);
+ xhci_dbg(xhci, "udev->tt = %pK\n", udev->tt);
xhci_dbg(xhci, "udev->ttport = 0x%x\n", udev->ttport);
/* Step 4 - ring already allocated */
}
break;
case USB_SPEED_FULL:
+ if (usb_endpoint_xfer_bulk(&ep->desc) && max_packet < 8)
+ max_packet = 8;
case USB_SPEED_LOW:
break;
default:
kfree(command);
}
-void xhci_mem_cleanup(struct xhci_hcd *xhci)
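+/*
+ * Quiesce a secondary interrupter before its ring is freed: mask and ack the
+ * interrupter, walk forward from the last acknowledged event TRB until the
+ * cycle bit flips, then write the new dequeue pointer back with EHB set so
+ * the controller sees an empty ring.
+ */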
+void xhci_handle_sec_intr_events(struct xhci_hcd *xhci, int intr_num)
{
+ union xhci_trb *erdp_trb, *current_trb;
+ struct xhci_segment *seg;
+ u64 erdp_reg;
+ u32 iman_reg;
+ dma_addr_t deq;
+ unsigned long segment_offset;
+
+ /* disable irq, ack pending interrupt and ack all pending events */
+
+ iman_reg =
+ readl_relaxed(&xhci->sec_ir_set[intr_num]->irq_pending);
+ iman_reg &= ~IMAN_IE;
+ writel_relaxed(iman_reg,
+ &xhci->sec_ir_set[intr_num]->irq_pending);
+ iman_reg =
+ readl_relaxed(&xhci->sec_ir_set[intr_num]->irq_pending);
+ if (iman_reg & IMAN_IP)
+ writel_relaxed(iman_reg,
+ &xhci->sec_ir_set[intr_num]->irq_pending);
+
+ /* last acked event trb is in erdp reg */
+ erdp_reg =
+ xhci_read_64(xhci, &xhci->sec_ir_set[intr_num]->erst_dequeue);
+ deq = (dma_addr_t)(erdp_reg & ~ERST_PTR_MASK);
+ if (!deq) {
+ pr_debug("%s: event ring handling not required\n", __func__);
+ return;
+ }
+
+ seg = xhci->sec_event_ring[intr_num]->first_seg;
+ segment_offset = deq - seg->dma;
+
+ /* find out virtual address of the last acked event trb */
+ erdp_trb = current_trb = &seg->trbs[0] +
+ (segment_offset/sizeof(*current_trb));
+
+ /* read cycle state of the last acked trb to find out CCS */
+ xhci->sec_event_ring[intr_num]->cycle_state =
+ (le32_to_cpu(current_trb->event_cmd.flags) & TRB_CYCLE);
+
+ while (1) {
+ /* last trb of the event ring: toggle cycle state */
+ if (current_trb == &seg->trbs[TRBS_PER_SEGMENT - 1]) {
+ xhci->sec_event_ring[intr_num]->cycle_state ^= 1;
+ current_trb = &seg->trbs[0];
+ } else {
+ current_trb++;
+ }
+
+ /* cycle state transition */
+ if ((le32_to_cpu(current_trb->event_cmd.flags) & TRB_CYCLE) !=
+ xhci->sec_event_ring[intr_num]->cycle_state)
+ break;
+ }
+
+ if (erdp_trb != current_trb) {
+ deq =
+ xhci_trb_virt_to_dma(xhci->sec_event_ring[intr_num]->deq_seg,
+ current_trb);
+ if (deq == 0)
+ xhci_warn(xhci,
+ "WARN ivalid SW event ring dequeue ptr.\n");
+ /* Update HC event ring dequeue pointer */
+ erdp_reg &= ERST_PTR_MASK;
+ erdp_reg |= ((u64) deq & (u64) ~ERST_PTR_MASK);
+ }
+
+ /* Clear the event handler busy flag (RW1C); event ring is empty. */
+ erdp_reg |= ERST_EHB;
+ xhci_write_64(xhci, erdp_reg,
+ &xhci->sec_ir_set[intr_num]->erst_dequeue);
+}
+
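+/* Tear down one secondary event ring and its ERST after draining pending events. */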
+int xhci_sec_event_ring_cleanup(struct usb_hcd *hcd, unsigned intr_num)
+{
+ int size;
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
struct device *dev = xhci_to_hcd(xhci)->self.controller;
+
+ if (intr_num >= xhci->max_interrupters) {
+ xhci_err(xhci, "invalid secondary interrupter num %d\n",
+ intr_num);
+ return -EINVAL;
+ }
+
+ size =
+ sizeof(struct xhci_erst_entry)*(xhci->sec_erst[intr_num].num_entries);
+ if (xhci->sec_erst[intr_num].entries) {
+ xhci_handle_sec_intr_events(xhci, intr_num);
+ dma_free_coherent(dev, size, xhci->sec_erst[intr_num].entries,
+ xhci->sec_erst[intr_num].erst_dma_addr);
+ xhci->sec_erst[intr_num].entries = NULL;
+ }
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed SEC ERST#%d",
+ intr_num);
+ if (xhci->sec_event_ring[intr_num])
+ xhci_ring_free(xhci, xhci->sec_event_ring[intr_num]);
+
+ xhci->sec_event_ring[intr_num] = NULL;
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "Freed sec event ring");
+
+ return 0;
+}
+
+void xhci_event_ring_cleanup(struct xhci_hcd *xhci)
+{
int size;
- int i, j, num_ports;
+ unsigned int i;
+ struct device *dev = xhci_to_hcd(xhci)->self.controller;
- cancel_delayed_work_sync(&xhci->cmd_timer);
+ /* sec event ring clean up */
+ for (i = 1; i < xhci->max_interrupters; i++)
+ xhci_sec_event_ring_cleanup(xhci_to_hcd(xhci), i);
- /* Free the Event Ring Segment Table and the actual Event Ring */
+ kfree(xhci->sec_ir_set);
+ xhci->sec_ir_set = NULL;
+ kfree(xhci->sec_erst);
+ xhci->sec_erst = NULL;
+ kfree(xhci->sec_event_ring);
+ xhci->sec_event_ring = NULL;
+
+ /* primary event ring clean up */
size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries);
if (xhci->erst.entries)
dma_free_coherent(dev, size,
xhci->erst.entries, xhci->erst.erst_dma_addr);
xhci->erst.entries = NULL;
- xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed ERST");
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed primary ERST");
if (xhci->event_ring)
xhci_ring_free(xhci, xhci->event_ring);
xhci->event_ring = NULL;
- xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed event ring");
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed priamry event ring");
+}
+
+void xhci_mem_cleanup(struct xhci_hcd *xhci)
+{
+ struct device *dev = xhci_to_hcd(xhci)->self.controller;
+ int i, j, num_ports;
+
+ cancel_delayed_work_sync(&xhci->cmd_timer);
+
+ xhci_event_ring_cleanup(xhci);
if (xhci->lpm_command)
xhci_free_command(xhci, xhci->lpm_command);
kfree(xhci->port_array);
kfree(xhci->rh_bw);
kfree(xhci->ext_caps);
+ kfree(xhci->usb2_rhub.psi);
+ kfree(xhci->usb3_rhub.psi);
xhci->usb2_ports = NULL;
xhci->usb3_ports = NULL;
xhci->port_array = NULL;
+ xhci->usb2_rhub.psi = NULL;
+ xhci->usb3_rhub.psi = NULL;
xhci->rh_bw = NULL;
xhci->ext_caps = NULL;
if (seg != result_seg) {
xhci_warn(xhci, "WARN: %s TRB math test %d failed!\n",
test_name, test_number);
- xhci_warn(xhci, "Tested TRB math w/ seg %p and "
+ xhci_warn(xhci, "Tested TRB math w/ seg %pK and "
"input DMA 0x%llx\n",
input_seg,
(unsigned long long) input_dma);
- xhci_warn(xhci, "starting TRB %p (0x%llx DMA), "
- "ending TRB %p (0x%llx DMA)\n",
+ xhci_warn(xhci, "starting TRB %pK (0x%llx DMA), "
+ "ending TRB %pK (0x%llx DMA)\n",
start_trb, start_dma,
end_trb, end_dma);
- xhci_warn(xhci, "Expected seg %p, got seg %p\n",
+ xhci_warn(xhci, "Expected seg %pK, got seg %pK\n",
result_seg, seg);
trb_in_td(xhci, input_seg, start_trb, end_trb, input_dma,
true);
return 0;
}
-static void xhci_set_hc_event_deq(struct xhci_hcd *xhci)
-{
- u64 temp;
- dma_addr_t deq;
-
- deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg,
- xhci->event_ring->dequeue);
- if (deq == 0 && !in_interrupt())
- xhci_warn(xhci, "WARN something wrong with SW event ring "
- "dequeue ptr.\n");
- /* Update HC event ring dequeue pointer */
- temp = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue);
- temp &= ERST_PTR_MASK;
- /* Don't clear the EHB bit (which is RW1C) because
- * there might be more events to service.
- */
- temp &= ~ERST_EHB;
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Write event ring dequeue pointer, "
- "preserving EHB bit");
- xhci_write_64(xhci, ((u64) deq & (u64) ~ERST_PTR_MASK) | temp,
- &xhci->ir_set->erst_dequeue);
-}
-
static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
__le32 __iomem *addr, u8 major_revision, int max_caps)
{
rhub = &xhci->usb2_rhub;
} else {
xhci_warn(xhci, "Ignoring unknown port speed, "
- "Ext Cap %p, revision = 0x%x\n",
+ "Ext Cap %pK, revision = 0x%x\n",
addr, major_revision);
/* Ignoring port protocol we can't understand. FIXME */
return;
port_offset = XHCI_EXT_PORT_OFF(temp);
port_count = XHCI_EXT_PORT_COUNT(temp);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "Ext Cap %p, port offset = %u, "
+ "Ext Cap %pK, port offset = %u, "
"count = %u, revision = 0x%x",
addr, port_offset, port_count, major_revision);
/* Port count includes the current port offset */
for (i = port_offset; i < (port_offset + port_count); i++) {
/* Duplicate entry. Ignore the port if the revisions differ. */
if (xhci->port_array[i] != 0) {
- xhci_warn(xhci, "Duplicate port entry, Ext Cap %p,"
+ xhci_warn(xhci, "Duplicate port entry, Ext Cap %pK,"
" port %u\n", addr, i);
xhci_warn(xhci, "Port was marked as USB %u, "
"duplicated as USB %u\n",
NUM_PORT_REGS*i;
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"USB 2.0 port at index %u, "
- "addr = %p", i,
+ "addr = %pK", i,
xhci->usb2_ports[port_index]);
port_index++;
if (port_index == xhci->num_usb2_ports)
NUM_PORT_REGS*i;
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"USB 3.0 port at index %u, "
- "addr = %p", i,
+ "addr = %pK", i,
xhci->usb3_ports[port_index]);
port_index++;
if (port_index == xhci->num_usb3_ports)
return 0;
}
+int xhci_event_ring_setup(struct xhci_hcd *xhci, struct xhci_ring **er,
+ struct xhci_intr_reg __iomem *ir_set, struct xhci_erst *erst,
+ unsigned int intr_num, gfp_t flags)
+{
+ dma_addr_t dma, deq;
+ u64 val_64;
+ unsigned int val;
+ struct xhci_segment *seg;
+ struct device *dev = xhci_to_hcd(xhci)->self.controller;
+
+ *er = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1,
+ TYPE_EVENT, flags);
+ if (!*er)
+ return -ENOMEM;
+
+ erst->entries = dma_alloc_coherent(dev,
+ sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS, &dma,
+ flags);
+ if (!erst->entries) {
+ xhci_ring_free(xhci, *er);
+ return -ENOMEM;
+ }
+
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "intr# %d: Allocated event ring segment table at 0x%llx",
+ intr_num, (unsigned long long)dma);
+
+ memset(erst->entries, 0, sizeof(struct xhci_erst_entry)*ERST_NUM_SEGS);
+ erst->num_entries = ERST_NUM_SEGS;
+ erst->erst_dma_addr = dma;
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "intr# %d: num segs = %i, virt addr = %pK, dma addr = 0x%llx",
+ intr_num,
+ erst->num_entries,
+ erst->entries,
+ (unsigned long long)erst->erst_dma_addr);
+
+ /* set ring base address and size for each segment table entry */
+ for (val = 0, seg = (*er)->first_seg; val < ERST_NUM_SEGS; val++) {
+ struct xhci_erst_entry *entry = &erst->entries[val];
+
+ entry->seg_addr = cpu_to_le64(seg->dma);
+ entry->seg_size = cpu_to_le32(TRBS_PER_SEGMENT);
+ entry->rsvd = 0;
+ seg = seg->next;
+ }
+
+ /* set ERST count with the number of entries in the segment table */
+ val = readl_relaxed(&ir_set->erst_size);
+ val &= ERST_SIZE_MASK;
+ val |= ERST_NUM_SEGS;
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "Write ERST size = %i to ir_set %d (some bits preserved)", val,
+ intr_num);
+ writel_relaxed(val, &ir_set->erst_size);
+
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "intr# %d: Set ERST entries to point to event ring.",
+ intr_num);
+ /* set the segment table base address */
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "Set ERST base address for ir_set %d = 0x%llx",
+ intr_num,
+ (unsigned long long)erst->erst_dma_addr);
+ val_64 = xhci_read_64(xhci, &ir_set->erst_base);
+ val_64 &= ERST_PTR_MASK;
+ val_64 |= (erst->erst_dma_addr & (u64) ~ERST_PTR_MASK);
+ xhci_write_64(xhci, val_64, &ir_set->erst_base);
+
+ /* Set the event ring dequeue address */
+ deq = xhci_trb_virt_to_dma((*er)->deq_seg, (*er)->dequeue);
+ if (deq == 0 && !in_interrupt())
+ xhci_warn(xhci,
+ "intr# %d:WARN something wrong with SW event ring deq ptr.\n",
+ intr_num);
+ /* Update HC event ring dequeue pointer */
+ val_64 = xhci_read_64(xhci, &ir_set->erst_dequeue);
+ val_64 &= ERST_PTR_MASK;
+ /* Don't clear the EHB bit (which is RW1C) because
+ * there might be more events to service.
+ */
+ val_64 &= ~ERST_EHB;
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "intr# %d:Write event ring dequeue pointer, preserving EHB bit",
+ intr_num);
+ xhci_write_64(xhci, ((u64) deq & (u64) ~ERST_PTR_MASK) | val_64,
+ &ir_set->erst_dequeue);
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "Wrote ERST address to ir_set %d.", intr_num);
+ xhci_print_ir_set(xhci, intr_num);
+
+ return 0;
+}
+
+int xhci_sec_event_ring_setup(struct usb_hcd *hcd, unsigned intr_num)
+{
+ int ret;
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+ if ((xhci->xhc_state & XHCI_STATE_HALTED) || !xhci->sec_ir_set
+ || !xhci->sec_event_ring || !xhci->sec_erst ||
+ intr_num >= xhci->max_interrupters) {
+ xhci_err(xhci,
+ "%s:state %x ir_set %pK evt_ring %pK erst %pK intr# %d\n",
+ __func__, xhci->xhc_state, xhci->sec_ir_set,
+ xhci->sec_event_ring, xhci->sec_erst, intr_num);
+ return -EINVAL;
+ }
+
+ if (xhci->sec_event_ring && xhci->sec_event_ring[intr_num]
+ && xhci->sec_event_ring[intr_num]->first_seg)
+ goto done;
+
+ xhci->sec_ir_set[intr_num] = &xhci->run_regs->ir_set[intr_num];
+ ret = xhci_event_ring_setup(xhci,
+ &xhci->sec_event_ring[intr_num],
+ xhci->sec_ir_set[intr_num],
+ &xhci->sec_erst[intr_num],
+ intr_num, GFP_KERNEL);
+ if (ret) {
+ xhci_err(xhci, "sec event ring setup failed inter#%d\n",
+ intr_num);
+ return ret;
+ }
+done:
+ return 0;
+}
+
+int xhci_event_ring_init(struct xhci_hcd *xhci, gfp_t flags)
+{
+ int ret = 0;
+
+ /* primary + secondary */
+ xhci->max_interrupters = HCS_MAX_INTRS(xhci->hcs_params1);
+
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "// Allocating primary event ring");
+
+ /* Set ir_set to interrupt register set 0 */
+ xhci->ir_set = &xhci->run_regs->ir_set[0];
+ ret = xhci_event_ring_setup(xhci, &xhci->event_ring, xhci->ir_set,
+ &xhci->erst, 0, flags);
+ if (ret) {
+ xhci_err(xhci, "failed to setup primary event ring\n");
+ goto fail;
+ }
+
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "// Allocating sec event ring related pointers");
+
+ xhci->sec_ir_set = kcalloc(xhci->max_interrupters,
+ sizeof(*xhci->sec_ir_set), flags);
+ if (!xhci->sec_ir_set) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ xhci->sec_event_ring = kcalloc(xhci->max_interrupters,
+ sizeof(*xhci->sec_event_ring), flags);
+ if (!xhci->sec_event_ring) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ xhci->sec_erst = kcalloc(xhci->max_interrupters,
+ sizeof(*xhci->sec_erst), flags);
+ if (!xhci->sec_erst)
+ ret = -ENOMEM;
+fail:
+ return ret;
+}
+
int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
{
dma_addr_t dma;
struct device *dev = xhci_to_hcd(xhci)->self.controller;
unsigned int val, val2;
u64 val_64;
- struct xhci_segment *seg;
u32 page_size, temp;
int i;
memset(xhci->dcbaa, 0, sizeof *(xhci->dcbaa));
xhci->dcbaa->dma = dma;
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Device context base array address = 0x%llx (DMA), %p (virt)",
+ "// Device context base array address = 0x%llx (DMA), %pK (virt)",
(unsigned long long)xhci->dcbaa->dma, xhci->dcbaa);
xhci_write_64(xhci, dma, &xhci->op_regs->dcbaa_ptr);
if (!xhci->cmd_ring)
goto fail;
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "Allocated command ring at %p", xhci->cmd_ring);
+ "Allocated command ring at %pK", xhci->cmd_ring);
xhci_dbg_trace(xhci, trace_xhci_dbg_init, "First segment DMA is 0x%llx",
(unsigned long long)xhci->cmd_ring->first_seg->dma);
xhci->dba = (void __iomem *) xhci->cap_regs + val;
xhci_dbg_regs(xhci);
xhci_print_run_regs(xhci);
- /* Set ir_set to interrupt register set 0 */
- xhci->ir_set = &xhci->run_regs->ir_set[0];
/*
* Event ring setup: Allocate a normal ring, but also setup
* the event ring segment table (ERST). Section 4.9.3.
*/
- xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Allocating event ring");
- xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1, TYPE_EVENT,
- flags);
- if (!xhci->event_ring)
- goto fail;
- if (xhci_check_trb_in_td_math(xhci) < 0)
+ if (xhci_event_ring_init(xhci, GFP_KERNEL))
goto fail;
- xhci->erst.entries = dma_alloc_coherent(dev,
- sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS, &dma,
- flags);
- if (!xhci->erst.entries)
+ if (xhci_check_trb_in_td_math(xhci) < 0)
goto fail;
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Allocated event ring segment table at 0x%llx",
- (unsigned long long)dma);
-
- memset(xhci->erst.entries, 0, sizeof(struct xhci_erst_entry)*ERST_NUM_SEGS);
- xhci->erst.num_entries = ERST_NUM_SEGS;
- xhci->erst.erst_dma_addr = dma;
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "Set ERST to 0; private num segs = %i, virt addr = %p, dma addr = 0x%llx",
- xhci->erst.num_entries,
- xhci->erst.entries,
- (unsigned long long)xhci->erst.erst_dma_addr);
-
- /* set ring base address and size for each segment table entry */
- for (val = 0, seg = xhci->event_ring->first_seg; val < ERST_NUM_SEGS; val++) {
- struct xhci_erst_entry *entry = &xhci->erst.entries[val];
- entry->seg_addr = cpu_to_le64(seg->dma);
- entry->seg_size = cpu_to_le32(TRBS_PER_SEGMENT);
- entry->rsvd = 0;
- seg = seg->next;
- }
-
- /* set ERST count with the number of entries in the segment table */
- val = readl(&xhci->ir_set->erst_size);
- val &= ERST_SIZE_MASK;
- val |= ERST_NUM_SEGS;
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Write ERST size = %i to ir_set 0 (some bits preserved)",
- val);
- writel(val, &xhci->ir_set->erst_size);
-
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Set ERST entries to point to event ring.");
- /* set the segment table base address */
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Set ERST base address for ir_set 0 = 0x%llx",
- (unsigned long long)xhci->erst.erst_dma_addr);
- val_64 = xhci_read_64(xhci, &xhci->ir_set->erst_base);
- val_64 &= ERST_PTR_MASK;
- val_64 |= (xhci->erst.erst_dma_addr & (u64) ~ERST_PTR_MASK);
- xhci_write_64(xhci, val_64, &xhci->ir_set->erst_base);
-
- /* Set the event ring dequeue address */
- xhci_set_hc_event_deq(xhci);
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "Wrote ERST address to ir_set 0.");
- xhci_print_ir_set(xhci, 0);
/*
* XXX: Might need to set the Interrupter Moderation Register to
#include <linux/slab.h>
#include "xhci.h"
#include "xhci-trace.h"
+extern void kick_usbpd_vbus_sm(void);
+extern bool is_xiaomi_headset;
/*
* Returns zero if the TRB isn't in this segment, otherwise it returns the DMA
static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay)
{
+ if (is_xiaomi_headset)
+ delay = msecs_to_jiffies(1000);
+
return mod_delayed_work(system_wq, &xhci->cmd_timer, delay);
}
i_cmd->status = COMP_CMD_STOP;
- xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
+ xhci_dbg(xhci, "Turn aborted command %pK to no-op\n",
i_cmd->command_trb);
/* get cycle state from the original cmd trb */
cycle_state = le32_to_cpu(
{
u64 temp_64;
int ret;
+ int delay;
xhci_dbg(xhci, "Abort command ring\n");
&xhci->op_regs->cmd_ring);
/* Section 4.6.1.2 of xHCI 1.0 spec says software should
- * time the completion od all xHCI commands, including
+ * time the completion of all xHCI commands, including
* the Command Abort operation. If software doesn't see
- * CRR negated in a timely manner (e.g. longer than 5
- * seconds), then it should assume that the there are
- * larger problems with the xHC and assert HCRST.
+ * CRR negated in a timely manner, then it should assume
+ * that there are larger problems with the xHC and assert HCRST.
*/
- ret = xhci_handshake(&xhci->op_regs->cmd_ring,
- CMD_RING_RUNNING, 0, 5 * 1000 * 1000);
+ if (is_xiaomi_headset) {
+ delay = 500 * 1000;
+ } else {
+ delay = 5000 * 1000;
+ }
+
+ ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring,
+ CMD_RING_RUNNING, 0, delay);
if (ret < 0) {
- /* we are about to kill xhci, give it one more chance */
- xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
- &xhci->op_regs->cmd_ring);
- udelay(1000);
- ret = xhci_handshake(&xhci->op_regs->cmd_ring,
- CMD_RING_RUNNING, 0, 3 * 1000 * 1000);
- if (ret < 0) {
- xhci_err(xhci, "Stopped the command ring failed, "
- "maybe the host is dead\n");
- xhci->xhc_state |= XHCI_STATE_DYING;
- xhci_quiesce(xhci);
- xhci_halt(xhci);
- return -ESHUTDOWN;
- }
+ if (is_xiaomi_headset)
+ return -EPERM;
+ xhci_err(xhci,
+ "Stop command ring failed, maybe the host is dead\n");
+ xhci->xhc_state |= XHCI_STATE_DYING;
+ xhci_quiesce(xhci);
+ xhci_halt(xhci);
+ return -ESHUTDOWN;
}
/*
* Writing the CMD_RING_ABORT bit should cause a cmd completion event,
"Cycle state = 0x%x", state->new_cycle_state);
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
- "New dequeue segment = %p (virtual)",
+ "New dequeue segment = %pK (virtual)",
state->new_deq_seg);
addr = xhci_trb_virt_to_dma(state->new_deq_seg, state->new_deq_ptr);
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
"Cancel (unchain) link TRB");
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
- "Address = %p (0x%llx dma); "
- "in seg %p (0x%llx dma)",
+ "Address = %pK (0x%llx dma); "
+ "in seg %pK (0x%llx dma)",
cur_trb,
(unsigned long long)xhci_trb_virt_to_dma(cur_seg, cur_trb),
cur_seg,
* short, don't muck with the stream ID after
* submission.
*/
- xhci_warn(xhci, "WARN Cancelled URB %p "
+ xhci_warn(xhci, "WARN Cancelled URB %pK "
"has invalid stream ID %u.\n",
cur_td->urb,
cur_td->urb->stream_id);
ep_ring, ep_index);
} else {
xhci_warn(xhci, "Mismatch between completed Set TR Deq Ptr command & xHCI internal state.\n");
- xhci_warn(xhci, "ep deq seg = %p, deq ptr = %p\n",
+ xhci_warn(xhci, "ep deq seg = %pK, deq ptr = %pK\n",
ep->queued_deq_seg, ep->queued_deq_ptr);
}
}
xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
xhci_dbg(xhci, "Command timeout\n");
ret = xhci_abort_cmd_ring(xhci, flags);
+ if (ret == -EPERM) {
+ xhci_err(xhci, "Abort command ring failed reset usb device\n");
+ xhci_cleanup_command_queue(xhci);
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ kick_usbpd_vbus_sm();
+ return;
+ }
+
if (unlikely(ret == -ESHUTDOWN)) {
xhci_err(xhci, "Abort command ring failed\n");
xhci_cleanup_command_queue(xhci);
usb_hcd_resume_root_hub(hcd);
}
- if (hcd->speed >= HCD_USB3 && (temp & PORT_PLS_MASK) == XDEV_INACTIVE)
- bus_state->port_remote_wakeup &= ~(1 << faked_port_index);
-
if ((temp & PORT_PLC) && (temp & PORT_PLS_MASK) == XDEV_RESUME) {
xhci_dbg(xhci, "port resume event for port %d\n", port_id);
bus_state->port_remote_wakeup |= 1 << faked_port_index;
xhci_test_and_clear_bit(xhci, port_array,
faked_port_index, PORT_PLC);
+ usb_hcd_start_port_resume(&hcd->self, faked_port_index);
xhci_set_link_state(xhci, port_array, faked_port_index,
XDEV_U0);
/* Need to wait until the next link state change
if (slot_id && xhci->devs[slot_id])
xhci_ring_device(xhci, slot_id);
if (bus_state->port_remote_wakeup & (1 << faked_port_index)) {
- bus_state->port_remote_wakeup &=
- ~(1 << faked_port_index);
xhci_test_and_clear_bit(xhci, port_array,
faked_port_index, PORT_PLC);
usb_wakeup_notification(hcd->self.root_hub,
URB_SHORT_NOT_OK)) ||
(status != 0 &&
!usb_endpoint_xfer_isoc(&urb->ep->desc)))
- xhci_dbg(xhci, "Giveback URB %p, len = %d, "
+ xhci_dbg(xhci, "Giveback URB %pK, len = %d, "
"expected = %d, status = %d\n",
urb, urb->actual_length,
urb->transfer_buffer_length,
return 0;
}
+/*
+ * Variant of xhci_queue_ctrl_tx() used to implement EHSET
+ * SINGLE_STEP_SET_FEATURE test mode. It differs in that the control
+ * transfer is broken up so that the SETUP stage can happen and call
+ * the URB's completion handler before the DATA/STATUS stages are
+ * executed by the xHC hardware. This assumes the control transfer is a
+ * GetDescriptor, with a DATA stage in the IN direction, and an OUT
+ * STATUS stage.
+ *
+ * This function is called twice, usually with a 15-second delay in between.
+ * - with is_setup==true, the SETUP stage for the control request
+ * (GetDescriptor) is queued in the TRB ring and sent to HW immediately
+ * - with is_setup==false, the DATA and STATUS TRBs are queued and executed
+ *
+ * Caller must have locked xhci->lock
+ */
+int xhci_submit_single_step_set_feature(struct usb_hcd *hcd, struct urb *urb,
+ int is_setup)
+{
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ struct xhci_ring *ep_ring;
+ int num_trbs;
+ int ret;
+ unsigned int slot_id, ep_index;
+ struct usb_ctrlrequest *setup;
+ struct xhci_generic_trb *start_trb;
+ int start_cycle;
+ u32 field, length_field, remainder;
+ struct urb_priv *urb_priv;
+ struct xhci_td *td;
+
+ ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
+ if (!ep_ring)
+ return -EINVAL;
+
+ /* Need buffer for data stage */
+ if (urb->transfer_buffer_length <= 0)
+ return -EINVAL;
+
+ /*
+ * Need to copy setup packet into setup TRB, so we can't use the setup
+ * DMA address.
+ */
+ if (!urb->setup_packet)
+ return -EINVAL;
+ setup = (struct usb_ctrlrequest *) urb->setup_packet;
+
+ slot_id = urb->dev->slot_id;
+ ep_index = xhci_get_endpoint_index(&urb->ep->desc);
+
+ urb_priv = kzalloc(sizeof(struct urb_priv) +
+ sizeof(struct xhci_td *), GFP_ATOMIC);
+ if (!urb_priv)
+ return -ENOMEM;
+
+ td = urb_priv->td[0] = kzalloc(sizeof(struct xhci_td), GFP_ATOMIC);
+ if (!td) {
+ kfree(urb_priv);
+ return -ENOMEM;
+ }
+
+ urb_priv->length = 1;
+ urb_priv->td_cnt = 0;
+ urb->hcpriv = urb_priv;
+
+ num_trbs = is_setup ? 1 : 2;
+
+ ret = prepare_transfer(xhci, xhci->devs[slot_id],
+ ep_index, urb->stream_id,
+ num_trbs, urb, 0, GFP_ATOMIC);
+ if (ret < 0) {
+ kfree(td);
+ kfree(urb_priv);
+ return ret;
+ }
+
+ /*
+ * Don't give the first TRB to the hardware (by toggling the cycle bit)
+ * until we've finished creating all the other TRBs. The ring's cycle
+ * state may change as we enqueue the other TRBs, so save it too.
+ */
+ start_trb = &ep_ring->enqueue->generic;
+ start_cycle = ep_ring->cycle_state;
+
+ if (is_setup) {
+ /* Queue only the setup TRB */
+ field = TRB_IDT | TRB_IOC | TRB_TYPE(TRB_SETUP);
+ if (start_cycle == 0)
+ field |= 0x1;
+
+ /* xHCI 1.0 6.4.1.2.1: Transfer Type field */
+ if (xhci->hci_version == 0x100) {
+ if (setup->bRequestType & USB_DIR_IN)
+ field |= TRB_TX_TYPE(TRB_DATA_IN);
+ else
+ field |= TRB_TX_TYPE(TRB_DATA_OUT);
+ }
+
+ /* Save the DMA address of the last TRB in the TD */
+ td->last_trb = ep_ring->enqueue;
+
+ queue_trb(xhci, ep_ring, false,
+ setup->bRequestType | setup->bRequest << 8 |
+ le16_to_cpu(setup->wValue) << 16,
+ le16_to_cpu(setup->wIndex) |
+ le16_to_cpu(setup->wLength) << 16,
+ TRB_LEN(8) | TRB_INTR_TARGET(0),
+ field);
+ } else {
+ /* Queue data TRB */
+ field = TRB_ISP | TRB_TYPE(TRB_DATA);
+ if (start_cycle == 0)
+ field |= 0x1;
+ if (setup->bRequestType & USB_DIR_IN)
+ field |= TRB_DIR_IN;
+
+ remainder = xhci_td_remainder(xhci, 0,
+ urb->transfer_buffer_length,
+ urb->transfer_buffer_length,
+ urb, 1);
+
+ length_field = TRB_LEN(urb->transfer_buffer_length) |
+ TRB_TD_SIZE(remainder) |
+ TRB_INTR_TARGET(0);
+
+ queue_trb(xhci, ep_ring, true,
+ lower_32_bits(urb->transfer_dma),
+ upper_32_bits(urb->transfer_dma),
+ length_field,
+ field);
+
+ /* Save the DMA address of the last TRB in the TD */
+ td->last_trb = ep_ring->enqueue;
+
+ /* Queue status TRB */
+ field = TRB_IOC | TRB_TYPE(TRB_STATUS);
+ if (!(setup->bRequestType & USB_DIR_IN))
+ field |= TRB_DIR_IN;
+
+ queue_trb(xhci, ep_ring, false,
+ 0,
+ 0,
+ TRB_INTR_TARGET(0),
+ field | ep_ring->cycle_state);
+ }
+
+ giveback_first_trb(xhci, slot_id, ep_index, 0, start_cycle, start_trb);
+ return 0;
+}
+
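/*
 * Illustrative sketch (editor's example, not part of this patch): the EHSET
 * test-mode glue is expected to drive the helper above in two phases, each
 * under xhci->lock. "hcd", "urb", "flags" and the 15 second wait are
 * assumptions for the example only.
 *
 *	spin_lock_irqsave(&xhci->lock, flags);
 *	retval = xhci_submit_single_step_set_feature(hcd, urb, 1); // SETUP only
 *	spin_unlock_irqrestore(&xhci->lock, flags);
 *
 *	msleep(15 * 1000); // let the analyzer observe SOFs between the stages
 *
 *	spin_lock_irqsave(&xhci->lock, flags);
 *	retval = xhci_submit_single_step_set_feature(hcd, urb, 0); // DATA + STATUS
 *	spin_unlock_irqrestore(&xhci->lock, flags);
 */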
static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
struct urb *urb, int i)
{
int ret;
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
- "Set TR Deq Ptr cmd, new deq seg = %p (0x%llx dma), new deq ptr = %p (0x%llx dma), new cycle = %u",
+ "Set TR Deq Ptr cmd, new deq seg = %pK (0x%llx dma), new deq ptr = %pK (0x%llx dma), new cycle = %u",
deq_state->new_deq_seg,
(unsigned long long)deq_state->new_deq_seg->dma,
deq_state->new_deq_ptr,
deq_state->new_deq_ptr);
if (addr == 0) {
xhci_warn(xhci, "WARN Cannot submit Set TR Deq Ptr\n");
- xhci_warn(xhci, "WARN deq seg = %p, deq pt = %p\n",
+ xhci_warn(xhci, "WARN deq seg = %pK, deq pt = %pK\n",
deq_state->new_deq_seg, deq_state->new_deq_ptr);
return;
}
return ret;
}
+int xhci_handshake_check_state(struct xhci_hcd *xhci,
+ void __iomem *ptr, u32 mask, u32 done, int usec)
+{
+ u32 result;
+
+ do {
+ result = readl_relaxed(ptr);
+ if (result == ~(u32)0) /* card removed */
+ return -ENODEV;
+ /* host removed. Bail out */
+ if (xhci->xhc_state & XHCI_STATE_REMOVING)
+ return -ENODEV;
+ result &= mask;
+ if (result == done)
+ return 0;
+ udelay(1);
+ usec--;
+ } while (usec > 0);
+ return -ETIMEDOUT;
+}
+
/*
* Disable interrupts and begin the xHCI halting process.
*/
STS_HALT, STS_HALT, XHCI_MAX_HALT_USEC);
if (!ret) {
xhci->xhc_state |= XHCI_STATE_HALTED;
- xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
- } else
+ } else {
xhci_warn(xhci, "Host not halted after %u microseconds.\n",
XHCI_MAX_HALT_USEC);
+ }
+
+ xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
+
+ if (delayed_work_pending(&xhci->cmd_timer)) {
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init,
+ "Cleanup command queue");
+ cancel_delayed_work(&xhci->cmd_timer);
+ xhci_cleanup_command_queue(xhci);
+ }
+
return ret;
}
{
u32 temp;
int ret;
+ struct usb_hcd *hcd = xhci_to_hcd(xhci);
+ /*
+ * disable irq to avoid xhci_irq flooding due to unhandled port
+ * change events in halt state, as soon as xhci_start clears the halt bit
+ */
+ disable_irq(hcd->irq);
temp = readl(&xhci->op_regs->command);
temp |= (CMD_RUN);
xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Turn on HC, cmd = 0x%x.",
/* clear state flags. Including dying, halted or removing */
xhci->xhc_state = 0;
+ enable_irq(hcd->irq);
+
return ret;
}
temp = readl(&xhci->ir_set->irq_pending);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Enabling event ring interrupter %p by writing 0x%x to irq_pending",
+ "// Enabling event ring interrupter %pK by writing 0x%x to irq_pending",
xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp));
writel(ER_IRQ_ENABLE(temp), &xhci->ir_set->irq_pending);
xhci_print_ir_set(xhci, 0);
usb_disable_xhci_ports(to_pci_dev(hcd->self.controller));
spin_lock_irq(&xhci->lock);
+ if (!HCD_HW_ACCESSIBLE(hcd)) {
+ spin_unlock_irq(&xhci->lock);
+ return;
+ }
xhci_halt(xhci);
/* Workaround for spurious wakeups at shutdown with HSW */
if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"xhci_shutdown completed - status = %x",
readl(&xhci->op_regs->status));
-
- /* Yet another workaround for spurious wakeups at shutdown with HSW */
- if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
- pci_set_power_state(to_pci_dev(hcd->self.controller), PCI_D3hot);
}
+ EXPORT_SYMBOL_GPL(xhci_shutdown);
#ifdef CONFIG_PM
static void xhci_save_registers(struct xhci_hcd *xhci)
int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
{
int rc = 0;
- unsigned int delay = XHCI_MAX_HALT_USEC;
+ unsigned int delay = XHCI_MAX_HALT_USEC * 2;
struct usb_hcd *hcd = xhci_to_hcd(xhci);
u32 command;
- if (!hcd->state)
+ if (!hcd->state || xhci->suspended)
return 0;
if (hcd->state != HC_STATE_SUSPENDED ||
/* step 5: remove core well power */
/* synchronize irq when using MSI-X */
xhci_msix_sync_irqs(xhci);
+ xhci->suspended = true;
return rc;
}
int retval = 0;
bool comp_timer_running = false;
- if (!hcd->state)
+ if (!hcd->state || !xhci->suspended)
return 0;
/* Wait a bit if either of the roothubs need to settle from the
/* Re-enable port polling. */
xhci_dbg(xhci, "%s: starting port polling.\n", __func__);
+ xhci->suspended = false;
set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
usb_hcd_poll_rh_status(xhci->shared_hcd);
set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
exit:
return ret;
dying:
- xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for "
+ xhci_dbg(xhci, "Ep 0x%x: URB %pK submitted for "
"non-responsive xHCI host.\n",
urb->ep->desc.bEndpointAddress, urb);
ret = -ESHUTDOWN;
i = urb_priv->td_cnt;
if (i < urb_priv->length)
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
- "Cancel URB %p, dev %s, ep 0x%x, "
+ "Cancel URB %pK, dev %s, ep 0x%x, "
"starting at offset 0x%llx",
urb, urb->dev->devpath,
urb->ep->desc.bEndpointAddress,
if (xhci->xhc_state & XHCI_STATE_DYING)
return -ENODEV;
- xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
+ xhci_dbg(xhci, "%s called for udev %pK\n", __func__, udev);
drop_flag = xhci_get_endpoint_flag(&ep->desc);
if (drop_flag == SLOT_FLAG || drop_flag == EP0_FLAG) {
xhci_dbg(xhci, "xHCI %s - can't drop slot or ep 0 %#x\n",
xhci_get_endpoint_flag(&ep->desc)) {
/* Do not warn when called after a usb_device_reset */
if (xhci->devs[udev->slot_id]->eps[ep_index].ring != NULL)
- xhci_warn(xhci, "xHCI %s called with disabled ep %p\n",
+ xhci_warn(xhci, "xHCI %s called with disabled ep %pK\n",
__func__, ep);
return 0;
}
* ignore this request.
*/
if (le32_to_cpu(ctrl_ctx->add_flags) & added_ctxs) {
- xhci_warn(xhci, "xHCI %s called with enabled ep %p\n",
+ xhci_warn(xhci, "xHCI %s called with enabled ep %pK\n",
__func__, ep);
return 0;
}
(xhci->xhc_state & XHCI_STATE_REMOVING))
return -ENODEV;
- xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
+ xhci_dbg(xhci, "%s called for udev %pK\n", __func__, udev);
virt_dev = xhci->devs[udev->slot_id];
command = xhci_alloc_command(xhci, false, true, GFP_KERNEL);
return;
xhci = hcd_to_xhci(hcd);
- xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
+ xhci_dbg(xhci, "%s called for udev %pK\n", __func__, udev);
virt_dev = xhci->devs[udev->slot_id];
/* Free any rings allocated for added endpoints */
for (i = 0; i < 31; ++i) {
if (addr == 0) {
xhci_warn(xhci, "WARN Cannot submit config ep after "
"reset ep command\n");
- xhci_warn(xhci, "WARN deq seg = %p, deq ptr = %p\n",
+ xhci_warn(xhci, "WARN deq seg = %pK, deq ptr = %pK\n",
deq_state->new_deq_seg,
deq_state->new_deq_ptr);
return;
del_timer_sync(&virt_dev->eps[i].stop_cmd_timer);
}
+ virt_dev->udev = NULL;
spin_lock_irqsave(&xhci->lock, flags);
virt_dev->udev = NULL;
xhci_dbg_trace(xhci, trace_xhci_dbg_address,
"Op regs DCBAA ptr = %#016llx", temp_64);
xhci_dbg_trace(xhci, trace_xhci_dbg_address,
- "Slot ID %d dcbaa entry @%p = %#016llx",
+ "Slot ID %d dcbaa entry @%pK = %#016llx",
udev->slot_id,
&xhci->dcbaa->dev_context_ptrs[udev->slot_id],
(unsigned long long)
}
EXPORT_SYMBOL_GPL(xhci_gen_setup);
+dma_addr_t xhci_get_sec_event_ring_dma_addr(struct usb_hcd *hcd,
+ unsigned intr_num)
+{
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+ if (intr_num >= xhci->max_interrupters) {
+ xhci_err(xhci, "intr num %d >= max intrs %d\n", intr_num,
+ xhci->max_interrupters);
+ return 0;
+ }
+
+ if (!(xhci->xhc_state & XHCI_STATE_HALTED) &&
+ xhci->sec_event_ring && xhci->sec_event_ring[intr_num]
+ && xhci->sec_event_ring[intr_num]->first_seg)
+ return xhci->sec_event_ring[intr_num]->first_seg->dma;
+
+ return 0;
+}
+
+static dma_addr_t xhci_get_dcba_dma_addr(struct usb_hcd *hcd,
+ struct usb_device *udev)
+{
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+ if (!(xhci->xhc_state & XHCI_STATE_HALTED) && xhci->dcbaa)
+ return xhci->dcbaa->dev_context_ptrs[udev->slot_id];
+
+ return 0;
+}
+
+dma_addr_t xhci_get_xfer_ring_dma_addr(struct usb_hcd *hcd,
+ struct usb_device *udev, struct usb_host_endpoint *ep)
+{
+ int ret;
+ unsigned int ep_index;
+ struct xhci_virt_device *virt_dev;
+
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+ ret = xhci_check_args(hcd, udev, ep, 1, true, __func__);
+ if (ret <= 0) {
+ xhci_err(xhci, "%s: invalid args\n", __func__);
+ return 0;
+ }
+
+ virt_dev = xhci->devs[udev->slot_id];
+ ep_index = xhci_get_endpoint_index(&ep->desc);
+
+ if (virt_dev->eps[ep_index].ring &&
+ virt_dev->eps[ep_index].ring->first_seg)
+ return virt_dev->eps[ep_index].ring->first_seg->dma;
+
+ return 0;
+}
+
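/*
 * Illustrative sketch (editor's example, not part of this patch): a
 * controller glue driver holding a usb_hcd pointer could consume the new
 * hc_driver hooks roughly as below; "hcd" and "intr_num" are assumptions
 * for the example.
 *
 *	if (!hcd->driver->sec_event_ring_setup(hcd, intr_num)) {
 *		dma_addr_t er_dma =
 *			hcd->driver->get_sec_event_ring_dma_addr(hcd, intr_num);
 *		// hand er_dma to the offload hardware; later, tear down:
 *		hcd->driver->sec_event_ring_cleanup(hcd, intr_num);
 *	}
 */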
static const struct hc_driver xhci_hc_driver = {
.description = "xhci-hcd",
.product_desc = "xHCI Host Controller",
.enable_usb3_lpm_timeout = xhci_enable_usb3_lpm_timeout,
.disable_usb3_lpm_timeout = xhci_disable_usb3_lpm_timeout,
.find_raw_port_number = xhci_find_raw_port_number,
+ .sec_event_ring_setup = xhci_sec_event_ring_setup,
+ .sec_event_ring_cleanup = xhci_sec_event_ring_cleanup,
+ .get_sec_event_ring_dma_addr = xhci_get_sec_event_ring_dma_addr,
+ .get_xfer_ring_dma_addr = xhci_get_xfer_ring_dma_addr,
+ .get_dcba_dma_addr = xhci_get_dcba_dma_addr,
};
void xhci_init_driver(struct hc_driver *drv,
#define XDEV_U3 (0x3 << 5)
#define XDEV_INACTIVE (0x6 << 5)
#define XDEV_POLLING (0x7 << 5)
+ #define XDEV_RECOVERY (0x8 << 5)
#define XDEV_COMP_MODE (0xa << 5)
#define XDEV_RESUME (0xf << 5)
/* true: port has power (see HCC_PPC) */
/* Our HCD's current interrupter register set */
struct xhci_intr_reg __iomem *ir_set;
+ /* secondary interrupter */
+ struct xhci_intr_reg __iomem **sec_ir_set;
+
/* Cached register copies of read-only HC data */
__u32 hcs_params1;
__u32 hcs_params2;
struct xhci_command *current_cmd;
struct xhci_ring *event_ring;
struct xhci_erst erst;
+
+ /* secondary event ring and erst */
+ struct xhci_ring **sec_event_ring;
+ struct xhci_erst *sec_erst;
+
/* Scratchpad */
struct xhci_scratchpad *scratchpad;
/* Store LPM test failed devices' information */
/* Compliance Mode Recovery Data */
struct timer_list comp_mode_recovery_timer;
u32 port_status_u0;
+ bool suspended;
/* Compliance Mode Timer Triggered every 2 seconds */
#define COMP_MODE_RCVRY_MSECS 2000
};
void xhci_urb_free_priv(struct urb_priv *urb_priv);
void xhci_free_command(struct xhci_hcd *xhci,
struct xhci_command *command);
+int xhci_sec_event_ring_setup(struct usb_hcd *hcd, unsigned intr_num);
+int xhci_sec_event_ring_cleanup(struct usb_hcd *hcd, unsigned intr_num);
/* xHCI host controller glue */
typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *);
int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec);
+int xhci_handshake_check_state(struct xhci_hcd *xhci,
+ void __iomem *ptr, u32 mask, u32 done, int usec);
void xhci_quiesce(struct xhci_hcd *xhci);
int xhci_halt(struct xhci_hcd *xhci);
int xhci_reset(struct xhci_hcd *xhci);
void xhci_stop(struct usb_hcd *hcd);
void xhci_shutdown(struct usb_hcd *hcd);
int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks);
void xhci_init_driver(struct hc_driver *drv,
const struct xhci_driver_overrides *over);
struct xhci_slot_ctx *xhci_get_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx);
struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, unsigned int ep_index);
+/* EHSET */
+int xhci_submit_single_step_set_feature(struct usb_hcd *hcd, struct urb *urb,
+ int is_setup);
+
#endif /* __LINUX_XHCI_HCD_H */
return "4:3";
case HDMI_PICTURE_ASPECT_16_9:
return "16:9";
+ case HDMI_PICTURE_ASPECT_64_27:
+ return "64:27";
+ case HDMI_PICTURE_ASPECT_256_135:
+ return "256:135";
case HDMI_PICTURE_ASPECT_RESERVED:
return "Reserved";
}
if (ptr[0] & 0x10)
frame->active_aspect = ptr[1] & 0xf;
if (ptr[0] & 0x8) {
- frame->top_bar = (ptr[5] << 8) + ptr[6];
- frame->bottom_bar = (ptr[7] << 8) + ptr[8];
+ frame->top_bar = (ptr[6] << 8) | ptr[5];
+ frame->bottom_bar = (ptr[8] << 8) | ptr[7];
}
if (ptr[0] & 0x4) {
- frame->left_bar = (ptr[9] << 8) + ptr[10];
- frame->right_bar = (ptr[11] << 8) + ptr[12];
+ frame->left_bar = (ptr[10] << 8) | ptr[9];
+ frame->right_bar = (ptr[12] << 8) | ptr[11];
}
frame->scan_mode = ptr[0] & 0x3;
#include <linux/balloon_compaction.h>
#include <linux/oom.h>
#include <linux/wait.h>
+#include <linux/mount.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
module_param(oom_pages, int, S_IRUSR | S_IWUSR);
MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+#endif
+
struct virtio_balloon {
struct virtio_device *vdev;
struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
{
struct virtqueue *vqs[3];
vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- const char *names[] = { "inflate", "deflate", "stats" };
+ static const char * const names[] = { "inflate", "deflate", "stats" };
int err, nvqs;
/*
get_page(newpage); /* balloon reference */
+ /*
+ * When we migrate a page to a different zone, and we adjusted the
+ * managed page count when inflating, we have to fix up the count of
+ * both involved zones.
+ */
+ if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) &&
+ page_zone(page) != page_zone(newpage)) {
+ adjust_managed_page_count(page, 1);
+ adjust_managed_page_count(newpage, -1);
+ }
+
/* balloon's page migration 1st step -- inflate "newpage" */
spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
balloon_page_insert(vb_dev_info, newpage);
return MIGRATEPAGE_SUCCESS;
}
+
+static struct dentry *balloon_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ static const struct dentry_operations ops = {
+ .d_dname = simple_dname,
+ };
+
+ return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops,
+ BALLOON_KVM_MAGIC);
+}
+
+static struct file_system_type balloon_fs = {
+ .name = "balloon-kvm",
+ .mount = balloon_mount,
+ .kill_sb = kill_anon_super,
+};
+
#endif /* CONFIG_BALLOON_COMPACTION */
static int virtballoon_probe(struct virtio_device *vdev)
vb->need_stats_update = 0;
balloon_devinfo_init(&vb->vb_dev_info);
-#ifdef CONFIG_BALLOON_COMPACTION
- vb->vb_dev_info.migratepage = virtballoon_migratepage;
-#endif
err = init_vqs(vb);
if (err)
vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY;
err = register_oom_notifier(&vb->nb);
if (err < 0)
- goto out_oom_notify;
+ goto out_del_vqs;
+
+#ifdef CONFIG_BALLOON_COMPACTION
+ balloon_mnt = kern_mount(&balloon_fs);
+ if (IS_ERR(balloon_mnt)) {
+ err = PTR_ERR(balloon_mnt);
+ unregister_oom_notifier(&vb->nb);
+ goto out_del_vqs;
+ }
+
+ vb->vb_dev_info.migratepage = virtballoon_migratepage;
+ vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+ if (IS_ERR(vb->vb_dev_info.inode)) {
+ err = PTR_ERR(vb->vb_dev_info.inode);
+ kern_unmount(balloon_mnt);
+ unregister_oom_notifier(&vb->nb);
+ vb->vb_dev_info.inode = NULL;
+ goto out_del_vqs;
+ }
+ vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+#endif
virtio_device_ready(vdev);
out_del_vqs:
unregister_oom_notifier(&vb->nb);
-out_oom_notify:
vdev->config->del_vqs(vdev);
out_free_vb:
kfree(vb);
unregister_oom_notifier(&vb->nb);
kthread_stop(vb->thread);
remove_common(vb);
+ if (vb->vb_dev_info.inode)
+ iput(vb->vb_dev_info.inode);
kfree(vb);
}
INIT_LIST_HEAD(&fdlocks->locks);
fdlocks->cfile = cfile;
cfile->llist = fdlocks;
- cifs_down_write(&cinode->lock_sem);
- list_add(&fdlocks->llist, &cinode->llist);
- up_write(&cinode->lock_sem);
cfile->count = 1;
cfile->pid = current->tgid;
oplock = 0;
}
+ cifs_down_write(&cinode->lock_sem);
+ list_add(&fdlocks->llist, &cinode->llist);
+ up_write(&cinode->lock_sem);
+
spin_lock(&tcon->open_file_lock);
if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
oplock = fid->pending_open->oplock;
if (backup_cred(cifs_sb))
create_options |= CREATE_OPEN_BACKUP_INTENT;
+ /* O_SYNC also has bit for O_DSYNC so following check picks up either */
+ if (cfile->f_flags & O_SYNC)
+ create_options |= CREATE_WRITE_THROUGH;
+
+ if (cfile->f_flags & O_DIRECT)
+ create_options |= CREATE_NO_BUFFER;
+
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);
* should have access to this page, we're safe to simply set
* PG_locked without checking it first.
*/
- __set_page_locked(page);
+ __SetPageLocked(page);
rc = add_to_page_cache_locked(page, mapping,
page->index, gfp);
/* give up if we can't stick it in the cache */
if (rc) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
return rc;
}
if (*bytes + PAGE_CACHE_SIZE > rsize)
break;
- __set_page_locked(page);
+ __SetPageLocked(page);
if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
- __clear_page_locked(page);
+ __ClearPageLocked(page);
break;
}
list_move_tail(&page->lru, tmplist);
kfree(forget);
if (ret == -ENOMEM)
goto out;
- if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ if (ret || fuse_invalid_attr(&outarg.attr) ||
+ (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
fuse_change_attributes(inode, &outarg.attr,
goto out;
}
+/*
+ * Get the canonical path. Since we must translate to a path, this must be done
+ * in the context of the userspace daemon, however, the userspace daemon cannot
+ * look up paths on its own. Instead, we handle the lookup as a special case
+ * inside of the write request.
+ */
+static void fuse_dentry_canonical_path(const struct path *path,
+ struct path *canonical_path)
+{
+ struct inode *inode = path->dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ int err;
+ char *path_name;
+
+ req = fuse_get_req(fc, 1);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto default_path;
+
+ path_name = (char*)__get_free_page(GFP_KERNEL);
+ if (!path_name) {
+ fuse_put_request(fc, req);
+ goto default_path;
+ }
+
+ req->in.h.opcode = FUSE_CANONICAL_PATH;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 0;
+ req->out.numargs = 1;
+ req->out.args[0].size = PATH_MAX;
+ req->out.args[0].value = path_name;
+ req->canonical_path = canonical_path;
+ req->out.argvar = 1;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ free_page((unsigned long)path_name);
+ if (!err)
+ return;
+default_path:
+ canonical_path->dentry = path->dentry;
+ canonical_path->mnt = path->mnt;
+ path_get(canonical_path);
+}
+
static int invalid_nodeid(u64 nodeid)
{
return !nodeid || nodeid == FUSE_ROOT_ID;
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
+ .d_canonical_path = fuse_dentry_canonical_path,
};
int fuse_valid_type(int m)
S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
+ bool fuse_invalid_attr(struct fuse_attr *attr)
+ {
+ return !fuse_valid_type(attr->mode) ||
+ attr->size > LLONG_MAX;
+ }
+
int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
err = -EIO;
if (!outarg->nodeid)
goto out_put_forget;
- if (!fuse_valid_type(outarg->attr.mode))
+ if (fuse_invalid_attr(&outarg->attr))
goto out_put_forget;
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
args.out.args[0].value = &outentry;
args.out.args[1].size = sizeof(outopen);
args.out.args[1].value = &outopen;
+ args.out.passthrough_filp = NULL;
err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
err = -EIO;
- if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
+ if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
+ fuse_invalid_attr(&outentry.attr))
goto out_free_ff;
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
+ if (args.out.passthrough_filp != NULL)
+ ff->passthrough_filp = args.out.passthrough_filp;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
goto out_put_forget_req;
err = -EIO;
- if (invalid_nodeid(outarg.nodeid))
+ if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
goto out_put_forget_req;
if ((outarg.attr.mode ^ mode) & S_IFMT)
spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version;
- inc_nlink(inode);
+ if (likely(inode->i_nlink < UINT_MAX))
+ inc_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
fuse_update_ctime(inode);
args.out.args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (!err) {
- if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ if (fuse_invalid_attr(&outarg.attr) ||
+ (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
err = -EIO;
} else {
if (invalid_nodeid(o->nodeid))
return -EIO;
- if (!fuse_valid_type(o->attr.mode))
+ if (fuse_invalid_attr(&o->attr))
return -EIO;
fc = get_fuse_conn(dir);
goto error;
}
- if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ if (fuse_invalid_attr(&outarg.attr) ||
+ (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
err = -EIO;
goto error;
/** Has flock been performed on this file? */
bool flock:1;
+
+ /* backing file used for read/write passthrough */
+ struct file *passthrough_filp;
+ bool passthrough_enabled;
};
/** One input argument of a request */
unsigned argvar:1;
unsigned numargs;
struct fuse_arg args[2];
+ struct file *passthrough_filp;
} out;
};
/** Inode used in the request or NULL */
struct inode *inode;
+ /** Path used for completing d_canonical_path */
+ struct path *canonical_path;
+
/** AIO control block */
struct fuse_io_priv *io;
/** Request is stolen from fuse_file->reserved_req */
struct file *stolen_file;
+
+ /** fuse passthrough file */
+ struct file *passthrough_filp;
};
struct fuse_iqueue {
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
+ /** passthrough IO. */
+ unsigned passthrough:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
*/
int fuse_valid_type(int m);
+ bool fuse_invalid_attr(struct fuse_attr *attr);
+
/**
* Is current process allowed to perform filesystem operation?
*/
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
- "Ngid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
"TracerPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n"
+ "Ngid:\t%d\n"
"FDSize:\t%d\nGroups:\t",
get_task_state(p),
- tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
+ tgid, pid_nr_ns(pid, ns), ppid, tpid,
from_kuid_munged(user_ns, cred->uid),
from_kuid_munged(user_ns, cred->euid),
from_kuid_munged(user_ns, cred->suid),
from_kgid_munged(user_ns, cred->egid),
from_kgid_munged(user_ns, cred->sgid),
from_kgid_munged(user_ns, cred->fsgid),
- max_fds);
+ ngid, max_fds);
group_info = cred->group_info;
for (g = 0; g < group_info->ngroups; g++)
mm = get_task_mm(task);
if (mm) {
vsize = task_vsize(mm);
- if (permitted) {
- eip = KSTK_EIP(task);
- esp = KSTK_ESP(task);
+ /*
+ * esp and eip are intentionally zeroed out. There is no
+ * non-racy way to read them without freezing the task.
+ * Programs that need reliable values can use ptrace(2).
+ *
+ * The only exception is if the task is core dumping because
+ * a program is not able to use ptrace(2) in that case. It is
+ * safe because the task has stopped executing permanently.
+ */
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
+ if (try_get_task_stack(task)) {
+ eip = KSTK_EIP(task);
+ esp = KSTK_ESP(task);
+ put_task_stack(task);
+ }
}
}
int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
int (*dma_supported)(struct device *dev, u64 mask);
int (*set_dma_mask)(struct device *dev, u64 mask);
+ void *(*remap)(struct device *dev, void *cpu_addr, dma_addr_t handle,
+ size_t size, struct dma_attrs *attrs);
+ void (*unremap)(struct device *dev, void *remapped_address,
+ size_t size);
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
u64 (*get_required_mask)(struct device *dev);
#endif
#include <asm-generic/dma-mapping-broken.h>
#endif
+#ifndef CONFIG_NO_DMA
+static inline void *dma_remap(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_handle, size_t size, struct dma_attrs *attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ BUG_ON(!ops);
+
+ if (!ops->remap) {
+ WARN_ONCE(1, "Remap function not implemented for %pS\n",
+ ops->remap);
+ return NULL;
+ }
+
+ return ops->remap(dev, cpu_addr, dma_handle, size, attrs);
+}
+
+
+static inline void dma_unremap(struct device *dev, void *remapped_addr,
+ size_t size)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ BUG_ON(!ops);
+
+ if (!ops->unremap) {
+ WARN_ONCE(1, "unremap function not implemented for %pS\n",
+ ops->unremap);
+ return;
+ }
+
+ return ops->unremap(dev, remapped_addr, size);
+}
+#endif
+
+
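/*
 * Illustrative sketch (editor's example, not part of this patch): a driver
 * that already owns a coherent buffer could use the new hooks roughly as
 * below; "dev", "vaddr", "handle" and "size" are assumptions for the example.
 *
 *	void *remapped = dma_remap(dev, vaddr, handle, size, NULL);
 *
 *	if (remapped) {
 *		// ... access the buffer through the alternate mapping ...
 *		dma_unremap(dev, remapped, size);
 *	}
 */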
static inline u64 dma_get_mask(struct device *dev)
{
if (dev && dev->dma_mask && *dev->dma_mask)
return SZ_64K;
}
- static inline unsigned int dma_set_max_seg_size(struct device *dev,
- unsigned int size)
+ static inline int dma_set_max_seg_size(struct device *dev, unsigned int size)
{
if (dev->dma_parms) {
dev->dma_parms->max_segment_size = size;
unsigned char if_port;
unsigned char dma;
+ /* Note : dev->mtu is often read without holding a lock.
+ * Writers usually hold RTNL.
+ * It is recommended to use READ_ONCE() to annotate the reads,
+ * and to use WRITE_ONCE() to annotate the writes.
+ */
unsigned int mtu;
unsigned short type;
unsigned short hard_header_len;
*/
struct softnet_data {
struct list_head poll_list;
+ struct napi_struct *current_napi;
struct sk_buff_head process_queue;
/* stats */
unsigned int time_squeeze;
unsigned int cpu_collision;
unsigned int received_rps;
+ unsigned int gro_coalesced;
+
#ifdef CONFIG_RPS
struct softnet_data *rps_ipi_list;
#endif
gro_result_t napi_gro_frags(struct napi_struct *napi);
struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);
+extern struct napi_struct *get_current_napi_context(void);
static inline void napi_free_frags(struct napi_struct *napi)
{
* Data passed is old voltage cast to (void *).
* PRE_DISABLE Regulator is about to be disabled
* ABORT_DISABLE Regulator disable failed for some reason
+ * ENABLE Regulator was enabled.
*
* NOTE: These events can be OR'ed together when passed into handler.
*/
#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200
#define REGULATOR_EVENT_PRE_DISABLE 0x400
#define REGULATOR_EVENT_ABORT_DISABLE 0x800
+#define REGULATOR_EVENT_ENABLE 0x1000
/**
* struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event
* using the bulk regulator APIs.
* @consumer: The regulator consumer for the supply. This will be managed
* by the bulk API.
+ * @min_uV: The minimum requested voltage for the regulator (in microvolts),
+ * or 0 to not set a voltage.
+ * @max_uV: The maximum requested voltage for the regulator (in microvolts),
+ * or 0 to use @min_uV.
*
* The regulator APIs provide a series of regulator_bulk_() API calls as
* a convenience to consumers which require multiple supplies. This
struct regulator_bulk_data {
const char *supply;
struct regulator *consumer;
+ int min_uV;
+ int max_uV;
/* private: Internal use */
int ret;
struct regulator_bulk_data *consumers);
int __must_check regulator_bulk_enable(int num_consumers,
struct regulator_bulk_data *consumers);
+int regulator_bulk_set_voltage(int num_consumers,
+ struct regulator_bulk_data *consumers);
int regulator_bulk_disable(int num_consumers,
struct regulator_bulk_data *consumers);
int regulator_bulk_force_disable(int num_consumers,
int regulator_can_change_voltage(struct regulator *regulator);
int regulator_count_voltages(struct regulator *regulator);
int regulator_list_voltage(struct regulator *regulator, unsigned selector);
+int regulator_list_corner_voltage(struct regulator *regulator, int corner);
int regulator_is_supported_voltage(struct regulator *regulator,
int min_uV, int max_uV);
unsigned int regulator_get_linear_step(struct regulator *regulator);
static inline int regulator_set_load(struct regulator *regulator, int load_uA)
{
- return REGULATOR_MODE_NORMAL;
+ return 0;
}
static inline int regulator_allow_bypass(struct regulator *regulator,
return -EINVAL;
}
+static inline int regulator_list_corner_voltage(struct regulator *regulator,
+ int corner)
+{
+ return -EINVAL;
+}
#endif
static inline int regulator_set_voltage_triplet(struct regulator *regulator,
void (*set_ldisc)(struct uart_port *, struct ktermios *);
void (*pm)(struct uart_port *, unsigned int state,
unsigned int oldstate);
+ void (*wake_peer)(struct uart_port *);
/*
* Return a string describing the type of the port
struct console *cons; /* struct console, if any */
#if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(SUPPORT_SYSRQ)
unsigned long sysrq; /* sysrq timeout */
+ unsigned int sysrq_ch; /* char for sysrq */
#endif
/* flags must be updated while holding port mutex */
struct earlycon_id {
char name[16];
+ char compatible[128];
int (*setup)(struct earlycon_device *, const char *options);
} __aligned(32);
+extern const struct earlycon_id __earlycon_table[];
+extern const struct earlycon_id __earlycon_table_end[];
+
+#define OF_EARLYCON_DECLARE(_name, compat, fn) \
+ static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \
+ __used __section(__earlycon_table) \
+ = { .name = __stringify(_name), \
+ .compatible = compat, \
+ .setup = fn }
+
+#define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn)
+
extern int setup_earlycon(char *buf);
extern int of_setup_earlycon(unsigned long addr,
int (*setup)(struct earlycon_device *, const char *));
-#define EARLYCON_DECLARE(_name, func) \
- static const struct earlycon_id __earlycon_##_name \
- __used __section(__earlycon_table) \
- = { .name = __stringify(_name), \
- .setup = func }
-
-#define OF_EARLYCON_DECLARE(name, compat, fn) \
- _OF_DECLARE(earlycon, name, compat, fn, void *)
-
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
struct console *c);
int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr,
static inline int uart_tx_stopped(struct uart_port *port)
{
struct tty_struct *tty = port->state->port.tty;
- if (tty->stopped || port->hw_stopped)
+ if ((tty && tty->stopped) || port->hw_stopped)
return 1;
return 0;
}
}
return 0;
}
+ static inline int
+ uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch)
+ {
+ if (port->sysrq) {
+ if (ch && time_before(jiffies, port->sysrq)) {
+ port->sysrq_ch = ch;
+ port->sysrq = 0;
+ return 1;
+ }
+ port->sysrq = 0;
+ }
+ return 0;
+ }
+ static inline void
+ uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags)
+ {
+ int sysrq_ch;
+
+ sysrq_ch = port->sysrq_ch;
+ port->sysrq_ch = 0;
+
+ spin_unlock_irqrestore(&port->lock, irqflags);
+
+ if (sysrq_ch)
+ handle_sysrq(sysrq_ch);
+ }
#else
- #define uart_handle_sysrq_char(port,ch) ({ (void)port; 0; })
+ static inline int
+ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; }
+ static inline int
+ uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; }
+ static inline void
+ uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags)
+ {
+ spin_unlock_irqrestore(&port->lock, irqflags);
+ }
#endif
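/*
 * Illustrative sketch (editor's example, not part of this patch): a UART
 * interrupt handler adopting the new helpers defers handle_sysrq() until the
 * port lock has been dropped, roughly as below; "rx_ready", "read_rx_fifo",
 * "lsr", "overrun_mask", "ch" and "flag" are driver-specific assumptions.
 *
 *	spin_lock_irqsave(&port->lock, flags);
 *	while (rx_ready(port)) {
 *		ch = read_rx_fifo(port);
 *		if (uart_prepare_sysrq_char(port, ch))
 *			continue;
 *		uart_insert_char(port, lsr, overrun_mask, ch, flag);
 *	}
 *	tty_flip_buffer_push(&port->state->port);
 *	uart_unlock_and_check_sysrq(port, flags);
 */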
/*
#include <linux/types.h>
#include <linux/bug.h>
+#include <linux/restart_block.h>
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-#define current_thread_info() ((struct thread_info *)current)
-#endif
-
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ struct timespec;
+ struct compat_timespec;
+
/*
- * System call restart block.
+ * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the
+ * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels,
+ * including <asm/current.h> can cause a circular dependency on some platforms.
*/
-struct restart_block {
- long (*fn)(struct restart_block *);
- union {
- /* For futex_wait and futex_wait_requeue_pi */
- struct {
- u32 __user *uaddr;
- u32 val;
- u32 flags;
- u32 bitset;
- u64 time;
- u32 __user *uaddr2;
- } futex;
- /* For nanosleep */
- struct {
- clockid_t clockid;
- struct timespec __user *rmtp;
-#ifdef CONFIG_COMPAT
- struct compat_timespec __user *compat_rmtp;
+#include <asm/current.h>
+#define current_thread_info() ((struct thread_info *)current)
#endif
- u64 expires;
- } nanosleep;
- /* For poll */
- struct {
- struct pollfd __user *ufds;
- int nfds;
- int has_timeout;
- unsigned long tv_sec;
- unsigned long tv_nsec;
- } poll;
- };
-};
-
-extern long do_no_restart_syscall(struct restart_block *parm);
#include <linux/bitops.h>
#include <asm/thread_info.h>
#error "no set_restore_sigmask() provided and default one won't work"
#endif
+#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_HARDENED_USERCOPY
+extern void __check_object_size(const void *ptr, unsigned long n,
+ bool to_user);
+
+static __always_inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ if (!__builtin_constant_p(n))
+ __check_object_size(ptr, n, to_user);
+}
+#else
+static inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+{ }
+#endif /* CONFIG_HARDENED_USERCOPY */
+
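/*
 * Illustrative sketch (editor's example, not part of this patch):
 * architecture uaccess code is expected to gate copies through
 * check_object_size() before doing the raw copy, roughly as below; the
 * __arch_copy_from_user() helper name is an assumption for the example.
 *
 *	static inline unsigned long
 *	example_copy_from_user(void *to, const void __user *from, unsigned long n)
 *	{
 *		check_object_size(to, n, false);
 *		return __arch_copy_from_user(to, from, n);
 *	}
 */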
#endif /* __KERNEL__ */
#endif /* _LINUX_THREAD_INFO_H */
/* -1 if not needed */
int bound_dev_if;
u8 tos;
+ kuid_t uid;
};
#define IP_REPLY_ARG_NOSRCCHECK 1
}
#endif
+extern int sysctl_reserved_port_bind;
+
/* From inetpeer.c */
extern int inet_peer_threshold;
extern int inet_peer_minttl;
int ip_misc_proc_init(void);
#endif
+ static inline bool inetdev_valid_mtu(unsigned int mtu)
+ {
+ return likely(mtu >= IPV4_MIN_MTU);
+ }
+
#endif /* _IP_H */
* most likely due to retrans in 3WHS.
*/
+/* Number of full MSS to receive before Acking RFC2581 */
+#define TCP_DELACK_SEG 1
+
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
* for local resources.
*/
extern int sysctl_tcp_invalid_ratelimit;
extern int sysctl_tcp_pacing_ss_ratio;
extern int sysctl_tcp_pacing_ca_ratio;
+extern int sysctl_tcp_default_init_rwnd;
extern atomic_long_t tcp_memory_allocated;
+
+/* sysctl variables for controlling various tcp parameters */
+extern int sysctl_tcp_delack_seg;
+extern int sysctl_tcp_use_userconfig;
+
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+/* sysctl master controller */
+extern int tcp_use_userconfig_sysctl_handler(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+extern int tcp_proc_delayed_ack_control(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+
void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
+
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
*/
static inline void tcp_synq_overflow(const struct sock *sk)
{
- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
unsigned long now = jiffies;
- if (time_after(now, last_overflow + HZ))
- tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+ if (!time_between32(now, last_overflow, last_overflow + HZ))
+ WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
}
/* syncookies: no recent synqueue overflow on this listening socket? */
static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-
- return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+ unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+
+ /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
+ * then we're under synflood. However, we have to use
+ * 'last_overflow - HZ' as lower bound. That's because a concurrent
+ * tcp_synq_overflow() could update .ts_recent_stamp after we read
+ * jiffies but before we store .ts_recent_stamp into last_overflow,
+ * which could lead to rejecting a valid syncookie.
+ */
+ return !time_between32(jiffies, last_overflow - HZ,
+ last_overflow + TCP_SYNCOOKIE_VALID);
}
static inline u32 tcp_cookie_time(void)
void tcp_send_window_probe(struct sock *sk);
-/* TCP timestamps are only 32-bits, this causes a slight
- * complication on 64-bit systems since we store a snapshot
- * of jiffies in the buffer control blocks below. We decided
- * to use only the low 32-bits of jiffies and hide the ugly
- * casts with the following macro.
+/* TCP uses 32bit jiffies to save some space.
+ * Note that this is different from tcp_time_stamp, which
+ * historically has been the same until linux-4.13.
+ */
+#define tcp_jiffies32 ((u32)jiffies)
+
+/* Generator for TCP TS option (RFC 7323)
+ * Currently tied to 'jiffies' but will soon be driven by 1 ms clock.
*/
#define tcp_time_stamp ((__u32)(jiffies))
void tcp_done(struct sock *sk);
+int tcp_abort(struct sock *sk, int err);
+
static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
rx_opt->dsack = 0;
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
free_module(mod);
+ /* someone could wait for the module in add_unformed_module() */
+ wake_up_all(&module_wq);
return 0;
out:
mutex_unlock(&module_mutex);
/* We'll tack temporary mod_kallsyms on the end. */
mod->init_size = ALIGN(mod->init_size,
- __alignof__(struct mod_kallsyms));
+ __alignof__(struct mod_kallsyms));
info->mod_kallsyms_init_off = mod->init_size;
mod->init_size += sizeof(struct mod_kallsyms);
mod->init_size = debug_align(mod->init_size);
return vmalloc_exec(size);
}
-#ifdef CONFIG_DEBUG_KMEMLEAK
+#if defined(CONFIG_DEBUG_KMEMLEAK) && defined(CONFIG_DEBUG_MODULE_SCAN_OFF)
+static void kmemleak_load_module(const struct module *mod,
+ const struct load_info *info)
+{
+ kmemleak_no_scan(mod->module_core);
+}
+#elif defined(CONFIG_DEBUG_KMEMLEAK)
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
{
#include <linux/mempolicy.h>
#include <linux/migrate.h>
#include <linux/task_work.h>
-
-#include <trace/events/sched.h>
+#include <linux/module.h>
#include "sched.h"
+#include <trace/events/sched.h>
+#include "tune.h"
+#include "walt.h"
/*
* Targeted preemption latency for CPU-bound tasks:
unsigned int sysctl_sched_latency = 6000000ULL;
unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+unsigned int sysctl_sched_sync_hint_enable = 1;
+unsigned int sysctl_sched_cstate_aware = 1;
+
/*
* The initial- and re-scaling of tunables is configurable
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+/*
+ * The margin used when comparing utilization with CPU capacity:
+ * util * margin < capacity * 1024
+ */
+unsigned int capacity_margin = 1280; /* ~20% */
+
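/*
 * Worked example of the margin check: with capacity_margin = 1280, a task
 * utilization of 800 fits a CPU of capacity 1024 because
 * 800 * 1280 = 1024000 < 1024 * 1024 = 1048576; in other words, utilization
 * must stay below roughly 80% of the CPU's capacity (1024 * 1024 / 1280 = 819.2).
 */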
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
return mul_u64_u32_shr(delta_exec, fact, shift);
}
+#ifdef CONFIG_SMP
+static int active_load_balance_cpu_stop(void *data);
+#endif
const struct sched_class fair_sched_class;
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (!cfs_rq->on_list) {
+ struct rq *rq = rq_of(cfs_rq);
+ int cpu = cpu_of(rq);
/*
* Ensure we either appear before our parent (if already
* enqueued) or force our parent to appear after us when it is
- * enqueued. The fact that we always enqueue bottom-up
- * reduces this to two cases.
+ * enqueued. The fact that we always enqueue bottom-up
+ * reduces this to two cases and a special case for the root
+ * cfs_rq. Furthermore, it also means that we will always reset
+ * tmp_alone_branch either when the branch is connected
+ * to a tree or when we reach the beginning of the tree.
*/
if (cfs_rq->tg->parent &&
- cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
- list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
- &rq_of(cfs_rq)->leaf_cfs_rq_list);
- } else {
+ cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
+ /*
+ * If the parent is already on the list, we add the child
+ * just before it. Thanks to the circular property of
+ * the list, this means putting the child at the tail
+ * of the list that starts with the parent.
+ */
+ list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
+ &(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
+ /*
+ * The branch is now connected to its tree so we can
+ * reset tmp_alone_branch to the beginning of the
+ * list.
+ */
+ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
+ } else if (!cfs_rq->tg->parent) {
+ /*
+ * A cfs_rq without a parent should be put
+ * at the tail of the list.
+ */
list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
- &rq_of(cfs_rq)->leaf_cfs_rq_list);
+ &rq->leaf_cfs_rq_list);
+ /*
+ * We have reached the beginning of a tree, so we can reset
+ * tmp_alone_branch to the beginning of the list.
+ */
+ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
+ } else {
+ /*
+ * The parent has not been added yet, so we want to
+ * make sure that it will be put after us.
+ * tmp_alone_branch points to the beginning of the branch
+ * where we will add the parent.
+ */
+ list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
+ rq->tmp_alone_branch);
+ /*
+ * Update tmp_alone_branch to point to the new beginning
+ * of the branch.
+ */
+ rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
}
cfs_rq->on_list = 1;
}
#ifdef CONFIG_SMP
-static int select_idle_sibling(struct task_struct *p, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
static unsigned long task_h_load(struct task_struct *p);
/*
{
struct sched_avg *sa = &se->avg;
- sa->last_update_time = 0;
+ memset(sa, 0, sizeof(*sa));
/*
+ * util_avg is initialized in post_init_entity_util_avg.
+ * util_est should start from zero.
* sched_avg's period_contrib should be strictly less then 1024, so
* we give it 1023 to make sure it is almost a period (1024us), and
* will definitely be update (after enqueue).
*/
sa->period_contrib = 1023;
- sa->load_avg = scale_load_down(se->load.weight);
+ /*
+ * Tasks are initialized with full load to be seen as heavy tasks until
+ * they get a chance to stabilize to their real load level.
+ * Group entities are initialized with zero load to reflect the fact that
+ * nothing has been attached to the task group yet.
+ */
+ if (entity_is_task(se))
+ sa->load_avg = scale_load_down(se->load.weight);
sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
- sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
- sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
}
-#else
+static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
+static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
+static void attach_entity_cfs_rq(struct sched_entity *se);
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
+/*
+ * With new tasks being created, their initial util_avgs are extrapolated
+ * based on the cfs_rq's current util_avg:
+ *
+ * util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ *
+ * However, in many cases, the above util_avg does not give a desired
+ * value. Moreover, the sum of the util_avgs may be divergent, such
+ * as when the series is a harmonic series.
+ *
+ * To solve this problem, we also cap the util_avg of successive tasks to
+ * only 1/2 of the left utilization budget:
+ *
+ * util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n
+ *
+ * where n denotes the nth task.
+ *
+ * For example, a simplest series from the beginning would be like:
+ *
+ * task util_avg: 512, 256, 128, 64, 32, 16, 8, ...
+ * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
+ *
+ * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
+ * if util_avg > util_avg_cap.
+ */
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ struct sched_avg *sa = &se->avg;
+ long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
+
+ if (cap > 0) {
+ if (cfs_rq->avg.util_avg != 0) {
+ sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
+ sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+
+ if (sa->util_avg > cap)
+ sa->util_avg = cap;
+ } else {
+ sa->util_avg = cap;
+ }
+ /*
+ * If we wish to restore tuning via setting initial util,
+ * this is where we should do it.
+ */
+ sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+ }
+
+ if (entity_is_task(se)) {
+ struct task_struct *p = task_of(se);
+ if (p->sched_class != &fair_sched_class) {
+ /*
+ * For !fair tasks do:
+ *
+ update_cfs_rq_load_avg(now, cfs_rq, false);
+ attach_entity_load_avg(cfs_rq, se);
+ switched_from_fair(rq, p);
+ *
+ * such that the next switched_to_fair() has the
+ * expected state.
+ */
+ se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
+ return;
+ }
+ }
+
+ attach_entity_cfs_rq(se);
+}
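/*
 * Worked example of the capping above (illustrative numbers): with
 * cfs_rq->avg.util_avg = 512 and cfs_rq->avg.load_avg = 1023, a new task of
 * weight 1024 would be extrapolated to util_avg = 512 * 1024 / 1024 = 512,
 * but cap = (1024 - 512) / 2 = 256, so the task actually starts with
 * util_avg = 256 and util_sum = 256 * LOAD_AVG_MAX.
 */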
+
+#else /* !CONFIG_SMP */
void init_entity_runnable_average(struct sched_entity *se)
{
}
-#endif
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+}
+static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
+{
+}
+#endif /* CONFIG_SMP */
/*
* Update the current task's runtime statistics.
update_curr(cfs_rq_of(&rq->curr->se));
}
+#ifdef CONFIG_SCHEDSTATS
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ u64 wait_start = rq_clock(rq_of(cfs_rq));
+
+ if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+ likely(wait_start > se->statistics.wait_start))
+ wait_start -= se->statistics.wait_start;
+
+ se->statistics.wait_start = wait_start;
+}
+
+static void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct task_struct *p;
+ u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+
+ if (entity_is_task(se)) {
+ p = task_of(se);
+ if (task_on_rq_migrating(p)) {
+ /*
+ * Preserve migrating task's wait time so wait_start
+ * time stamp can be adjusted to accumulate wait time
+ * prior to migration.
+ */
+ se->statistics.wait_start = delta;
+ return;
+ }
+ trace_sched_stat_wait(p, delta);
+ }
+
+ se->statistics.wait_max = max(se->statistics.wait_max, delta);
+ se->statistics.wait_count++;
+ se->statistics.wait_sum += delta;
+ se->statistics.wait_start = 0;
+}
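/*
 * Example of the migration handling above (illustrative numbers): a task that
 * has already waited 3ms on CPU0 is migrated; update_stats_wait_end() stores
 * delta = 3ms into wait_start instead of reporting it. On CPU1,
 * update_stats_wait_start() sees a migrating task and sets
 * wait_start = rq_clock(CPU1) - 3ms, so if the task runs after a further 2ms
 * the wait accounted on CPU1 is the full 5ms rather than just 2ms.
 */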
+#else
static inline void
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
}
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+#endif
+
/*
* Task is being enqueued - update stats:
*/
update_stats_wait_start(cfs_rq, se);
}
-static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
- schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
- schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-#ifdef CONFIG_SCHEDSTATS
- if (entity_is_task(se)) {
- trace_sched_stat_wait(task_of(se),
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
- }
-#endif
- schedstat_set(se->statistics.wait_start, 0);
-}
-
static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
* Call select_idle_sibling to maybe find a better one.
*/
if (!cur)
- env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
+ env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
+ env->dst_cpu);
assign:
assigned = true;
#ifdef CONFIG_FAIR_GROUP_SCHED
# ifdef CONFIG_SMP
-static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
- long tg_weight;
+ long tg_weight, load, shares;
/*
- * Use this CPU's real-time load instead of the last load contribution
- * as the updating of the contribution is delayed, and we will use the
- * the real-time load to calc the share. See update_tg_load_avg().
+ * This really should be: cfs_rq->avg.load_avg, but instead we use
+ * cfs_rq->load.weight, which is its upper bound. This helps ramp up
+ * the shares for small weight interactive tasks.
*/
- tg_weight = atomic_long_read(&tg->load_avg);
- tg_weight -= cfs_rq->tg_load_avg_contrib;
- tg_weight += cfs_rq->load.weight;
-
- return tg_weight;
-}
+ load = scale_load_down(cfs_rq->load.weight);
-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
-{
- long tg_weight, load, shares;
+ tg_weight = atomic_long_read(&tg->load_avg);
- tg_weight = calc_tg_weight(tg, cfs_rq);
- load = cfs_rq->load.weight;
+ /* Ensure tg_weight >= load */
+ tg_weight -= cfs_rq->tg_load_avg_contrib;
+ tg_weight += load;
shares = (tg->shares * load);
if (tg_weight)
return tg->shares;
}
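/*
 * Sketch of the resulting shares (illustrative numbers; the division by
 * tg_weight and the clamping happen in the remainder of calc_cfs_shares()):
 * with tg->shares = 1024, tg->load_avg = 3072, this cfs_rq's
 * tg_load_avg_contrib = 1024 and a local load.weight of 2048,
 * tg_weight = 3072 - 1024 + 2048 = 4096 and
 * shares = 1024 * 2048 / 4096 = 512, i.e. this CPU's group entity carries
 * half of the group's shares.
 */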
# endif /* CONFIG_SMP */
+
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
-static void update_cfs_shares(struct cfs_rq *cfs_rq)
+static void update_cfs_shares(struct sched_entity *se)
{
+ struct cfs_rq *cfs_rq = group_cfs_rq(se);
struct task_group *tg;
- struct sched_entity *se;
long shares;
- tg = cfs_rq->tg;
- se = tg->se[cpu_of(rq_of(cfs_rq))];
- if (!se || throttled_hierarchy(cfs_rq))
+ if (!cfs_rq)
+ return;
+
+ if (throttled_hierarchy(cfs_rq))
return;
+
+ tg = cfs_rq->tg;
+
#ifndef CONFIG_SMP
if (likely(se->load.weight == tg->shares))
return;
reweight_entity(cfs_rq_of(se), se, shares);
}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
+static inline void update_cfs_shares(struct sched_entity *se)
{
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_SMP
-/* Precomputed fixed inverse multiplies for multiplication by y^n */
+u32 sched_get_wake_up_idle(struct task_struct *p)
+{
+ u32 enabled = p->flags & PF_WAKE_UP_IDLE;
+
+ return !!enabled;
+}
+EXPORT_SYMBOL(sched_get_wake_up_idle);
+
+int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle)
+{
+ int enable = !!wake_up_idle;
+
+ if (enable)
+ p->flags |= PF_WAKE_UP_IDLE;
+ else
+ p->flags &= ~PF_WAKE_UP_IDLE;
+
+ return 0;
+}
+EXPORT_SYMBOL(sched_set_wake_up_idle);
+
static const u32 runnable_avg_yN_inv[] = {
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
return contrib + runnable_avg_yN_sum[n];
}
-#if (SCHED_LOAD_SHIFT - SCHED_LOAD_RESOLUTION) != 10 || SCHED_CAPACITY_SHIFT != 10
-#error "load tracking assumes 2^10 as unit"
-#endif
+#ifdef CONFIG_SCHED_HMP
+
+/* CPU selection flag */
+#define SBC_FLAG_PREV_CPU 0x1
+#define SBC_FLAG_BEST_CAP_CPU 0x2
+#define SBC_FLAG_CPU_COST 0x4
+#define SBC_FLAG_MIN_COST 0x8
+#define SBC_FLAG_IDLE_LEAST_LOADED 0x10
+#define SBC_FLAG_IDLE_CSTATE 0x20
+#define SBC_FLAG_COST_CSTATE_TIE_BREAKER 0x40
+#define SBC_FLAG_COST_CSTATE_PREV_CPU_TIE_BREAKER 0x80
+#define SBC_FLAG_CSTATE_LOAD 0x100
+#define SBC_FLAG_BEST_SIBLING 0x200
+#define SBC_FLAG_WAKER_CPU 0x400
+#define SBC_FLAG_PACK_TASK 0x800
+
+/* Cluster selection flag */
+#define SBC_FLAG_COLOC_CLUSTER 0x10000
+#define SBC_FLAG_WAKER_CLUSTER 0x20000
+#define SBC_FLAG_BACKUP_CLUSTER 0x40000
+#define SBC_FLAG_BOOST_CLUSTER 0x80000
+
+struct cpu_select_env {
+ struct task_struct *p;
+ struct related_thread_group *rtg;
+ u8 reason;
+ u8 need_idle:1;
+ u8 need_waker_cluster:1;
+ u8 sync:1;
+ enum sched_boost_policy boost_policy;
+ u8 pack_task:1;
+ int prev_cpu;
+ DECLARE_BITMAP(candidate_list, NR_CPUS);
+ DECLARE_BITMAP(backup_list, NR_CPUS);
+ u64 task_load;
+ u64 cpu_load;
+ u32 sbc_best_flag;
+ u32 sbc_best_cluster_flag;
+ struct cpumask search_cpus;
+};
-#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+struct cluster_cpu_stats {
+ int best_idle_cpu, least_loaded_cpu;
+ int best_capacity_cpu, best_cpu, best_sibling_cpu;
+ int min_cost, best_sibling_cpu_cost;
+ int best_cpu_wakeup_latency;
+ u64 min_load, best_load, best_sibling_cpu_load;
+ s64 highest_spare_capacity;
+};
/*
- * We can represent the historical contribution to runnable average as the
- * coefficients of a geometric series. To do this we sub-divide our runnable
- * history into segments of approximately 1ms (1024us); label the segment that
- * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
- *
- * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
- * p0 p1 p2
- * (now) (~1ms ago) (~2ms ago)
- *
- * Let u_i denote the fraction of p_i that the entity was runnable.
- *
- * We then designate the fractions u_i as our co-efficients, yielding the
- * following representation of historical load:
- * u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
- *
- * We choose y based on the with of a reasonably scheduling period, fixing:
- * y^32 = 0.5
- *
- * This means that the contribution to load ~32ms ago (u_32) will be weighted
- * approximately half as much as the contribution to load within the last ms
- * (u_0).
+ * Should task be woken to any available idle cpu?
*
- * When a period "rolls over" and we have new u_0`, multiplying the previous
- * sum again by y is sufficient to update:
- * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
- * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
+ * Waking tasks to an idle cpu has mixed implications for both performance and
+ * power. In many cases, the scheduler can't correctly estimate the impact of
+ * using idle cpus on either performance or power. PF_WAKE_UP_IDLE allows an
+ * external kernel module to pass a strong hint to the scheduler that the task
+ * in question should be woken to an idle cpu, generally to improve performance.
*/
-static __always_inline int
-__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
- unsigned long weight, int running, struct cfs_rq *cfs_rq)
+static inline int wake_to_idle(struct task_struct *p)
{
- u64 delta, scaled_delta, periods;
- u32 contrib;
- unsigned int delta_w, scaled_delta_w, decayed = 0;
- unsigned long scale_freq, scale_cpu;
+ return (current->flags & PF_WAKE_UP_IDLE) ||
+ (p->flags & PF_WAKE_UP_IDLE);
+}
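/*
 * Usage sketch (illustrative): a driver that knows a particular kthread is
 * latency critical could call
 *
 *   sched_set_wake_up_idle(task, 1);
 *
 * after which wake_to_idle() returns true for wakeups of (or by) that task
 * and select_best_cpu() below biases its placement towards idle CPUs.
 */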
- delta = now - sa->last_update_time;
- /*
- * This should only happen when time goes backwards, which it
- * unfortunately does during sched clock init when we swap over to TSC.
- */
- if ((s64)delta < 0) {
- sa->last_update_time = now;
- return 0;
- }
+static int spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
+{
+ u64 total_load;
- /*
- * Use 1024ns as the unit of measurement since it's a reasonable
- * approximation of 1us and fast to compute.
- */
- delta >>= 10;
- if (!delta)
- return 0;
- sa->last_update_time = now;
+ total_load = env->task_load + env->cpu_load;
- scale_freq = arch_scale_freq_capacity(NULL, cpu);
- scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+ if (total_load > sched_spill_load ||
+ (rq->nr_running + 1) > sysctl_sched_spill_nr_run)
+ return 1;
- /* delta_w is the amount already accumulated against our next period */
- delta_w = sa->period_contrib;
- if (delta + delta_w >= 1024) {
- decayed = 1;
+ return 0;
+}
- /* how much left for next period will start over, we don't know yet */
- sa->period_contrib = 0;
+static int skip_cpu(int cpu, struct cpu_select_env *env)
+{
+ int tcpu = task_cpu(env->p);
+ int skip = 0;
- /*
- * Now that we know we're crossing a period boundary, figure
- * out how much from delta we need to complete the current
- * period and accrue it.
- */
- delta_w = 1024 - delta_w;
- scaled_delta_w = cap_scale(delta_w, scale_freq);
- if (weight) {
- sa->load_sum += weight * scaled_delta_w;
- if (cfs_rq) {
- cfs_rq->runnable_load_sum +=
- weight * scaled_delta_w;
- }
- }
- if (running)
- sa->util_sum += scaled_delta_w * scale_cpu;
+ if (!env->reason)
+ return 0;
- delta -= delta_w;
+ if (is_reserved(cpu))
+ return 1;
- /* Figure out how many additional periods this update spans */
- periods = delta / 1024;
- delta %= 1024;
+ switch (env->reason) {
+ case UP_MIGRATION:
+ skip = !idle_cpu(cpu);
+ break;
+ case IRQLOAD_MIGRATION:
+ /* Purposely fall through */
+ default:
+ skip = (cpu == tcpu);
+ break;
+ }
- sa->load_sum = decay_load(sa->load_sum, periods + 1);
- if (cfs_rq) {
- cfs_rq->runnable_load_sum =
- decay_load(cfs_rq->runnable_load_sum, periods + 1);
- }
- sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
+ return skip;
+}
- /* Efficiently calculate \sum (1..n_period) 1024*y^i */
- contrib = __compute_runnable_contrib(periods);
- contrib = cap_scale(contrib, scale_freq);
- if (weight) {
- sa->load_sum += weight * contrib;
- if (cfs_rq)
- cfs_rq->runnable_load_sum += weight * contrib;
- }
- if (running)
- sa->util_sum += contrib * scale_cpu;
- }
+static inline int
+acceptable_capacity(struct sched_cluster *cluster, struct cpu_select_env *env)
+{
+ int tcpu;
+
+ if (!env->reason)
+ return 1;
+
+ tcpu = task_cpu(env->p);
+ switch (env->reason) {
+ case UP_MIGRATION:
+ return cluster->capacity > cpu_capacity(tcpu);
+
+ case DOWN_MIGRATION:
+ return cluster->capacity < cpu_capacity(tcpu);
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int
+skip_cluster(struct sched_cluster *cluster, struct cpu_select_env *env)
+{
+ if (!test_bit(cluster->id, env->candidate_list))
+ return 1;
+
+ if (!acceptable_capacity(cluster, env)) {
+ __clear_bit(cluster->id, env->candidate_list);
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct sched_cluster *
+select_least_power_cluster(struct cpu_select_env *env)
+{
+ struct sched_cluster *cluster;
+
+ if (env->rtg) {
+ int cpu = cluster_first_cpu(env->rtg->preferred_cluster);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p), cpu);
+
+ if (task_load_will_fit(env->p, env->task_load,
+ cpu, env->boost_policy)) {
+ env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
+
+ if (env->boost_policy == SCHED_BOOST_NONE)
+ return env->rtg->preferred_cluster;
+
+ for_each_sched_cluster(cluster) {
+ if (cluster != env->rtg->preferred_cluster) {
+ __set_bit(cluster->id,
+ env->backup_list);
+ __clear_bit(cluster->id,
+ env->candidate_list);
+ }
+ }
+
+ return env->rtg->preferred_cluster;
+ }
+
+ /*
+ * Since the task load does not fit on the preferred
+ * cluster anymore, pretend that the task does not
+ * have any preferred cluster. This allows the waking
+ * task to get the appropriate CPU it needs as per the
+ * non co-location placement policy without having to
+ * wait until the preferred cluster is updated.
+ */
+ env->rtg = NULL;
+ }
+
+ for_each_sched_cluster(cluster) {
+ if (!skip_cluster(cluster, env)) {
+ int cpu = cluster_first_cpu(cluster);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p),
+ cpu);
+ if (task_load_will_fit(env->p, env->task_load, cpu,
+ env->boost_policy))
+ return cluster;
+
+ __set_bit(cluster->id, env->backup_list);
+ __clear_bit(cluster->id, env->candidate_list);
+ }
+ }
+
+ return NULL;
+}
+
+static struct sched_cluster *
+next_candidate(const unsigned long *list, int start, int end)
+{
+ int cluster_id;
+
+ cluster_id = find_next_bit(list, end, start - 1 + 1);
+ if (cluster_id >= end)
+ return NULL;
+
+ return sched_cluster[cluster_id];
+}
+
+static void
+update_spare_capacity(struct cluster_cpu_stats *stats,
+ struct cpu_select_env *env, int cpu, int capacity,
+ u64 cpu_load)
+{
+ s64 spare_capacity = sched_ravg_window - cpu_load;
+
+ if (spare_capacity > 0 &&
+ (spare_capacity > stats->highest_spare_capacity ||
+ (spare_capacity == stats->highest_spare_capacity &&
+ ((!env->need_waker_cluster &&
+ capacity > cpu_capacity(stats->best_capacity_cpu)) ||
+ (env->need_waker_cluster &&
+ cpu_rq(cpu)->nr_running <
+ cpu_rq(stats->best_capacity_cpu)->nr_running))))) {
+ /*
+ * If the sync waker is the only runnable task on its CPU, the
+ * CPU's cr_avg is 0, so there is a high chance of placing the
+ * wakee on the waker's CPU, which would likely preempt the waker.
+ * That in turn can cause the preempted waker to be migrated. Place
+ * the wakee on a genuinely idle CPU when possible, by checking
+ * nr_running, to avoid such preemption.
+ */
+ stats->highest_spare_capacity = spare_capacity;
+ stats->best_capacity_cpu = cpu;
+ }
+}
+
+static inline void find_backup_cluster(
+struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ struct sched_cluster *next = NULL;
+ int i;
+ struct cpumask search_cpus;
+
+ extern int num_clusters;
+
+ while (!bitmap_empty(env->backup_list, num_clusters)) {
+ next = next_candidate(env->backup_list, 0, num_clusters);
+ __clear_bit(next->id, env->backup_list);
+
+ cpumask_and(&search_cpus, &env->search_cpus, &next->cpus);
+ for_each_cpu(i, &search_cpus) {
+ trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i), power_cost(i, task_load(env->p) +
+ cpu_cravg_sync(i, env->sync)), 0);
+
+ update_spare_capacity(stats, env, i, next->capacity,
+ cpu_load_sync(i, env->sync));
+ }
+ env->sbc_best_cluster_flag = SBC_FLAG_BACKUP_CLUSTER;
+ }
+}
+
+struct sched_cluster *
+next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env,
+ struct cluster_cpu_stats *stats)
+{
+ struct sched_cluster *next = NULL;
+
+ extern int num_clusters;
+
+ __clear_bit(cluster->id, env->candidate_list);
+
+ if (env->rtg && preferred_cluster(cluster, env->p))
+ return NULL;
+
+ do {
+ if (bitmap_empty(env->candidate_list, num_clusters))
+ return NULL;
+
+ next = next_candidate(env->candidate_list, 0, num_clusters);
+ if (next) {
+ if (next->min_power_cost > stats->min_cost) {
+ clear_bit(next->id, env->candidate_list);
+ next = NULL;
+ continue;
+ }
+
+ if (skip_cluster(next, env))
+ next = NULL;
+ }
+ } while (!next);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p),
+ cluster_first_cpu(next));
+ return next;
+}
+
+#ifdef CONFIG_SCHED_HMP_CSTATE_AWARE
+static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
+ struct cpu_select_env *env, int cpu_cost)
+{
+ int wakeup_latency;
+ int prev_cpu = env->prev_cpu;
+
+ wakeup_latency = cpu_rq(cpu)->wakeup_latency;
+
+ if (env->need_idle) {
+ stats->min_cost = cpu_cost;
+ if (idle_cpu(cpu)) {
+ if (wakeup_latency < stats->best_cpu_wakeup_latency ||
+ (wakeup_latency == stats->best_cpu_wakeup_latency &&
+ cpu == prev_cpu)) {
+ stats->best_idle_cpu = cpu;
+ stats->best_cpu_wakeup_latency = wakeup_latency;
+ }
+ } else {
+ if (env->cpu_load < stats->min_load ||
+ (env->cpu_load == stats->min_load &&
+ cpu == prev_cpu)) {
+ stats->least_loaded_cpu = cpu;
+ stats->min_load = env->cpu_load;
+ }
+ }
+
+ return;
+ }
+
+ if (cpu_cost < stats->min_cost) {
+ stats->min_cost = cpu_cost;
+ stats->best_cpu_wakeup_latency = wakeup_latency;
+ stats->best_load = env->cpu_load;
+ stats->best_cpu = cpu;
+ env->sbc_best_flag = SBC_FLAG_CPU_COST;
+ return;
+ }
+
+ /* CPU cost is the same. Start breaking the tie by C-state */
+
+ if (wakeup_latency > stats->best_cpu_wakeup_latency)
+ return;
+
+ if (wakeup_latency < stats->best_cpu_wakeup_latency) {
+ stats->best_cpu_wakeup_latency = wakeup_latency;
+ stats->best_load = env->cpu_load;
+ stats->best_cpu = cpu;
+ env->sbc_best_flag = SBC_FLAG_COST_CSTATE_TIE_BREAKER;
+ return;
+ }
+
+ /* C-state is the same. Use prev CPU to break the tie */
+ if (cpu == prev_cpu) {
+ stats->best_cpu = cpu;
+ env->sbc_best_flag = SBC_FLAG_COST_CSTATE_PREV_CPU_TIE_BREAKER;
+ return;
+ }
+
+ if (stats->best_cpu != prev_cpu &&
+ ((wakeup_latency == 0 && env->cpu_load < stats->best_load) ||
+ (wakeup_latency > 0 && env->cpu_load > stats->best_load))) {
+ stats->best_load = env->cpu_load;
+ stats->best_cpu = cpu;
+ env->sbc_best_flag = SBC_FLAG_CSTATE_LOAD;
+ }
+}
+#else /* CONFIG_SCHED_HMP_CSTATE_AWARE */
+static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
+ struct cpu_select_env *env, int cpu_cost)
+{
+ int prev_cpu = env->prev_cpu;
+
+ if (cpu != prev_cpu && cpus_share_cache(prev_cpu, cpu)) {
+ if (stats->best_sibling_cpu_cost > cpu_cost ||
+ (stats->best_sibling_cpu_cost == cpu_cost &&
+ stats->best_sibling_cpu_load > env->cpu_load)) {
+ stats->best_sibling_cpu_cost = cpu_cost;
+ stats->best_sibling_cpu_load = env->cpu_load;
+ stats->best_sibling_cpu = cpu;
+ }
+ }
+
+ if ((cpu_cost < stats->min_cost) ||
+ ((stats->best_cpu != prev_cpu &&
+ stats->min_load > env->cpu_load) || cpu == prev_cpu)) {
+ if (env->need_idle) {
+ if (idle_cpu(cpu)) {
+ stats->min_cost = cpu_cost;
+ stats->best_idle_cpu = cpu;
+ }
+ } else {
+ stats->min_cost = cpu_cost;
+ stats->min_load = env->cpu_load;
+ stats->best_cpu = cpu;
+ env->sbc_best_flag = SBC_FLAG_MIN_COST;
+ }
+ }
+}
+#endif /* CONFIG_SCHED_HMP_CSTATE_AWARE */
+
+static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
+ struct cpu_select_env *env)
+{
+ int cpu_cost;
+
+ /*
+ * We try to find the least loaded *busy* CPU irrespective
+ * of the power cost.
+ */
+ if (env->pack_task)
+ cpu_cost = cpu_min_power_cost(cpu);
+
+ else
+ cpu_cost = power_cost(cpu, task_load(env->p) +
+ cpu_cravg_sync(cpu, env->sync));
+
+ if (cpu_cost <= stats->min_cost)
+ __update_cluster_stats(cpu, stats, env, cpu_cost);
+}
+
+static void find_best_cpu_in_cluster(struct sched_cluster *c,
+ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ int i;
+ struct cpumask search_cpus;
+
+ cpumask_and(&search_cpus, &env->search_cpus, &c->cpus);
+
+ env->need_idle = wake_to_idle(env->p) || c->wake_up_idle;
+
+ for_each_cpu(i, &search_cpus) {
+ env->cpu_load = cpu_load_sync(i, env->sync);
+
+ trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i),
+ power_cost(i, task_load(env->p) +
+ cpu_cravg_sync(i, env->sync)), 0);
+
+ if (skip_cpu(i, env))
+ continue;
+
+ update_spare_capacity(stats, env, i, c->capacity,
+ env->cpu_load);
+
+ /*
+ * need_idle takes precedence over sched boost, but when both
+ * are set, the idlest CPU among all the clusters is selected
+ * when boost_policy = BOOST_ON_ALL, whereas the idlest CPU in the
+ * big cluster is selected when boost_policy = BOOST_ON_BIG.
+ */
+ if ((!env->need_idle &&
+ env->boost_policy != SCHED_BOOST_NONE) ||
+ env->need_waker_cluster ||
+ sched_cpu_high_irqload(i) ||
+ spill_threshold_crossed(env, cpu_rq(i)))
+ continue;
+
+ update_cluster_stats(i, stats, env);
+ }
+}
+
+static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats)
+{
+ stats->best_cpu = stats->best_idle_cpu = -1;
+ stats->best_capacity_cpu = stats->best_sibling_cpu = -1;
+ stats->min_cost = stats->best_sibling_cpu_cost = INT_MAX;
+ stats->min_load = stats->best_sibling_cpu_load = ULLONG_MAX;
+ stats->highest_spare_capacity = 0;
+ stats->least_loaded_cpu = -1;
+ stats->best_cpu_wakeup_latency = INT_MAX;
+ /* No need to initialize stats->best_load */
+}
+
+static inline bool env_has_special_flags(struct cpu_select_env *env)
+{
+ if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
+ env->reason)
+ return true;
+
+ return false;
+}
+
+static inline bool
+bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ int prev_cpu;
+ struct task_struct *task = env->p;
+ struct sched_cluster *cluster;
+
+ if (!task->ravg.mark_start || !sched_short_sleep_task_threshold)
+ return false;
+
+ prev_cpu = env->prev_cpu;
+ if (!cpumask_test_cpu(prev_cpu, &env->search_cpus))
+ return false;
+
+ if (task->ravg.mark_start - task->last_cpu_selected_ts >=
+ sched_long_cpu_selection_threshold)
+ return false;
+
+ /*
+ * This function should be used by the task wake up path only, as it
+ * assumes p->last_switch_out_ts is the last sleep time.
+ * p->last_switch_out_ts can denote last preemption time as well as
+ * last sleep time.
+ */
+ if (task->ravg.mark_start - task->last_switch_out_ts >=
+ sched_short_sleep_task_threshold)
+ return false;
+
+ env->task_load = scale_load_to_cpu(task_load(task), prev_cpu);
+ cluster = cpu_rq(prev_cpu)->cluster;
+
+ if (!task_load_will_fit(task, env->task_load, prev_cpu,
+ sched_boost_policy())) {
+
+ __set_bit(cluster->id, env->backup_list);
+ __clear_bit(cluster->id, env->candidate_list);
+ return false;
+ }
+
+ env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
+ if (sched_cpu_high_irqload(prev_cpu) ||
+ spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
+ update_spare_capacity(stats, env, prev_cpu,
+ cluster->capacity, env->cpu_load);
+ cpumask_clear_cpu(prev_cpu, &env->search_cpus);
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool
+wake_to_waker_cluster(struct cpu_select_env *env)
+{
+ return env->sync &&
+ task_load(current) > sched_big_waker_task_load &&
+ task_load(env->p) < sched_small_wakee_task_load;
+}
+
+static inline bool
+bias_to_waker_cpu(struct cpu_select_env *env, int cpu)
+{
+ return sysctl_sched_prefer_sync_wakee_to_waker &&
+ cpu_rq(cpu)->nr_running == 1 &&
+ cpumask_test_cpu(cpu, &env->search_cpus);
+}
+
+static inline int
+cluster_allowed(struct cpu_select_env *env, struct sched_cluster *cluster)
+{
+ return cpumask_intersects(&env->search_cpus, &cluster->cpus);
+}
+
+/* return cheapest cpu that can fit this task */
+static int select_best_cpu(struct task_struct *p, int target, int reason,
+ int sync)
+{
+ struct sched_cluster *cluster, *pref_cluster = NULL;
+ struct cluster_cpu_stats stats;
+ struct related_thread_group *grp;
+ unsigned int sbc_flag = 0;
+ int cpu = raw_smp_processor_id();
+ bool special;
+
+ struct cpu_select_env env = {
+ .p = p,
+ .reason = reason,
+ .need_idle = wake_to_idle(p),
+ .need_waker_cluster = 0,
+ .sync = sync,
+ .prev_cpu = target,
+ .rtg = NULL,
+ .sbc_best_flag = 0,
+ .sbc_best_cluster_flag = 0,
+ .pack_task = false,
+ };
+
+ env.boost_policy = task_sched_boost(p) ?
+ sched_boost_policy() : SCHED_BOOST_NONE;
+
+ bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS);
+ bitmap_zero(env.backup_list, NR_CPUS);
+
+ cpumask_and(&env.search_cpus, tsk_cpus_allowed(p), cpu_active_mask);
+ cpumask_andnot(&env.search_cpus, &env.search_cpus, cpu_isolated_mask);
+
+ init_cluster_cpu_stats(&stats);
+ special = env_has_special_flags(&env);
+
+ rcu_read_lock();
+
+ grp = task_related_thread_group(p);
+
+ if (grp && grp->preferred_cluster) {
+ pref_cluster = grp->preferred_cluster;
+ if (!cluster_allowed(&env, pref_cluster))
+ clear_bit(pref_cluster->id, env.candidate_list);
+ else
+ env.rtg = grp;
+ } else if (!special) {
+ cluster = cpu_rq(cpu)->cluster;
+ if (wake_to_waker_cluster(&env)) {
+ if (bias_to_waker_cpu(&env, cpu)) {
+ target = cpu;
+ sbc_flag = SBC_FLAG_WAKER_CLUSTER |
+ SBC_FLAG_WAKER_CPU;
+ goto out;
+ } else if (cluster_allowed(&env, cluster)) {
+ env.need_waker_cluster = 1;
+ bitmap_zero(env.candidate_list, NR_CPUS);
+ __set_bit(cluster->id, env.candidate_list);
+ env.sbc_best_cluster_flag =
+ SBC_FLAG_WAKER_CLUSTER;
+ }
+ } else if (bias_to_prev_cpu(&env, &stats)) {
+ sbc_flag = SBC_FLAG_PREV_CPU;
+ goto out;
+ }
+ }
+
+ if (!special && is_short_burst_task(p)) {
+ env.pack_task = true;
+ sbc_flag = SBC_FLAG_PACK_TASK;
+ }
+retry:
+ cluster = select_least_power_cluster(&env);
+
+ if (!cluster)
+ goto out;
+
+ /*
+ * 'cluster' now points to the minimum power cluster which can satisfy
+ * the task's perf goals. Walk down the cluster list starting with that
+ * cluster. For non-small tasks, skip clusters that don't have
+ * mostly_idle/idle cpus.
+ */
+
+ do {
+ find_best_cpu_in_cluster(cluster, &env, &stats);
+
+ } while ((cluster = next_best_cluster(cluster, &env, &stats)));
+
+ if (env.need_idle) {
+ if (stats.best_idle_cpu >= 0) {
+ target = stats.best_idle_cpu;
+ sbc_flag |= SBC_FLAG_IDLE_CSTATE;
+ } else if (stats.least_loaded_cpu >= 0) {
+ target = stats.least_loaded_cpu;
+ sbc_flag |= SBC_FLAG_IDLE_LEAST_LOADED;
+ }
+ } else if (stats.best_cpu >= 0) {
+ if (stats.best_sibling_cpu >= 0 &&
+ stats.best_cpu != task_cpu(p) &&
+ stats.min_cost == stats.best_sibling_cpu_cost) {
+ stats.best_cpu = stats.best_sibling_cpu;
+ sbc_flag |= SBC_FLAG_BEST_SIBLING;
+ }
+ sbc_flag |= env.sbc_best_flag;
+ target = stats.best_cpu;
+ } else {
+ if (env.rtg && env.boost_policy == SCHED_BOOST_NONE) {
+ env.rtg = NULL;
+ goto retry;
+ }
+
+ /*
+ * With boost_policy == SCHED_BOOST_ON_BIG, we reach here with
+ * backup_list = little cluster, candidate_list = none and
+ * stats->best_capacity_cpu points the best spare capacity
+ * CPU among the CPUs in the big cluster.
+ */
+ if (env.boost_policy == SCHED_BOOST_ON_BIG &&
+ stats.best_capacity_cpu >= 0)
+ sbc_flag |= SBC_FLAG_BOOST_CLUSTER;
+ else
+ find_backup_cluster(&env, &stats);
+
+ if (stats.best_capacity_cpu >= 0) {
+ target = stats.best_capacity_cpu;
+ sbc_flag |= SBC_FLAG_BEST_CAP_CPU;
+ }
+ }
+ p->last_cpu_selected_ts = sched_ktime_clock();
+out:
+ sbc_flag |= env.sbc_best_cluster_flag;
+ rcu_read_unlock();
+ trace_sched_task_load(p, sched_boost_policy() && task_sched_boost(p),
+ env.reason, env.sync, env.need_idle, sbc_flag, target);
+ return target;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+
+static inline struct task_group *next_task_group(struct task_group *tg)
+{
+ tg = list_entry_rcu(tg->list.next, typeof(struct task_group), list);
+
+ return (&tg->list == &task_groups) ? NULL : tg;
+}
+
+/* Iterate over all cfs_rq in a cpu */
+#define for_each_cfs_rq(cfs_rq, tg, cpu) \
+ for (tg = container_of(&task_groups, struct task_group, list); \
+ ((tg = next_task_group(tg)) && (cfs_rq = tg->cfs_rq[cpu]));)
+
+void reset_cfs_rq_hmp_stats(int cpu, int reset_cra)
+{
+ struct task_group *tg;
+ struct cfs_rq *cfs_rq;
+
+ rcu_read_lock();
+
+ for_each_cfs_rq(cfs_rq, tg, cpu)
+ reset_hmp_stats(&cfs_rq->hmp_stats, reset_cra);
+
+ rcu_read_unlock();
+}
+
+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
+
+static void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra);
+static void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra);
+
+/* Add a task's contribution to a cpu's HMP statistics */
+void _inc_hmp_sched_stats_fair(struct rq *rq,
+ struct task_struct *p, int change_cra)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+
+ /*
+ * Although the check below is not strictly required (as
+ * inc/dec_nr_big_task and inc/dec_cumulative_runnable_avg, called
+ * from inc_cfs_rq_hmp_stats(), have similar checks), we gain a bit of
+ * efficiency by short-circuiting the for_each_sched_entity() loop when
+ * sched_disable_window_stats is set.
+ */
+ if (sched_disable_window_stats)
+ return;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ inc_cfs_rq_hmp_stats(cfs_rq, p, change_cra);
+ if (cfs_rq_throttled(cfs_rq))
+ break;
+ }
+
+ /* Update rq->hmp_stats only if we didn't find any throttled cfs_rq */
+ if (!se)
+ inc_rq_hmp_stats(rq, p, change_cra);
+}
+
+/* Remove a task's contribution from a cpu's HMP statistics */
+static void
+_dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p, int change_cra)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+
+ /* See comment on efficiency in _inc_hmp_sched_stats_fair */
+ if (sched_disable_window_stats)
+ return;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ dec_cfs_rq_hmp_stats(cfs_rq, p, change_cra);
+ if (cfs_rq_throttled(cfs_rq))
+ break;
+ }
+
+ /* Update rq->hmp_stats only if we didn't find any throttled cfs_rq */
+ if (!se)
+ dec_rq_hmp_stats(rq, p, change_cra);
+}
+
+static void inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+ _inc_hmp_sched_stats_fair(rq, p, 1);
+}
+
+static void dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+ _dec_hmp_sched_stats_fair(rq, p, 1);
+}
+
+static void fixup_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p,
+ u32 new_task_load, u32 new_pred_demand)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+ s64 task_load_delta = (s64)new_task_load - task_load(p);
+ s64 pred_demand_delta = PRED_DEMAND_DELTA;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+
+ fixup_cumulative_runnable_avg(&cfs_rq->hmp_stats, p,
+ task_load_delta,
+ pred_demand_delta);
+ fixup_nr_big_tasks(&cfs_rq->hmp_stats, p, task_load_delta);
+ if (cfs_rq_throttled(cfs_rq))
+ break;
+ }
+
+ /* Fix up rq->hmp_stats only if we didn't find any throttled cfs_rq */
+ if (!se) {
+ fixup_cumulative_runnable_avg(&rq->hmp_stats, p,
+ task_load_delta,
+ pred_demand_delta);
+ fixup_nr_big_tasks(&rq->hmp_stats, p, task_load_delta);
+ }
+}
+
+static int task_will_be_throttled(struct task_struct *p);
+
+#else /* CONFIG_CFS_BANDWIDTH */
+
+inline void reset_cfs_rq_hmp_stats(int cpu, int reset_cra) { }
+
+static void
+inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+ inc_nr_big_task(&rq->hmp_stats, p);
+ inc_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void
+dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+ dec_nr_big_task(&rq->hmp_stats, p);
+ dec_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+static void
+fixup_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p,
+ u32 new_task_load, u32 new_pred_demand)
+{
+ s64 task_load_delta = (s64)new_task_load - task_load(p);
+ s64 pred_demand_delta = PRED_DEMAND_DELTA;
+
+ fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta,
+ pred_demand_delta);
+ fixup_nr_big_tasks(&rq->hmp_stats, p, task_load_delta);
+}
+
+static inline int task_will_be_throttled(struct task_struct *p)
+{
+ return 0;
+}
+
+void _inc_hmp_sched_stats_fair(struct rq *rq,
+ struct task_struct *p, int change_cra)
+{
+ inc_nr_big_task(&rq->hmp_stats, p);
+}
+
+#endif /* CONFIG_CFS_BANDWIDTH */
+
+/*
+ * Reset balance_interval at all sched_domain levels of given cpu, so that it
+ * honors kick.
+ */
+static inline void reset_balance_interval(int cpu)
+{
+ struct sched_domain *sd;
+
+ if (cpu >= nr_cpu_ids)
+ return;
+
+ rcu_read_lock();
+ for_each_domain(cpu, sd)
+ sd->balance_interval = 0;
+ rcu_read_unlock();
+}
+
+/*
+ * Check if a task is on the "wrong" cpu (i.e. its current cpu is not the ideal
+ * cpu as per its demand or priority).
+ *
+ * Returns reason why task needs to be migrated
+ */
+static inline int migration_needed(struct task_struct *p, int cpu)
+{
+ int nice;
+ struct related_thread_group *grp;
+
+ if (p->state != TASK_RUNNING || p->nr_cpus_allowed == 1)
+ return 0;
+
+ /* No need to migrate task that is about to be throttled */
+ if (task_will_be_throttled(p))
+ return 0;
+
+ if (sched_boost_policy() == SCHED_BOOST_ON_BIG &&
+ cpu_capacity(cpu) != max_capacity && task_sched_boost(p))
+ return UP_MIGRATION;
+
+ if (sched_cpu_high_irqload(cpu))
+ return IRQLOAD_MIGRATION;
+
+ nice = task_nice(p);
+ rcu_read_lock();
+ grp = task_related_thread_group(p);
+ /*
+ * Don't assume higher capacity means higher power. If the task
+ * is running on the power efficient CPU, avoid migrating it
+ * to a lower capacity cluster.
+ */
+ if (!grp && (nice > SCHED_UPMIGRATE_MIN_NICE ||
+ upmigrate_discouraged(p)) &&
+ cpu_capacity(cpu) > min_capacity &&
+ cpu_max_power_cost(cpu) == max_power_cost) {
+ rcu_read_unlock();
+ return DOWN_MIGRATION;
+ }
+
+ if (!task_will_fit(p, cpu)) {
+ rcu_read_unlock();
+ return UP_MIGRATION;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static inline int
+kick_active_balance(struct rq *rq, struct task_struct *p, int new_cpu)
+{
+ unsigned long flags;
+ int rc = 0;
+
+ /* Invoke active balance to force migrate currently running task */
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ if (!rq->active_balance) {
+ rq->active_balance = 1;
+ rq->push_cpu = new_cpu;
+ get_task_struct(p);
+ rq->push_task = p;
+ rc = 1;
+ }
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+ return rc;
+}
+
+static DEFINE_RAW_SPINLOCK(migration_lock);
+
+static bool do_migration(int reason, int new_cpu, int cpu)
+{
+ if ((reason == UP_MIGRATION || reason == DOWN_MIGRATION)
+ && same_cluster(new_cpu, cpu))
+ return false;
+
+ /* Inter cluster high irqload migrations are OK */
+ return new_cpu != cpu;
+}
+
+/*
+ * Check if currently running task should be migrated to a better cpu.
+ *
+ * Todo: Effect this via changes to nohz_balancer_kick() and load balance?
+ */
+void check_for_migration(struct rq *rq, struct task_struct *p)
+{
+ int cpu = cpu_of(rq), new_cpu;
+ int active_balance = 0, reason;
+
+ reason = migration_needed(p, cpu);
+ if (!reason)
+ return;
+
+ raw_spin_lock(&migration_lock);
+ new_cpu = select_best_cpu(p, cpu, reason, 0);
+
+ if (do_migration(reason, new_cpu, cpu)) {
+ active_balance = kick_active_balance(rq, p, new_cpu);
+ if (active_balance)
+ mark_reserved(new_cpu);
+ }
+
+ raw_spin_unlock(&migration_lock);
+
+ if (active_balance)
+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, rq,
+ &rq->active_balance_work);
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+
+static void init_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq)
+{
+ cfs_rq->hmp_stats.nr_big_tasks = 0;
+ cfs_rq->hmp_stats.cumulative_runnable_avg = 0;
+ cfs_rq->hmp_stats.pred_demands_sum = 0;
+}
+
+static void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra)
+{
+ inc_nr_big_task(&cfs_rq->hmp_stats, p);
+ if (change_cra)
+ inc_cumulative_runnable_avg(&cfs_rq->hmp_stats, p);
+}
+
+static void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra)
+{
+ dec_nr_big_task(&cfs_rq->hmp_stats, p);
+ if (change_cra)
+ dec_cumulative_runnable_avg(&cfs_rq->hmp_stats, p);
+}
+
+static void inc_throttled_cfs_rq_hmp_stats(struct hmp_sched_stats *stats,
+ struct cfs_rq *cfs_rq)
+{
+ stats->nr_big_tasks += cfs_rq->hmp_stats.nr_big_tasks;
+ stats->cumulative_runnable_avg +=
+ cfs_rq->hmp_stats.cumulative_runnable_avg;
+ stats->pred_demands_sum += cfs_rq->hmp_stats.pred_demands_sum;
+}
+
+static void dec_throttled_cfs_rq_hmp_stats(struct hmp_sched_stats *stats,
+ struct cfs_rq *cfs_rq)
+{
+ stats->nr_big_tasks -= cfs_rq->hmp_stats.nr_big_tasks;
+ stats->cumulative_runnable_avg -=
+ cfs_rq->hmp_stats.cumulative_runnable_avg;
+ stats->pred_demands_sum -= cfs_rq->hmp_stats.pred_demands_sum;
+
+ BUG_ON(stats->nr_big_tasks < 0 ||
+ (s64)stats->cumulative_runnable_avg < 0);
+ BUG_ON((s64)stats->pred_demands_sum < 0);
+}
+
+#else /* CONFIG_CFS_BANDWIDTH */
+
+static inline void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra) { }
+
+static inline void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra) { }
+
+#endif /* CONFIG_CFS_BANDWIDTH */
+
+#else /* CONFIG_SCHED_HMP */
+
+static inline void init_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq) { }
+
+static inline void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra) { }
+
+static inline void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra) { }
+
+#define dec_throttled_cfs_rq_hmp_stats(...)
+#define inc_throttled_cfs_rq_hmp_stats(...)
+
+#endif /* CONFIG_SCHED_HMP */
+
+#if (SCHED_LOAD_SHIFT - SCHED_LOAD_RESOLUTION) != 10 || SCHED_CAPACITY_SHIFT != 10
+#error "load tracking assumes 2^10 as unit"
+#endif
+
+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+
+/*
+ * We can represent the historical contribution to runnable average as the
+ * coefficients of a geometric series. To do this we sub-divide our runnable
+ * history into segments of approximately 1ms (1024us); label the segment that
+ * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
+ *
+ * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
+ * p0 p1 p2
+ * (now) (~1ms ago) (~2ms ago)
+ *
+ * Let u_i denote the fraction of p_i that the entity was runnable.
+ *
+ * We then designate the fractions u_i as our co-efficients, yielding the
+ * following representation of historical load:
+ * u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
+ *
+ * We choose y based on the width of a reasonable scheduling period, fixing:
+ * y^32 = 0.5
+ *
+ * This means that the contribution to load ~32ms ago (u_32) will be weighted
+ * approximately half as much as the contribution to load within the last ms
+ * (u_0).
+ *
+ * When a period "rolls over" and we have new u_0`, multiplying the previous
+ * sum again by y is sufficient to update:
+ * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
+ * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
+ */
+static __always_inline int
+__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
+ unsigned long weight, int running, struct cfs_rq *cfs_rq)
+{
+ u64 delta, scaled_delta, periods;
+ u32 contrib;
+ unsigned int delta_w, scaled_delta_w, decayed = 0;
+ unsigned long scale_freq, scale_cpu;
+
+ delta = now - sa->last_update_time;
+ /*
+ * This should only happen when time goes backwards, which it
+ * unfortunately does during sched clock init when we swap over to TSC.
+ */
+ if ((s64)delta < 0) {
+ sa->last_update_time = now;
+ return 0;
+ }
+
+ /*
+ * Use 1024ns as the unit of measurement since it's a reasonable
+ * approximation of 1us and fast to compute.
+ */
+ delta >>= 10;
+ if (!delta)
+ return 0;
+ sa->last_update_time = now;
+
+ scale_freq = arch_scale_freq_capacity(NULL, cpu);
+ scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+ trace_sched_contrib_scale_f(cpu, scale_freq, scale_cpu);
+
+ /* delta_w is the amount already accumulated against our next period */
+ delta_w = sa->period_contrib;
+ if (delta + delta_w >= 1024) {
+ decayed = 1;
+
+ /* how much left for next period will start over, we don't know yet */
+ sa->period_contrib = 0;
+
+ /*
+ * Now that we know we're crossing a period boundary, figure
+ * out how much from delta we need to complete the current
+ * period and accrue it.
+ */
+ delta_w = 1024 - delta_w;
+ scaled_delta_w = cap_scale(delta_w, scale_freq);
+ if (weight) {
+ sa->load_sum += weight * scaled_delta_w;
+ if (cfs_rq) {
+ cfs_rq->runnable_load_sum +=
+ weight * scaled_delta_w;
+ }
+ }
+ if (running)
+ sa->util_sum += scaled_delta_w * scale_cpu;
+
+ delta -= delta_w;
+
+ /* Figure out how many additional periods this update spans */
+ periods = delta / 1024;
+ delta %= 1024;
+
+ sa->load_sum = decay_load(sa->load_sum, periods + 1);
+ if (cfs_rq) {
+ cfs_rq->runnable_load_sum =
+ decay_load(cfs_rq->runnable_load_sum, periods + 1);
+ }
+ sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
+
+ /* Efficiently calculate \sum (1..n_period) 1024*y^i */
+ contrib = __compute_runnable_contrib(periods);
+ contrib = cap_scale(contrib, scale_freq);
+ if (weight) {
+ sa->load_sum += weight * contrib;
+ if (cfs_rq)
+ cfs_rq->runnable_load_sum += weight * contrib;
+ }
+ if (running)
+ sa->util_sum += contrib * scale_cpu;
+ }
/* Remainder of delta accrued against u_0` */
scaled_delta = cap_scale(delta, scale_freq);
if (cfs_rq)
cfs_rq->runnable_load_sum += weight * scaled_delta;
}
+
if (running)
sa->util_sum += scaled_delta * scale_cpu;
return decayed;
}
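/*
 * Numerically, y is chosen so that y^32 = 0.5: load contributed ~32ms ago
 * counts half as much as current load, ~64ms ago a quarter, and so on.
 * As a rough example, decay_load(1024, 32) ~= 512 and
 * decay_load(1024, 64) ~= 256; the exact values come from the
 * runnable_avg_yN_inv[] table above.
 */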
-#ifdef CONFIG_FAIR_GROUP_SCHED
/*
- * Updating tg's load_avg is necessary before update_cfs_share (which is done)
- * and effective_load (which is not done because it is too costly).
+ * Signed add and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define add_positive(_ptr, _val) do { \
+ typeof(_ptr) ptr = (_ptr); \
+ typeof(_val) val = (_val); \
+ typeof(*ptr) res, var = READ_ONCE(*ptr); \
+ \
+ res = var + val; \
+ \
+ if (val < 0 && res > var) \
+ res = 0; \
+ \
+ WRITE_ONCE(*ptr, res); \
+} while (0)
+
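/*
 * Example of the clamp above: for an unsigned long field currently holding
 * 100, add_positive(&field, -150) computes 100 + (-150), which wraps to a
 * value larger than 100; because the increment was negative, the result is
 * clamped to 0 rather than leaving a huge bogus average behind.
 */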
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/**
+ * update_tg_load_avg - update the tg's load avg
+ * @cfs_rq: the cfs_rq whose avg changed
+ * @force: update regardless of how small the difference
+ *
+ * This function 'ensures': tg->load_avg := \Sum tg->cfs_rq[]->avg.load.
+ * However, because tg->load_avg is a global value there are performance
+ * considerations.
+ *
+ * In order to avoid having to look at the other cfs_rq's, we use a
+ * differential update where we store the last value we propagated. This in
+ * turn allows skipping updates if the differential is 'small'.
+ *
+ * Updating tg's load_avg is necessary before update_cfs_share() (which is
+ * done) and effective_load() (which is not done because it is too costly).
*/
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
{
long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;
+ /*
+ * No need to update load_avg for root_task_group as it is not used.
+ */
+ if (cfs_rq->tg == &root_task_group)
+ return;
+
if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
atomic_long_add(delta, &cfs_rq->tg->load_avg);
cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
}
}
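/*
 * Example of the differential filter above: with tg_load_avg_contrib = 1024,
 * the update is propagated only when the new cfs_rq load differs by more than
 * 1024 / 64 = 16, so a change from 1024 to 1030 is skipped, while a change to
 * 1100 adds delta = 76 to tg->load_avg and records 1100 as the new
 * contribution.
 */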
+/*
+ * Called within set_task_rq() right before setting a task's cpu. The
+ * caller only guarantees p->pi_lock is held; no other assumptions,
+ * including the state of rq->lock, should be made.
+ */
+void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next)
+{
+ if (!sched_feat(ATTACH_AGE_LOAD))
+ return;
+
+ /*
+ * We are supposed to update the task to "current" time, so that it is up to
+ * date and ready to go to its new CPU/cfs_rq. But we have difficulty in
+ * getting what the current time is, so simply throw away the out-of-date
+ * time. This will result in the wakee task being less decayed, but giving
+ * the wakee more load does not sound bad.
+ */
+ if (se->avg.last_update_time && prev) {
+ u64 p_last_update_time;
+ u64 n_last_update_time;
+
+#ifndef CONFIG_64BIT
+ u64 p_last_update_time_copy;
+ u64 n_last_update_time_copy;
+
+ do {
+ p_last_update_time_copy = prev->load_last_update_time_copy;
+ n_last_update_time_copy = next->load_last_update_time_copy;
+
+ smp_rmb();
+
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+
+ } while (p_last_update_time != p_last_update_time_copy ||
+ n_last_update_time != n_last_update_time_copy);
+#else
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+#endif
+ __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
+ &se->avg, 0, 0, NULL);
+ se->avg.last_update_time = n_last_update_time;
+ }
+}
+
+/* Take into account change of utilization of a child task group */
+static inline void
+update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+ long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+
+ /* Nothing to update */
+ if (!delta)
+ return;
+
+ /* Set new sched_entity's utilization */
+ se->avg.util_avg = gcfs_rq->avg.util_avg;
+ se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+
+ /* Update parent cfs_rq utilization */
+ add_positive(&cfs_rq->avg.util_avg, delta);
+ cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+}
+
+/* Take into account change of load of a child task group */
+static inline void
+update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+ long delta, load = gcfs_rq->avg.load_avg;
+
+ /*
+ * If the load of the group cfs_rq is zero, the load of the
+ * sched_entity will also be zero, so we can skip the formula.
+ */
+ if (load) {
+ long tg_load;
+
+ /* Get tg's load and ensure tg_load > 0 */
+ tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
+
+ * Ensure tg_load >= load and is updated with the current load */
+ tg_load -= gcfs_rq->tg_load_avg_contrib;
+ tg_load += load;
+
+ /*
+ * We need to compute a correction term in the case that the
+ * task group is consuming more CPU than a task of equal
+ * weight. A task with a weight equals to tg->shares will have
+ * a load less or equal to scale_load_down(tg->shares).
+ * Similarly, the sched_entities that represent the task group
+ * at parent level, can't have a load higher than
+ * scale_load_down(tg->shares). And the Sum of sched_entities'
+ * load must be <= scale_load_down(tg->shares).
+ */
+ if (tg_load > scale_load_down(gcfs_rq->tg->shares)) {
+ /* scale gcfs_rq's load into tg's shares*/
+ load *= scale_load_down(gcfs_rq->tg->shares);
+ load /= tg_load;
+ }
+ }
+
+ delta = load - se->avg.load_avg;
+
+ /* Nothing to update */
+ if (!delta)
+ return;
+
+ /* Set new sched_entity's load */
+ se->avg.load_avg = load;
+ se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
+
+ /* Update parent cfs_rq load */
+ add_positive(&cfs_rq->avg.load_avg, delta);
+ cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
+
+ /*
+ * If the sched_entity is already enqueued, we also have to update the
+ * runnable load avg.
+ */
+ if (se->on_rq) {
+ /* Update parent cfs_rq runnable_load_avg */
+ add_positive(&cfs_rq->runnable_load_avg, delta);
+ cfs_rq->runnable_load_sum = cfs_rq->runnable_load_avg * LOAD_AVG_MAX;
+ }
+}
+
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
+{
+ cfs_rq->propagate_avg = 1;
+}
+
+static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = group_cfs_rq(se);
+
+ if (!cfs_rq->propagate_avg)
+ return 0;
+
+ cfs_rq->propagate_avg = 0;
+ return 1;
+}
+
+/* Update task and its cfs_rq load average */
+static inline int propagate_entity_load_avg(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq;
+
+ if (entity_is_task(se))
+ return 0;
+
+ if (!test_and_clear_tg_cfs_propagate(se))
+ return 0;
+
+ cfs_rq = cfs_rq_of(se);
+
+ set_tg_cfs_propagate(cfs_rq);
+
+ update_tg_cfs_util(cfs_rq, se);
+ update_tg_cfs_load(cfs_rq, se);
+
+ return 1;
+}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
+
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
+
+static inline int propagate_entity_load_avg(struct sched_entity *se)
+{
+ return 0;
+}
+
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
+static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
+{
+ if (&this_rq()->cfs == cfs_rq) {
+ /*
+ * There are a few boundary cases this might miss but it should
+ * get called often enough that that should (hopefully) not be
+ * a real problem -- added to that it only calls on the local
+ * CPU, so if we enqueue remotely we'll miss an update, but
+ * the next tick/schedule should update.
+ *
+ * It will not get called when we go idle, because the idle
+ * thread is a different class (!fair), nor will the utilization
+ * number include things like RT tasks.
+ *
+ * As is, the util number is not freq-invariant (we'd have to
+ * implement arch_scale_freq_capacity() for that).
+ *
+ * See cpu_util().
+ */
+ cpufreq_update_util(rq_of(cfs_rq), 0);
+ }
+}
+
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
/*
WRITE_ONCE(*ptr, res); \
} while (0)
-/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
-static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+/**
+ * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
+ * @now: current time, as per cfs_rq_clock_task()
+ * @cfs_rq: cfs_rq to update
+ * @update_freq: should we call cfs_rq_util_change() or will the call do so
+ *
+ * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
+ * avg. The immediate corollary is that all (fair) tasks must be attached, see
+ * post_init_entity_util_avg().
+ *
+ * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
+ *
+ * Returns true if the load decayed or we removed load.
+ *
+ * Since both these conditions indicate a changed cfs_rq->avg.load we should
+ * call update_tg_load_avg() when this function returns true.
+ */
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
{
struct sched_avg *sa = &cfs_rq->avg;
- int decayed, removed = 0;
+ int decayed, removed = 0, removed_util = 0;
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
sub_positive(&sa->load_avg, r);
sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
removed = 1;
+ set_tg_cfs_propagate(cfs_rq);
}
if (atomic_long_read(&cfs_rq->removed_util_avg)) {
long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
sub_positive(&sa->util_avg, r);
sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
+ removed_util = 1;
+ set_tg_cfs_propagate(cfs_rq);
}
decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif
+ /* Trace CPU load, unless cfs_rq belongs to a non-root task_group */
+ if (cfs_rq == &rq_of(cfs_rq)->cfs)
+ trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
+
+ if (update_freq && (decayed || removed_util))
+ cfs_rq_util_change(cfs_rq);
+
return decayed || removed;
}
+/*
+ * Optional action to be done while updating the load average
+ */
+#define UPDATE_TG 0x1
+#define SKIP_AGE_LOAD 0x2
+
/* Update task and its cfs_rq load average */
-static inline void update_load_avg(struct sched_entity *se, int update_tg)
+static inline void update_load_avg(struct sched_entity *se, int flags)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
u64 now = cfs_rq_clock_task(cfs_rq);
int cpu = cpu_of(rq_of(cfs_rq));
+ int decayed;
+ void *ptr = NULL;
/*
* Track task load average for carrying it to new CPU after migrated, and
* track group sched_entity load average for task_h_load calc in migration
*/
- __update_load_avg(now, cpu, &se->avg,
+ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) {
+ __update_load_avg(now, cpu, &se->avg,
se->on_rq * scale_load_down(se->load.weight),
cfs_rq->curr == se, NULL);
+ }
+
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
+ decayed |= propagate_entity_load_avg(se);
- if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
+ if (decayed && (flags & UPDATE_TG))
update_tg_load_avg(cfs_rq, 0);
+
+ if (entity_is_task(se)) {
+#ifdef CONFIG_SCHED_WALT
+ ptr = (void *)&(task_of(se)->ravg);
+#endif
+ trace_sched_load_avg_task(task_of(se), &se->avg, ptr);
+ }
}
+/**
+ * attach_entity_load_avg - attach this entity to its cfs_rq load avg
+ * @cfs_rq: cfs_rq to attach to
+ * @se: sched_entity to attach
+ *
+ * Must call update_cfs_rq_load_avg() before this, since we rely on
+ * cfs_rq->avg.last_update_time being current.
+ */
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- if (!sched_feat(ATTACH_AGE_LOAD))
- goto skip_aging;
-
- /*
- * If we got migrated (either between CPUs or between cgroups) we'll
- * have aged the average right before clearing @last_update_time.
- */
- if (se->avg.last_update_time) {
- __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
- &se->avg, 0, 0, NULL);
-
- /*
- * XXX: we could have just aged the entire load away if we've been
- * absent from the fair class for too long.
- */
- }
-
-skip_aging:
se->avg.last_update_time = cfs_rq->avg.last_update_time;
cfs_rq->avg.load_avg += se->avg.load_avg;
cfs_rq->avg.load_sum += se->avg.load_sum;
cfs_rq->avg.util_avg += se->avg.util_avg;
cfs_rq->avg.util_sum += se->avg.util_sum;
+ set_tg_cfs_propagate(cfs_rq);
+
+ cfs_rq_util_change(cfs_rq);
}
+/**
+ * detach_entity_load_avg - detach this entity from its cfs_rq load avg
+ * @cfs_rq: cfs_rq to detach from
+ * @se: sched_entity to detach
+ *
+ * Must call update_cfs_rq_load_avg() before this, since we rely on
+ * cfs_rq->avg.last_update_time being current.
+ */
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
- &se->avg, se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);
sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+ set_tg_cfs_propagate(cfs_rq);
+
+ cfs_rq_util_change(cfs_rq);
}
/* Add the load generated by se into cfs_rq's load average */
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct sched_avg *sa = &se->avg;
- u64 now = cfs_rq_clock_task(cfs_rq);
- int migrated, decayed;
-
- migrated = !sa->last_update_time;
- if (!migrated) {
- __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
- se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);
- }
-
- decayed = update_cfs_rq_load_avg(now, cfs_rq);
cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;
- if (migrated)
+ if (!sa->last_update_time) {
attach_entity_load_avg(cfs_rq, se);
-
- if (decayed || migrated)
update_tg_load_avg(cfs_rq, 0);
+ }
}
/* Remove the runnable load generated by se from cfs_rq's runnable load average */
static inline void
dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- update_load_avg(se, 1);
-
cfs_rq->runnable_load_avg =
max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
cfs_rq->runnable_load_sum =
#endif
/*
+ * Synchronize entity load avg of dequeued entity without locking
+ * the previous rq.
+ */
+void sync_entity_load_avg(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 last_update_time;
+
+ last_update_time = cfs_rq_last_update_time(cfs_rq);
+ __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+}
+
+/*
* Task first catches up with cfs_rq, and then subtract
* itself from the cfs_rq (task must be off the queue now).
*/
void remove_entity_load_avg(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 last_update_time;
/*
- * Newly created task or never used group entity should not be removed
- * from its (source) cfs_rq
+ * tasks cannot exit without having gone through wake_up_new_task() ->
+ * post_init_entity_util_avg() which will have added things to the
+ * cfs_rq, so we can remove unconditionally.
+ *
+ * Similarly for groups, they will have passed through
+ * post_init_entity_util_avg() before unregister_sched_fair_group()
+ * calls this.
*/
- if (se->avg.last_update_time == 0)
- return;
-
- last_update_time = cfs_rq_last_update_time(cfs_rq);
- __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+ sync_entity_load_avg(se);
atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
}
#else /* CONFIG_SMP */
-static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
+{
+ return 0;
+}
+
+#define UPDATE_TG 0x0
+#define SKIP_AGE_LOAD 0x0
+
+static inline void update_load_avg(struct sched_entity *se, int not_used1) {}
static inline void
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static inline void
return 0;
}
+static inline void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra) { }
+
+static inline void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+ struct task_struct *p, int change_cra) { }
+
#endif /* CONFIG_SMP */
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
trace_sched_stat_blocked(tsk, delta);
+ trace_sched_blocked_reason(tsk);
/*
* Blocking time is in units of nanosecs, so shift by
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
+ update_load_avg(se, UPDATE_TG);
enqueue_entity_load_avg(cfs_rq, se);
+ update_cfs_shares(se);
account_entity_enqueue(cfs_rq, se);
- update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0);
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
+
+ /*
+ * When dequeuing a sched_entity, we must:
+ * - Update loads to have both entity and cfs_rq synced with now.
+ * - Subtract its load from the cfs_rq->runnable_avg.
+ * - Subtract its previous weight from cfs_rq->load.weight.
+ * - For group entity, update its weight to reflect the new share
+ * of its group cfs_rq.
+ */
+ update_load_avg(se, UPDATE_TG);
dequeue_entity_load_avg(cfs_rq, se);
update_stats_dequeue(cfs_rq, se);
return_cfs_rq_runtime(cfs_rq);
update_min_vruntime(cfs_rq);
- update_cfs_shares(cfs_rq);
+ update_cfs_shares(se);
}
/*
*/
update_stats_wait_end(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
- update_load_avg(se, 1);
+ update_load_avg(se, UPDATE_TG);
}
update_stats_curr_start(cfs_rq, se);
/*
* Ensure that runnable average is periodically updated.
*/
- update_load_avg(curr, 1);
- update_cfs_shares(cfs_rq);
+ update_load_avg(curr, UPDATE_TG);
+ update_cfs_shares(curr);
#ifdef CONFIG_SCHED_HRTICK
/*
return cfs_bandwidth_used() && cfs_rq->throttled;
}
+#ifdef CONFIG_SCHED_HMP
+/*
+ * Check if task is part of a hierarchy where some cfs_rq does not have any
+ * runtime left.
+ *
+ * We can't rely on throttled_hierarchy() to do this test, as
+ * cfs_rq->throttle_count will not be updated yet when this function is called
+ * from scheduler_tick()
+ */
+static int task_will_be_throttled(struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq;
+
+ if (!cfs_bandwidth_used())
+ return 0;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ if (!cfs_rq->runtime_enabled)
+ continue;
+ if (cfs_rq->runtime_remaining <= 0)
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
/* check whether cfs_rq, or any parent, is throttled */
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
{
if (dequeue)
dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
qcfs_rq->h_nr_running -= task_delta;
+ dec_throttled_cfs_rq_hmp_stats(&qcfs_rq->hmp_stats, cfs_rq);
if (qcfs_rq->load.weight)
dequeue = 0;
}
- if (!se)
+ if (!se) {
sub_nr_running(rq, task_delta);
+ dec_throttled_cfs_rq_hmp_stats(&rq->hmp_stats, cfs_rq);
+ }
cfs_rq->throttled = 1;
cfs_rq->throttled_clock = rq_clock(rq);
start_cfs_bandwidth(cfs_b);
raw_spin_unlock(&cfs_b->lock);
+
+ /* Log effect on hmp stats after throttling */
+ trace_sched_cpu_load_cgroup(rq, idle_cpu(cpu_of(rq)),
+ sched_irqload(cpu_of(rq)),
+ power_cost(cpu_of(rq), 0),
+ cpu_temp(cpu_of(rq)));
}
void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
struct sched_entity *se;
int enqueue = 1;
long task_delta;
+ struct cfs_rq *tcfs_rq __maybe_unused = cfs_rq;
se = cfs_rq->tg->se[cpu_of(rq)];
if (enqueue)
enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
cfs_rq->h_nr_running += task_delta;
+ inc_throttled_cfs_rq_hmp_stats(&cfs_rq->hmp_stats, tcfs_rq);
if (cfs_rq_throttled(cfs_rq))
break;
}
- if (!se)
+ if (!se) {
add_nr_running(rq, task_delta);
+ inc_throttled_cfs_rq_hmp_stats(&rq->hmp_stats, tcfs_rq);
+ }
/* determine whether we need to wake up potentially idle cpu */
if (rq->curr == rq->idle && rq->cfs.nr_running)
resched_curr(rq);
+
+ /* Log effect on hmp stats after un-throttling */
+ trace_sched_cpu_load_cgroup(rq, idle_cpu(cpu_of(rq)),
+ sched_irqload(cpu_of(rq)),
+ power_cost(cpu_of(rq), 0),
+ cpu_temp(cpu_of(rq)));
}
static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
if (++count > 3) {
u64 new, old = ktime_to_ns(cfs_b->period);
- new = (old * 147) / 128; /* ~115% */
- new = min(new, max_cfs_quota_period);
-
- cfs_b->period = ns_to_ktime(new);
-
- /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
- cfs_b->quota *= new;
- cfs_b->quota = div64_u64(cfs_b->quota, old);
-
- pr_warn_ratelimited(
- "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
- smp_processor_id(),
- div_u64(new, NSEC_PER_USEC),
- div_u64(cfs_b->quota, NSEC_PER_USEC));
+ /*
+ * Grow period by a factor of 2 to avoid losing precision.
+ * Precision loss in the quota/period ratio can cause __cfs_schedulable
+ * to fail.
+ */
+ new = old * 2;
+ if (new < max_cfs_quota_period) {
+ cfs_b->period = ns_to_ktime(new);
+ cfs_b->quota *= 2;
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+ } else {
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(old, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+ }
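+
+ /*
+ * For example (hypothetical numbers): a 100ms period that keeps
+ * expiring back-to-back is grown to 200ms with the quota doubled,
+ * preserving the quota/period ratio; growth stops once doubling
+ * would reach max_cfs_quota_period.
+ */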
/* reset count so we don't come right back in here */
count = 0;
{
cfs_rq->runtime_enabled = 0;
INIT_LIST_HEAD(&cfs_rq->throttled_list);
+ init_cfs_rq_hmp_stats(cfs_rq);
}
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
WARN_ON(task_rq(p) != rq);
- if (cfs_rq->nr_running > 1) {
+ if (rq->cfs.h_nr_running > 1) {
u64 slice = sched_slice(cfs_rq, se);
u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
s64 delta = slice - ran;
/*
* called from enqueue/dequeue and updates the hrtick when the
- * current task is from our class and nr_running is low enough
- * to matter.
+ * current task is from our class.
*/
static void hrtick_update(struct rq *rq)
{
if (!hrtick_enabled(rq) || curr->sched_class != &fair_sched_class)
return;
- if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
- hrtick_start_fair(rq, curr);
+ hrtick_start_fair(rq, curr);
}
#else /* !CONFIG_SCHED_HRTICK */
static inline void
}
#endif
+#ifdef CONFIG_SMP
+static bool __cpu_overutilized(int cpu, int delta);
+static bool cpu_overutilized(int cpu);
+unsigned long boosted_cpu_util(int cpu);
+#else
+#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
+#endif
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+#ifdef CONFIG_SMP
+ int task_new = flags & ENQUEUE_WAKEUP_NEW;
+#endif
+
+ /*
+ * If in_iowait is set, the code below may not trigger any cpufreq
+ * utilization updates, so do it here explicitly with the IOWAIT flag
+ * passed.
+ */
+ if (p->in_iowait)
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
for_each_sched_entity(se) {
if (se->on_rq)
*
* note: in the case of encountering a throttled cfs_rq we will
* post the final h_nr_running increment below.
- */
+ */
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running++;
+ inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
flags = ENQUEUE_WAKEUP;
}
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
+ inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
if (cfs_rq_throttled(cfs_rq))
break;
- update_load_avg(se, 1);
- update_cfs_shares(cfs_rq);
+ update_load_avg(se, UPDATE_TG);
+ update_cfs_shares(se);
}
- if (!se)
+ if (!se) {
add_nr_running(rq, 1);
+ inc_rq_hmp_stats(rq, p, 1);
+ }
+
+#ifdef CONFIG_SMP
+
+ /*
+ * Update SchedTune accounting.
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ *
+ * We do it also in the case where we enqueue a throttled task;
+ * we could argue that a throttled task should not boost a CPU,
+ * however:
+ * a) properly implementing CPU boosting while considering throttled
+ * tasks would greatly increase the complexity of the solution
+ * b) it's not easy to quantify the benefits introduced by
+ * such a more complex solution.
+ * Thus, for the time being we go for the simple solution and boost
+ * also for throttled RQs.
+ */
+ schedtune_enqueue_task(p, cpu_of(rq));
+
+ if (energy_aware() && !se) {
+ if (!task_new && !rq->rd->overutilized &&
+ cpu_overutilized(rq->cpu)) {
+ rq->rd->overutilized = true;
+ trace_sched_overutilized(true);
+ }
+ }
+#endif /* CONFIG_SMP */
hrtick_update(rq);
}
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running--;
+ dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;
+ dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
if (cfs_rq_throttled(cfs_rq))
break;
- update_load_avg(se, 1);
- update_cfs_shares(cfs_rq);
+ update_load_avg(se, UPDATE_TG);
+ update_cfs_shares(se);
+ }
+
+ if (!se) {
+ sub_nr_running(rq, 1);
+ dec_rq_hmp_stats(rq, p, 1);
}
- if (!se)
- sub_nr_running(rq, 1);
+#ifdef CONFIG_SMP
+
+ /*
+ * Update SchedTune accounting
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ */
+ schedtune_dequeue_task(p, cpu_of(rq));
+
+#endif /* CONFIG_SMP */
hrtick_update(rq);
}
return max(rq->cpu_load[type-1], total);
}
-static unsigned long capacity_of(int cpu)
-{
- return cpu_rq(cpu)->cpu_capacity;
-}
-
-static unsigned long capacity_orig_of(int cpu)
-{
- return cpu_rq(cpu)->cpu_capacity_orig;
-}
static unsigned long cpu_avg_load_per_task(int cpu)
{
#endif
/*
+ * Returns the current capacity of cpu after applying both
+ * cpu and freq scaling.
+ */
+unsigned long capacity_curr_of(int cpu)
+{
+ return cpu_rq(cpu)->cpu_capacity_orig *
+ arch_scale_freq_capacity(NULL, cpu)
+ >> SCHED_CAPACITY_SHIFT;
+}
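+
+/*
+ * For example (hypothetical values): a CPU with cpu_capacity_orig = 1024
+ * currently running at half of its maximum frequency reports
+ * arch_scale_freq_capacity() = 512, so capacity_curr_of() returns
+ * (1024 * 512) >> SCHED_CAPACITY_SHIFT = 512.
+ */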
+
+struct energy_env {
+ struct sched_group *sg_top;
+ struct sched_group *sg_cap;
+ int cap_idx;
+ int util_delta;
+ int src_cpu;
+ int dst_cpu;
+ int trg_cpu;
+ int energy;
+ int payoff;
+ struct task_struct *task;
+ struct {
+ int before;
+ int after;
+ int delta;
+ int diff;
+ } nrg;
+ struct {
+ int before;
+ int after;
+ int delta;
+ } cap;
+};
+
+static int cpu_util_wake(int cpu, struct task_struct *p);
+
+/*
+ * __cpu_norm_util() returns the cpu util relative to a specific capacity,
+ * i.e. its busy ratio, in the range [0..SCHED_LOAD_SCALE], which is useful for
+ * energy calculations.
+ *
+ * Since util is a scale-invariant utilization defined as:
+ *
+ * util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time
+ *
+ * the normalized util can be found using the specific capacity.
+ *
+ * capacity = capacity_orig * curr_freq/max_freq
+ *
+ * norm_util = running_time/time ~ util/capacity
+ */
+static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity)
+{
+ if (util >= capacity)
+ return SCHED_CAPACITY_SCALE;
+
+ return (util << SCHED_CAPACITY_SHIFT)/capacity;
+}
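+
+/*
+ * For example (made-up numbers): util = 256 against capacity = 512 gives
+ * (256 << SCHED_CAPACITY_SHIFT) / 512 = 512, i.e. a 50% busy ratio on the
+ * 1024 scale; any util >= capacity saturates at SCHED_CAPACITY_SCALE.
+ */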
+
+static unsigned long group_max_util(struct energy_env *eenv)
+{
+ unsigned long max_util = 0;
+ unsigned long util;
+ int cpu;
+
+ for_each_cpu(cpu, sched_group_cpus(eenv->sg_cap)) {
+ util = cpu_util_wake(cpu, eenv->task);
+
+ /*
+ * If we are looking at the target CPU specified by the eenv,
+ * then we should add the (estimated) utilization of the task
+ * assuming we will wake it up on that CPU.
+ */
+ if (unlikely(cpu == eenv->trg_cpu))
+ util += eenv->util_delta;
+
+ max_util = max(max_util, util);
+ }
+
+ return max_util;
+}
+
+/*
+ * group_norm_util() returns the approximated group util relative to its
+ * current capacity (busy ratio), in the range [0..SCHED_LOAD_SCALE], for use
+ * in energy calculations.
+ *
+ * Since task executions may or may not overlap in time in the group the true
+ * normalized util is between MAX(cpu_norm_util(i)) and SUM(cpu_norm_util(i))
+ * when iterating over all CPUs in the group.
+ * The latter estimate is used as it leads to a more pessimistic energy
+ * estimate (more busy).
+ */
+static unsigned long
+group_norm_util(struct energy_env *eenv, struct sched_group *sg)
+{
+ unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
+ unsigned long util, util_sum = 0;
+ int cpu;
+
+ for_each_cpu(cpu, sched_group_cpus(sg)) {
+ util = cpu_util_wake(cpu, eenv->task);
+
+ /*
+ * If we are looking at the target CPU specified by the eenv,
+ * then we should add the (estimated) utilization of the task
+ * assuming we will wake it up on that CPU.
+ */
+ if (unlikely(cpu == eenv->trg_cpu))
+ util += eenv->util_delta;
+
+ util_sum += __cpu_norm_util(util, capacity);
+ }
+
+ return min_t(unsigned long, util_sum, SCHED_CAPACITY_SCALE);
+}
+
+static int find_new_capacity(struct energy_env *eenv,
+ const struct sched_group_energy * const sge)
+{
+ int idx, max_idx = sge->nr_cap_states - 1;
+ unsigned long util = group_max_util(eenv);
+
+ /* default is max_cap if we don't find a match */
+ eenv->cap_idx = max_idx;
+
+ for (idx = 0; idx < sge->nr_cap_states; idx++) {
+ if (sge->cap_states[idx].cap >= util) {
+ eenv->cap_idx = idx;
+ break;
+ }
+ }
+
+ return eenv->cap_idx;
+}
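+
+/*
+ * For example, with hypothetical cap_states of {110, 467, 1024}: a
+ * group_max_util() of 500 selects index 2 (cap 1024), since 467 < 500;
+ * a utilization above the largest capacity falls back to the last index.
+ */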
+
+static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
+{
+ int i, state = INT_MAX;
+ int src_in_grp, dst_in_grp;
+ long grp_util = 0;
+
+ /* Find the shallowest idle state in the sched group. */
+ for_each_cpu(i, sched_group_cpus(sg))
+ state = min(state, idle_get_state_idx(cpu_rq(i)));
+
+ /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
+ state++;
+
+ src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg));
+ dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg));
+ if (src_in_grp == dst_in_grp) {
+ /* both CPUs under consideration are in the same group or not in
+ * either group, migration should leave idle state the same.
+ */
+ goto end;
+ }
+
+ /*
+ * Try to estimate if a deeper idle state is
+ * achievable when we move the task.
+ */
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ grp_util += cpu_util_wake(i, eenv->task);
+ if (unlikely(i == eenv->trg_cpu))
+ grp_util += eenv->util_delta;
+ }
+
+ if (grp_util <=
+ ((long)sg->sgc->max_capacity * (int)sg->group_weight)) {
+ /* after moving, this group is at most partly
+ * occupied, so it should have some idle time.
+ */
+ int max_idle_state_idx = sg->sge->nr_idle_states - 2;
+ int new_state = grp_util * max_idle_state_idx;
+ if (grp_util <= 0)
+ /* group will have no util, use lowest state */
+ new_state = max_idle_state_idx + 1;
+ else {
+ /* for partially idle, linearly map util to idle
+ * states, excluding the lowest one. This does not
+ * correspond to the state we expect to enter in
+ * reality, but an indication of what might happen.
+ */
+ new_state = min(max_idle_state_idx, (int)
+ (new_state / sg->sgc->max_capacity));
+ new_state = max_idle_state_idx - new_state;
+ }
+ state = new_state;
+ } else {
+ /* After moving, the group will be fully occupied
+ * so assume it will not be idle at all.
+ */
+ state = 0;
+ }
+end:
+ return state;
+}
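+
+/*
+ * Sketch of the linear mapping above (hypothetical numbers): with
+ * nr_idle_states = 3 (max_idle_state_idx = 1), max_capacity = 1024 and a
+ * group weight of 2, grp_util = 1024 gives min(1, 1024 / 1024) = 1 and thus
+ * new_state = 1 - 1 = 0 (shallowest state), while grp_util <= 0 selects
+ * max_idle_state_idx + 1 = 2 (deepest state).
+ */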
+
+/*
+ * sched_group_energy(): Computes the absolute energy consumption of cpus
+ * belonging to the sched_group including shared resources shared only by
+ * members of the group. Iterates over all cpus in the hierarchy below the
+ * sched_group starting from the bottom working its way up before going to
+ * the next cpu until all cpus are covered at all levels. The current
+ * implementation is likely to gather the same util statistics multiple times.
+ * This can probably be done in a faster but more complex way.
+ * Note: sched_group_energy() may fail when racing with sched_domain updates.
+ */
+static int sched_group_energy(struct energy_env *eenv)
+{
+ struct cpumask visit_cpus;
+ u64 total_energy = 0;
+ int cpu_count;
+
+ WARN_ON(!eenv->sg_top->sge);
+
+ cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
+ /* If a cpu is hotplugged in while we are in this function,
+ * it does not appear in the existing visit_cpus mask
+ * which came from the sched_group pointer of the
+ * sched_domain pointed at by sd_ea for either the prev
+ * or next cpu and was dereferenced in __energy_diff.
+ * Since we will dereference sd_scs later as we iterate
+ * through the CPUs we expect to visit, new CPUs can
+ * be present which are not in the visit_cpus mask.
+ * Guard this with cpu_count.
+ */
+ cpu_count = cpumask_weight(&visit_cpus);
+
+ while (!cpumask_empty(&visit_cpus)) {
+ struct sched_group *sg_shared_cap = NULL;
+ int cpu = cpumask_first(&visit_cpus);
+ struct sched_domain *sd;
+
+ /*
+ * Is the group utilization affected by cpus outside this
+ * sched_group?
+ * This sd may have groups with cpus which were not present
+ * when we took visit_cpus.
+ */
+ sd = rcu_dereference(per_cpu(sd_scs, cpu));
+
+ if (sd && sd->parent)
+ sg_shared_cap = sd->parent->groups;
+
+ for_each_domain(cpu, sd) {
+ struct sched_group *sg = sd->groups;
+
+ /* Has this sched_domain already been visited? */
+ if (sd->child && group_first_cpu(sg) != cpu)
+ break;
+
+ do {
+ unsigned long group_util;
+ int sg_busy_energy, sg_idle_energy;
+ int cap_idx, idle_idx;
+
+ if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
+ eenv->sg_cap = sg_shared_cap;
+ else
+ eenv->sg_cap = sg;
+
+ cap_idx = find_new_capacity(eenv, sg->sge);
+
+ if (sg->group_weight == 1) {
+ /* Remove capacity of src CPU (before task move) */
+ if (eenv->trg_cpu == eenv->src_cpu &&
+ cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) {
+ eenv->cap.before = sg->sge->cap_states[cap_idx].cap;
+ eenv->cap.delta -= eenv->cap.before;
+ }
+ /* Add capacity of dst CPU (after task move) */
+ if (eenv->trg_cpu == eenv->dst_cpu &&
+ cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) {
+ eenv->cap.after = sg->sge->cap_states[cap_idx].cap;
+ eenv->cap.delta += eenv->cap.after;
+ }
+ }
+
+ idle_idx = group_idle_state(eenv, sg);
+ group_util = group_norm_util(eenv, sg);
+
+ sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power);
+ sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)
+ * sg->sge->idle_states[idle_idx].power);
+
+ total_energy += sg_busy_energy + sg_idle_energy;
+
+ if (!sd->child) {
+ /*
+ * cpu_count here is the number of
+ * cpus we expect to visit in this
+ * calculation. If we race against
+ * hotplug, we can have extra cpus
+ * added to the groups we are
+ * iterating which do not appear in
+ * the visit_cpus mask. In that case
+ * we are not able to calculate energy
+ * without restarting so we will bail
+ * out and use prev_cpu this time.
+ */
+ if (!cpu_count)
+ return -EINVAL;
+ cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
+ cpu_count--;
+ }
+
+ if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
+ goto next_cpu;
+
+ } while (sg = sg->next, sg != sd->groups);
+ }
+
+ /*
+ * If we raced with hotplug and got an sd NULL-pointer;
+ * returning a wrong energy estimation is better than
+ * entering an infinite loop.
+ * Specifically: If a cpu is unplugged after we took
+ * the visit_cpus mask, it no longer has an sd_scs
+ * pointer, so when we dereference it, we get NULL.
+ */
+ if (cpumask_test_cpu(cpu, &visit_cpus))
+ return -EINVAL;
+next_cpu:
+ cpumask_clear_cpu(cpu, &visit_cpus);
+ continue;
+ }
+
+ eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT;
+ return 0;
+}
+
+static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
+{
+ return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg));
+}
+
+static inline unsigned long task_util(struct task_struct *p);
+
+/*
+ * energy_diff(): Estimate the energy impact of changing the utilization
+ * distribution. eenv specifies the change: utilization amount, source, and
+ * destination cpu. Source or destination cpu may be -1 in which case the
+ * utilization is removed from or added to the system (e.g. task wake-up). If
+ * both are specified, the utilization is migrated.
+ */
+static inline int __energy_diff(struct energy_env *eenv)
+{
+ struct sched_domain *sd;
+ struct sched_group *sg;
+ int sd_cpu = -1, energy_before = 0, energy_after = 0;
+ int diff, margin;
+
+ struct energy_env eenv_before = {
+ .util_delta = task_util(eenv->task),
+ .src_cpu = eenv->src_cpu,
+ .dst_cpu = eenv->dst_cpu,
+ .trg_cpu = eenv->src_cpu,
+ .nrg = { 0, 0, 0, 0},
+ .cap = { 0, 0, 0 },
+ .task = eenv->task,
+ };
+
+ if (eenv->src_cpu == eenv->dst_cpu)
+ return 0;
+
+ sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu;
+ sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));
+
+ if (!sd)
+ return 0; /* Error */
+
+ sg = sd->groups;
+
+ do {
+ if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) {
+ eenv_before.sg_top = eenv->sg_top = sg;
+
+ if (sched_group_energy(&eenv_before))
+ return 0; /* Invalid result, abort */
+ energy_before += eenv_before.energy;
+
+ /* Keep track of SRC cpu (before) capacity */
+ eenv->cap.before = eenv_before.cap.before;
+ eenv->cap.delta = eenv_before.cap.delta;
+
+ if (sched_group_energy(eenv))
+ return 0; /* Invalid result, abort */
+ energy_after += eenv->energy;
+ }
+ } while (sg = sg->next, sg != sd->groups);
+
+ eenv->nrg.before = energy_before;
+ eenv->nrg.after = energy_after;
+ eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before;
+ eenv->payoff = 0;
+#ifndef CONFIG_SCHED_TUNE
+ trace_sched_energy_diff(eenv->task,
+ eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
+ eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
+ eenv->cap.before, eenv->cap.after, eenv->cap.delta,
+ eenv->nrg.delta, eenv->payoff);
+#endif
+ /*
+ * Dead-zone margin preventing too many migrations.
+ */
+
+ margin = eenv->nrg.before >> 6; /* ~1.56% */
+
+ diff = eenv->nrg.after - eenv->nrg.before;
+
+ eenv->nrg.diff = (abs(diff) < margin) ? 0 : eenv->nrg.diff;
+
+ return eenv->nrg.diff;
+}
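+
+/*
+ * Example of the dead-zone above (made-up numbers): with nrg.before = 1000
+ * the margin is 1000 >> 6 = 15, so an estimated difference smaller than 15
+ * energy units is reported as 0 and does not justify a migration.
+ */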
+
+#ifdef CONFIG_SCHED_TUNE
+
+struct target_nrg schedtune_target_nrg;
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+extern bool schedtune_initialized;
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
+/*
+ * System energy normalization
+ * Returns the normalized value, in the range [0..SCHED_CAPACITY_SCALE],
+ * corresponding to the specified energy variation.
+ */
+static inline int
+normalize_energy(int energy_diff)
+{
+ u32 normalized_nrg;
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ /* during early setup, we don't know the extents */
+ if (unlikely(!schedtune_initialized))
+ return energy_diff < 0 ? -1 : 1;
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
+#ifdef CONFIG_SCHED_DEBUG
+ {
+ int max_delta;
+
+ /* Check for boundaries */
+ max_delta = schedtune_target_nrg.max_power;
+ max_delta -= schedtune_target_nrg.min_power;
+ WARN_ON(abs(energy_diff) >= max_delta);
+ }
+#endif
+
+ /* Do scaling using positive numbers to increase the range */
+ normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;
+
+ /* Scale by energy magnitude */
+ normalized_nrg <<= SCHED_CAPACITY_SHIFT;
+
+ /* Normalize on max energy for target platform */
+ normalized_nrg = reciprocal_divide(
+ normalized_nrg, schedtune_target_nrg.rdiv);
+
+ return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
+}
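+
+/*
+ * Rough example, assuming schedtune_target_nrg.rdiv encodes a division by
+ * (max_power - min_power): for a 4096 wide energy range, an energy_diff of
+ * -1024 normalizes to about -(1024 << SCHED_CAPACITY_SHIFT) / 4096 = -256.
+ */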
+
+static inline int
+energy_diff(struct energy_env *eenv)
+{
+ int boost = schedtune_task_boost(eenv->task);
+ int nrg_delta;
+
+ /* Compute "absolute" energy diff */
+ __energy_diff(eenv);
+
+ /* Return energy diff when boost margin is 0 */
+ if (boost == 0) {
+ trace_sched_energy_diff(eenv->task,
+ eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
+ eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
+ eenv->cap.before, eenv->cap.after, eenv->cap.delta,
+ 0, -eenv->nrg.diff);
+ return eenv->nrg.diff;
+ }
+
+ /* Compute normalized energy diff */
+ nrg_delta = normalize_energy(eenv->nrg.diff);
+ eenv->nrg.delta = nrg_delta;
+
+ eenv->payoff = schedtune_accept_deltas(
+ eenv->nrg.delta,
+ eenv->cap.delta,
+ eenv->task);
+
+ trace_sched_energy_diff(eenv->task,
+ eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
+ eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
+ eenv->cap.before, eenv->cap.after, eenv->cap.delta,
+ eenv->nrg.delta, eenv->payoff);
+
+ /*
+ * When SchedTune is enabled, the energy_diff() function will return
+ * the computed energy payoff value. Since the energy_diff() return
+ * value is expected to be negative by its callers, this evaluation
+ * function returns a negative value each time the evaluation returns a
+ * positive payoff, which is the condition for accepting a scheduling
+ * decision.
+ */
+ return -eenv->payoff;
+}
+#else /* CONFIG_SCHED_TUNE */
+#define energy_diff(eenv) __energy_diff(eenv)
+#endif
+
+/*
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
* A waker of many should wake a different task than the one last awakened
* at a frequency roughly N times higher than one of its wakees. In order
* being client/server, worker/dispatcher, interrupt source or whatever is
* irrelevant, spread criteria is apparent partner count exceeds socket size.
*/
-static int wake_wide(struct task_struct *p)
+static int wake_wide(struct task_struct *p, int sibling_count_hint)
{
unsigned int master = current->wakee_flips;
unsigned int slave = p->wakee_flips;
- int factor = this_cpu_read(sd_llc_size);
+ int llc_size = this_cpu_read(sd_llc_size);
+
+ if (sibling_count_hint >= llc_size)
+ return 1;
if (master < slave)
swap(master, slave);
- if (slave < factor || master < slave * factor)
+ if (slave < llc_size || master < slave * llc_size)
return 0;
return 1;
}
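+
+/*
+ * For example (hypothetical counters): with an LLC of 4 CPUs, a
+ * sibling_count_hint >= 4 forces the wide path immediately; otherwise a
+ * waker with wakee_flips = 20 and a wakee with wakee_flips = 3 still
+ * returns 0 (3 < 4), so an affine wakeup remains possible.
+ */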
-static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+ int prev_cpu, int sync)
{
s64 this_load, load;
s64 this_eff_load, prev_eff_load;
- int idx, this_cpu, prev_cpu;
+ int idx, this_cpu;
struct task_group *tg;
unsigned long weight;
int balanced;
idx = sd->wake_idx;
this_cpu = smp_processor_id();
- prev_cpu = task_cpu(p);
load = source_load(prev_cpu, idx);
this_load = target_load(this_cpu, idx);
this_eff_load = 100;
this_eff_load *= capacity_of(prev_cpu);
- prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
- prev_eff_load *= capacity_of(this_cpu);
+ prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+ prev_eff_load *= capacity_of(this_cpu);
+
+ if (this_load > 0) {
+ this_eff_load *= this_load +
+ effective_load(tg, this_cpu, weight, weight);
+
+ prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
+ }
+
+ balanced = this_eff_load <= prev_eff_load;
+
+ schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+
+ if (!balanced)
+ return 0;
+
+ schedstat_inc(sd, ttwu_move_affine);
+ schedstat_inc(p, se.statistics.nr_wakeups_affine);
+
+ return 1;
+}
+
+static inline unsigned long task_util(struct task_struct *p)
+{
+ return p->se.avg.util_avg;
+}
+
+static inline unsigned long boosted_task_util(struct task_struct *task);
+
+static inline bool __task_fits(struct task_struct *p, int cpu, int util)
+{
+ unsigned long capacity = capacity_of(cpu);
+
+ util += boosted_task_util(p);
+
+ return (capacity * 1024) > (util * capacity_margin);
+}
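+
+/*
+ * For example, assuming the commonly used capacity_margin of 1280: a CPU
+ * with capacity 1024 fits the task as long as its boosted utilization stays
+ * below 1024 * 1024 / 1280 = 819, i.e. roughly 80% of the capacity.
+ */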
+
+static inline bool task_fits_max(struct task_struct *p, int cpu)
+{
+ unsigned long capacity = capacity_of(cpu);
+ unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+ if (capacity == max_capacity)
+ return true;
+
+ if (capacity * capacity_margin > max_capacity * 1024)
+ return true;
+
+ return __task_fits(p, cpu, 0);
+}
+
+static bool __cpu_overutilized(int cpu, int delta)
+{
+ return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin);
+}
+
+static bool cpu_overutilized(int cpu)
+{
+ return __cpu_overutilized(cpu, 0);
+}
+
+#ifdef CONFIG_SCHED_TUNE
+
+struct reciprocal_value schedtune_spc_rdiv;
+
+static long
+schedtune_margin(unsigned long signal, long boost)
+{
+ long long margin = 0;
+
+ /*
+ * Signal proportional compensation (SPC)
+ *
+ * The Boost (B) value is used to compute a Margin (M) which is
+ * proportional to the complement of the original Signal (S):
+ * M = B * (SCHED_CAPACITY_SCALE - S)
+ * The obtained M could be used by the caller to "boost" S.
+ */
+ if (boost >= 0) {
+ margin = SCHED_CAPACITY_SCALE - signal;
+ margin *= boost;
+ } else
+ margin = -signal * boost;
+
+ margin = reciprocal_divide(margin, schedtune_spc_rdiv);
+
+ if (boost < 0)
+ margin *= -1;
+ return margin;
+}
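+
+/*
+ * Worked example of the SPC rule (assuming boost is a percentage and
+ * schedtune_spc_rdiv encodes a division by 100): for S = 512 and B = 25,
+ * M = 25 * (1024 - 512) / 100 = 128, so the boosted signal becomes 640;
+ * for B = -25 the margin is -(512 * 25) / 100 = -128.
+ */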
+
+static inline int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+ int boost = schedtune_cpu_boost(cpu);
+
+ if (boost == 0)
+ return 0;
+
+ return schedtune_margin(util, boost);
+}
+
+static inline long
+schedtune_task_margin(struct task_struct *task)
+{
+ int boost = schedtune_task_boost(task);
+ unsigned long util;
+ long margin;
+
+ if (boost == 0)
+ return 0;
+
+ util = task_util(task);
+ margin = schedtune_margin(util, boost);
+
+ return margin;
+}
+
+#else /* CONFIG_SCHED_TUNE */
- if (this_load > 0) {
- this_eff_load *= this_load +
- effective_load(tg, this_cpu, weight, weight);
+static inline int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+ return 0;
+}
- prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
- }
+static inline int
+schedtune_task_margin(struct task_struct *task)
+{
+ return 0;
+}
- balanced = this_eff_load <= prev_eff_load;
+#endif /* CONFIG_SCHED_TUNE */
- schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+unsigned long
+boosted_cpu_util(int cpu)
+{
+ unsigned long util = cpu_util_freq(cpu);
+ long margin = schedtune_cpu_margin(util, cpu);
- if (!balanced)
- return 0;
+ trace_sched_boost_cpu(cpu, util, margin);
- schedstat_inc(sd, ttwu_move_affine);
- schedstat_inc(p, se.statistics.nr_wakeups_affine);
+ return util + margin;
+}
- return 1;
+static inline unsigned long
+boosted_task_util(struct task_struct *task)
+{
+ unsigned long util = task_util(task);
+ long margin = schedtune_task_margin(task);
+
+ trace_sched_boost_task(task, util, margin);
+
+ return util + margin;
+}
+
+static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+{
+ return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
}
/*
* find_idlest_group finds and returns the least busy CPU group within the
* domain.
+ *
+ * Assumes p is allowed on at least one CPU in sd.
*/
static struct sched_group *
find_idlest_group(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int sd_flag)
{
struct sched_group *idlest = NULL, *group = sd->groups;
- unsigned long min_load = ULONG_MAX, this_load = 0;
+ struct sched_group *most_spare_sg = NULL;
+ unsigned long min_load = ULONG_MAX, this_load = ULONG_MAX;
+ unsigned long most_spare = 0, this_spare = 0;
int load_idx = sd->forkexec_idx;
int imbalance = 100 + (sd->imbalance_pct-100)/2;
load_idx = sd->wake_idx;
do {
- unsigned long load, avg_load;
+ unsigned long load, avg_load, spare_cap, max_spare_cap;
int local_group;
int i;
local_group = cpumask_test_cpu(this_cpu,
sched_group_cpus(group));
- /* Tally up the load of all CPUs in the group */
+ /*
+ * Tally up the load of all CPUs in the group and find
+ * the group containing the CPU with most spare capacity.
+ */
avg_load = 0;
+ max_spare_cap = 0;
for_each_cpu(i, sched_group_cpus(group)) {
/* Bias balancing toward cpus of our domain */
load = target_load(i, load_idx);
avg_load += load;
+
+ spare_cap = capacity_spare_wake(i, p);
+
+ if (spare_cap > max_spare_cap)
+ max_spare_cap = spare_cap;
}
/* Adjust by relative CPU capacity of the group */
if (local_group) {
this_load = avg_load;
- } else if (avg_load < min_load) {
- min_load = avg_load;
- idlest = group;
+ this_spare = max_spare_cap;
+ } else {
+ if (avg_load < min_load) {
+ min_load = avg_load;
+ idlest = group;
+ }
+
+ if (most_spare < max_spare_cap) {
+ most_spare = max_spare_cap;
+ most_spare_sg = group;
+ }
}
} while (group = group->next, group != sd->groups);
+ /*
+ * The cross-over point between using spare capacity or least load
+ * is too conservative for high utilization tasks on partially
+ * utilized systems if we require spare_capacity > task_util(p),
+ * so we allow for some task stuffing by using
+ * spare_capacity > task_util(p)/2.
+ *
+ * Spare capacity can't be used for fork because the utilization has
+ * not been set yet, we must first select a rq to compute the initial
+ * utilization.
+ */
+ if (sd_flag & SD_BALANCE_FORK)
+ goto skip_spare;
+
+ if (this_spare > task_util(p) / 2 &&
+ imbalance*this_spare > 100*most_spare)
+ return NULL;
+ else if (most_spare > task_util(p) / 2)
+ return most_spare_sg;
+
+skip_spare:
if (!idlest || 100*this_load < imbalance*min_load)
return NULL;
return idlest;
}
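+
+/*
+ * Example of the spare-capacity check in find_idlest_group() (made-up
+ * numbers): for task_util(p) = 400 the threshold is 200; with
+ * this_spare = 300, most_spare = 250 and imbalance = 112, the local group
+ * wins since 112 * 300 > 100 * 250, so NULL is returned and the task
+ * stays local.
+ */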
/*
- * find_idlest_cpu - find the idlest cpu among the cpus in group.
+ * find_idlest_group_cpu - find the idlest cpu among the cpus in group.
*/
static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
{
unsigned long load, min_load = ULONG_MAX;
unsigned int min_exit_latency = UINT_MAX;
int shallowest_idle_cpu = -1;
int i;
+ /* Check if we have any choice: */
+ if (group->group_weight == 1)
+ return cpumask_first(sched_group_cpus(group));
+
/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
if (idle_cpu(i)) {
}
return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+ }
+
+static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
+ int cpu, int prev_cpu, int sd_flag)
+{
+ int new_cpu = cpu;
+ int wu = sd_flag & SD_BALANCE_WAKE;
+ int cas_cpu = -1;
+
+ if (wu) {
+ schedstat_inc(p, se.statistics.nr_wakeups_cas_attempts);
+ schedstat_inc(this_rq(), eas_stats.cas_attempts);
+ }
+
+ if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
+ return prev_cpu;
+
+ while (sd) {
+ struct sched_group *group;
+ struct sched_domain *tmp;
+ int weight;
+
+ if (wu)
+ schedstat_inc(sd, eas_stats.cas_attempts);
+
+ if (!(sd->flags & sd_flag)) {
+ sd = sd->child;
+ continue;
+ }
+
+ group = find_idlest_group(sd, p, cpu, sd_flag);
+ if (!group) {
+ sd = sd->child;
+ continue;
+ }
+
+ new_cpu = find_idlest_group_cpu(group, p, cpu);
+ if (new_cpu == cpu) {
+ /* Now try balancing at a lower domain level of cpu */
+ sd = sd->child;
+ continue;
+ }
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = cas_cpu = new_cpu;
+ weight = sd->span_weight;
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+ if (weight <= tmp->span_weight)
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+ }
+ /* while loop will break here if sd == NULL */
+ }
+
+ if (wu && (cas_cpu >= 0)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_cas_count);
+ schedstat_inc(this_rq(), eas_stats.cas_count);
+ }
+
+ return new_cpu;
}
/*
* Try and locate an idle CPU in the sched_domain.
*/
-static int select_idle_sibling(struct task_struct *p, int target)
+static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
struct sched_group *sg;
- int i = task_cpu(p);
+ int best_idle_cpu = -1;
+ int best_idle_cstate = INT_MAX;
+ unsigned long best_idle_capacity = ULONG_MAX;
+
+ schedstat_inc(p, se.statistics.nr_wakeups_sis_attempts);
+ schedstat_inc(this_rq(), eas_stats.sis_attempts);
+
+ if (!sysctl_sched_cstate_aware) {
+ if (idle_cpu(target)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_sis_idle);
+ schedstat_inc(this_rq(), eas_stats.sis_idle);
+ return target;
+ }
- if (idle_cpu(target))
- return target;
+ /*
+ * If the previous cpu is cache affine and idle, don't be stupid.
+ */
+ if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_sis_cache_affine);
+ schedstat_inc(this_rq(), eas_stats.sis_cache_affine);
+ return prev;
+ }
+ }
- /*
- * If the prevous cpu is cache affine and idle, don't be stupid.
- */
- if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
- return i;
+ if (!(current->flags & PF_WAKE_UP_IDLE) &&
+ !(p->flags & PF_WAKE_UP_IDLE))
+ return target;
/*
* Otherwise, iterate the domains and find an elegible idle cpu.
for_each_lower_domain(sd) {
sg = sd->groups;
do {
+ int i;
if (!cpumask_intersects(sched_group_cpus(sg),
tsk_cpus_allowed(p)))
goto next;
- for_each_cpu(i, sched_group_cpus(sg)) {
- if (i == target || !idle_cpu(i))
- goto next;
- }
+ if (sysctl_sched_cstate_aware) {
+ for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+ int idle_idx = idle_get_state_idx(cpu_rq(i));
+ unsigned long new_usage = boosted_task_util(p);
+ unsigned long capacity_orig = capacity_orig_of(i);
+
+ if (new_usage > capacity_orig || !idle_cpu(i))
+ goto next;
+
+ if (i == target && new_usage <= capacity_curr_of(target)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_sis_suff_cap);
+ schedstat_inc(this_rq(), eas_stats.sis_suff_cap);
+ schedstat_inc(sd, eas_stats.sis_suff_cap);
+ return target;
+ }
+
+ if (idle_idx < best_idle_cstate &&
+ capacity_orig <= best_idle_capacity) {
+ best_idle_cpu = i;
+ best_idle_cstate = idle_idx;
+ best_idle_capacity = capacity_orig;
+ }
+ }
+ } else {
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ if (i == target || !idle_cpu(i))
+ goto next;
+ }
- target = cpumask_first_and(sched_group_cpus(sg),
+ target = cpumask_first_and(sched_group_cpus(sg),
tsk_cpus_allowed(p));
- goto done;
+ schedstat_inc(p, se.statistics.nr_wakeups_sis_idle_cpu);
+ schedstat_inc(this_rq(), eas_stats.sis_idle_cpu);
+ schedstat_inc(sd, eas_stats.sis_idle_cpu);
+ goto done;
+ }
next:
sg = sg->next;
} while (sg != sd->groups);
}
+
+ if (best_idle_cpu >= 0)
+ target = best_idle_cpu;
+
done:
+ schedstat_inc(p, se.statistics.nr_wakeups_sis_count);
+ schedstat_inc(this_rq(), eas_stats.sis_count);
+
return target;
}
/*
- * cpu_util returns the amount of capacity of a CPU that is used by CFS
- * tasks. The unit of the return value must be the one of capacity so we can
- * compare the utilization with the capacity of the CPU that is available for
- * CFS task (ie cpu_capacity).
- *
- * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
- * recent utilization of currently non-runnable tasks on a CPU. It represents
- * the amount of utilization of a CPU in the range [0..capacity_orig] where
- * capacity_orig is the cpu_capacity available at the highest frequency
- * (arch_scale_freq_capacity()).
- * The utilization of a CPU converges towards a sum equal to or less than the
- * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
- * the running time on this CPU scaled by capacity_curr.
+ * cpu_util_wake: Compute cpu utilization with any contributions from
+ * the waking task p removed. check_for_migration() looks for a better CPU for
+ * rq->curr. For that case we should return the cpu util with contributions from
+ * the currently running task p removed.
+ */
+static int cpu_util_wake(int cpu, struct task_struct *p)
+{
+ unsigned long util, capacity;
+
+#ifdef CONFIG_SCHED_WALT
+ /*
+ * WALT does not decay idle tasks in the same manner
+ * as PELT, so it makes little sense to subtract task
+ * utilization from cpu utilization. Instead just use
+ * cpu_util for this case.
+ */
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util &&
+ p->state == TASK_WAKING)
+ return cpu_util(cpu);
+#endif
+ /* Task has no contribution or is new */
+ if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+ return cpu_util(cpu);
+
+ capacity = capacity_orig_of(cpu);
+ util = max_t(long, cpu_util(cpu) - task_util(p), 0);
+
+ return (util >= capacity) ? capacity : util;
+}
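+
+/*
+ * For example (made-up numbers): with cpu_util(cpu) = 700 and the waking
+ * task p, last run on this CPU, contributing task_util(p) = 200,
+ * cpu_util_wake() reports 500 (assuming capacity_orig_of(cpu) >= 500,
+ * otherwise the value is clamped to the original capacity).
+ */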
+
+static int start_cpu(bool boosted)
+{
+ struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+
+ return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu;
+}
+
+static inline int find_best_target(struct task_struct *p, int *backup_cpu,
+ bool boosted, bool prefer_idle)
+{
+ unsigned long best_idle_min_cap_orig = ULONG_MAX;
+ unsigned long min_util = boosted_task_util(p);
+ unsigned long target_capacity = ULONG_MAX;
+ unsigned long min_wake_util = ULONG_MAX;
+ unsigned long target_max_spare_cap = 0;
+ unsigned long best_active_util = ULONG_MAX;
+ int best_idle_cstate = INT_MAX;
+ struct sched_domain *sd;
+ struct sched_group *sg;
+ int best_active_cpu = -1;
+ int best_idle_cpu = -1;
+ int target_cpu = -1;
+ int cpu, i;
+ struct task_struct *curr_tsk;
+
+ *backup_cpu = -1;
+
+ schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts);
+ schedstat_inc(this_rq(), eas_stats.fbt_attempts);
+
+ /* Find start CPU based on boost value */
+ cpu = start_cpu(boosted);
+ if (cpu < 0) {
+ schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu);
+ schedstat_inc(this_rq(), eas_stats.fbt_no_cpu);
+ return -1;
+ }
+
+ /* Find SD for the start CPU */
+ sd = rcu_dereference(per_cpu(sd_ea, cpu));
+ if (!sd) {
+ schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_sd);
+ schedstat_inc(this_rq(), eas_stats.fbt_no_sd);
+ return -1;
+ }
+
+ /* Scan CPUs in all SDs */
+ sg = sd->groups;
+ do {
+ for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+ unsigned long capacity_curr = capacity_curr_of(i);
+ unsigned long capacity_orig = capacity_orig_of(i);
+ unsigned long wake_util, new_util;
+
+ if (!cpu_online(i))
+ continue;
+
+ if (walt_cpu_high_irqload(i))
+ continue;
+
+ /*
+ * p's blocked utilization is still accounted for on prev_cpu
+ * so prev_cpu will receive a negative bias due to the double
+ * accounting. However, the blocked utilization may be zero.
+ */
+ wake_util = cpu_util_wake(i, p);
+ new_util = wake_util + task_util(p);
+
+ /*
+ * Ensure minimum capacity to grant the required boost.
+ * The target CPU can be already at a capacity level higher
+ * than the one required to boost the task.
+ */
+ new_util = max(min_util, new_util);
+ if (new_util > capacity_orig)
+ continue;
+
+ /*
+ * Case A) Latency sensitive tasks
+ *
+ * Unconditionally favoring tasks that prefer idle CPU to
+ * improve latency.
+ *
+ * Looking for:
+ * - an idle CPU, whatever its idle_state is, since
+ * the first CPUs we explore are more likely to be
+ * reserved for latency sensitive tasks.
+ * - a non idle CPU where the task fits in its current
+ * capacity and has the maximum spare capacity.
+ * - a non idle CPU with lower contention from other
+ * tasks and running at the lowest possible OPP.
+ *
+ * The last two goals try to favor a non idle CPU
+ * where the task can run as if it is "almost alone".
+ * A maximum spare capacity CPU is favoured since
+ * the task already fits into that CPU's capacity
+ * without waiting for an OPP chance.
+ *
+ * The following code path is the only one in the CPUs
+ * exploration loop which is always used by
+ * prefer_idle tasks. It exits the loop with either a
+ * best_active_cpu or a target_cpu which should
+ * represent an optimal choice for latency sensitive
+ * tasks.
+ */
+ if (prefer_idle) {
+
+ /*
+ * Case A.1: IDLE CPU
+ * Return the first IDLE CPU we find.
+ */
+ if (idle_cpu(i)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle);
+ schedstat_inc(this_rq(), eas_stats.fbt_pref_idle);
+
+ trace_sched_find_best_target(p,
+ prefer_idle, min_util,
+ cpu, best_idle_cpu,
+ best_active_cpu, i);
+
+ return i;
+ }
+
+ /*
+ * Case A.2: Target ACTIVE CPU
+ * Favor CPUs with max spare capacity.
+ */
+ if ((capacity_curr > new_util) &&
+ (capacity_orig - new_util > target_max_spare_cap)) {
+ target_max_spare_cap = capacity_orig - new_util;
+ target_cpu = i;
+ continue;
+ }
+ if (target_cpu != -1)
+ continue;
+
+
+ /*
+ * Case A.3: Backup ACTIVE CPU
+ * Favor CPUs with:
+ * - lower utilization due to other tasks
+ * - lower utilization with the task in
+ */
+ if (wake_util > min_wake_util)
+ continue;
+ if (new_util > best_active_util)
+ continue;
+ min_wake_util = wake_util;
+ best_active_util = new_util;
+ best_active_cpu = i;
+ continue;
+ }
+
+ /*
+ * Enforce EAS mode
+ *
+ * For non latency sensitive tasks, skip CPUs that
+ * will be overutilized by moving the task there.
+ *
+ * The goal here is to remain in EAS mode as long as
+ * possible at least for !prefer_idle tasks.
+ */
+ if ((new_util * capacity_margin) >
+ (capacity_orig * SCHED_CAPACITY_SCALE))
+ continue;
+
+ /*
+ * Case B) Non latency sensitive tasks on IDLE CPUs.
+ *
+ * Find an optimal backup IDLE CPU for non latency
+ * sensitive tasks.
+ *
+ * Looking for:
+ * - minimizing the capacity_orig,
+ * i.e. preferring LITTLE CPUs
+ * - favoring shallowest idle states
+ * i.e. avoid waking up deep-idle CPUs
+ *
+ * The following code path is used by non latency
+ * sensitive tasks if IDLE CPUs are available. If at
+ * least one such CPU is available, it sets the
+ * best_idle_cpu to the most suitable idle CPU to be
+ * selected.
+ *
+ * If idle CPUs are available, favor these CPUs to
+ * improve performance by spreading tasks.
+ * Indeed, the energy_diff() computed by the caller
+ * will make sure that energy consumption is minimized
+ * without affecting performance.
+ */
+ if (idle_cpu(i)) {
+ int idle_idx = idle_get_state_idx(cpu_rq(i));
+
+ /* Select idle CPU with lower cap_orig */
+ if (capacity_orig > best_idle_min_cap_orig)
+ continue;
+
+ /*
+ * Skip CPUs in deeper idle state, but only
+ * if they are also less energy efficient.
+ * IOW, prefer a deep IDLE LITTLE CPU vs a
+ * shallow idle big CPU.
+ */
+ if (sysctl_sched_cstate_aware &&
+ best_idle_cstate <= idle_idx)
+ continue;
+
+ /* Keep track of best idle CPU */
+ best_idle_min_cap_orig = capacity_orig;
+ best_idle_cstate = idle_idx;
+ best_idle_cpu = i;
+ continue;
+ }
+
+ /*
+ * Case C) Non latency sensitive tasks on ACTIVE CPUs.
+ *
+ * Pack tasks in the most energy efficient capacities.
+ *
+ * This task packing strategy prefers more energy
+ * efficient CPUs (i.e. pack on smaller maximum
+ * capacity CPUs) while also trying to spread tasks to
+ * run them all at the lower OPP.
+ *
+ * This assumes for example that it's more energy
+ * efficient to run two tasks on two CPUs at a lower
+ * OPP than packing both on a single CPU but running
+ * that CPU at a higher OPP.
+ *
+ * Thus, this case keeps track of the CPU with the
+ * smallest maximum capacity and highest spare maximum
+ * capacity.
+ */
+
+ /* Favor CPUs with smaller capacity */
+ if (capacity_orig > target_capacity)
+ continue;
+
+ /* Favor CPUs with maximum spare capacity */
+ if ((capacity_orig - new_util) < target_max_spare_cap)
+ continue;
+
+ target_max_spare_cap = capacity_orig - new_util;
+ target_capacity = capacity_orig;
+ target_cpu = i;
+ }
+
+ } while (sg = sg->next, sg != sd->groups);
+
+ /*
+ * For non latency sensitive tasks, cases B and C in the previous loop,
+ * we pick the best IDLE CPU only if we were not able to find a target
+ * ACTIVE CPU.
+ *
+ * Policies priorities:
+ *
+ * - prefer_idle tasks:
+ *
+ * a) IDLE CPU available, we return immediately
+ *  b) ACTIVE CPU where task fits and has the largest maximum spare
+ * capacity (i.e. target_cpu)
+ * c) ACTIVE CPU with less contention due to other tasks
+ * (i.e. best_active_cpu)
+ *
+ * - NON prefer_idle tasks:
+ *
+ * a) ACTIVE CPU: target_cpu
+ * b) IDLE CPU: best_idle_cpu
+ */
+ if (target_cpu != -1 && !idle_cpu(target_cpu) &&
+ best_idle_cpu != -1) {
+ curr_tsk = READ_ONCE(cpu_rq(target_cpu)->curr);
+ if (curr_tsk && schedtune_task_boost_rcu_locked(curr_tsk)) {
+ target_cpu = best_idle_cpu;
+ }
+ }
+
+ if (target_cpu == -1)
+ target_cpu = prefer_idle
+ ? best_active_cpu
+ : best_idle_cpu;
+ else
+ *backup_cpu = prefer_idle
+ ? best_active_cpu
+ : best_idle_cpu;
+
+ trace_sched_find_best_target(p, prefer_idle, min_util, cpu,
+ best_idle_cpu, best_active_cpu,
+ target_cpu);
+
+ schedstat_inc(p, se.statistics.nr_wakeups_fbt_count);
+ schedstat_inc(this_rq(), eas_stats.fbt_count);
+
+ return target_cpu;
+}
+
+/*
+ * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
+ * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
*
- * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
- * higher than capacity_orig because of unfortunate rounding in
- * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
- * the average stabilizes with the new running time. We need to check that the
- * utilization stays within the range of [0..capacity_orig] and cap it if
- * necessary. Without utilization capping, a group could be seen as overloaded
- * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
- * available capacity. We allow utilization to overshoot capacity_curr (but not
- * capacity_orig) as it useful for predicting the capacity required after task
- * migrations (scheduler-driven DVFS).
+ * In that case WAKE_AFFINE doesn't make sense and we'll let
+ * BALANCE_WAKE sort things out.
*/
-static int cpu_util(int cpu)
+static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
+{
+ long min_cap, max_cap;
+
+ min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
+ max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+ /* Minimum capacity is close to max, no need to abort wake_affine */
+ if (max_cap - min_cap < max_cap >> 3)
+ return 0;
+
+ /* Bring task utilization in sync with prev_cpu */
+ sync_entity_load_avg(&p->se);
+
+ return min_cap * 1024 < task_util(p) * capacity_margin;
+}
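+
+/*
+ * For example (hypothetical big.LITTLE capacities): with prev_cpu on a
+ * LITTLE of capacity_orig 512 and the waking CPU a big of 1024, the gap
+ * 512 >= (1024 >> 3) = 128 means the asymmetry matters, and wake_affine is
+ * bypassed once 512 * 1024 < task_util(p) * capacity_margin, i.e. when the
+ * task no longer comfortably fits on the LITTLE.
+ */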
+
+static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync)
{
- unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
- unsigned long capacity = capacity_orig_of(cpu);
+ struct sched_domain *sd;
+ int target_cpu = prev_cpu, tmp_target, tmp_backup;
+ bool boosted, prefer_idle;
+
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_attempts);
+ schedstat_inc(this_rq(), eas_stats.secb_attempts);
+
+ if (sysctl_sched_sync_hint_enable && sync) {
+ int cpu = smp_processor_id();
+
+ if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_sync);
+ schedstat_inc(this_rq(), eas_stats.secb_sync);
+ return cpu;
+ }
+ }
+
+ rcu_read_lock();
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ boosted = schedtune_task_boost(p) > 0;
+ prefer_idle = schedtune_prefer_idle(p) > 0;
+#else
+ boosted = get_sysctl_sched_cfs_boost() > 0;
+ prefer_idle = 0;
+#endif
+
+ sync_entity_load_avg(&p->se);
+
+ sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
+ /* Find a cpu with sufficient capacity */
+ tmp_target = find_best_target(p, &tmp_backup, boosted, prefer_idle);
+
+ if (!sd)
+ goto unlock;
+ if (tmp_target >= 0) {
+ target_cpu = tmp_target;
+ if ((boosted || prefer_idle) && idle_cpu(target_cpu)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_idle_bt);
+ schedstat_inc(this_rq(), eas_stats.secb_idle_bt);
+ goto unlock;
+ }
+ }
+
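+	/*
+	 * Only consult the energy model when the candidate CPU differs from
+	 * the task's previous CPU; otherwise the placement is unchanged.
+	 */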
+ if (target_cpu != prev_cpu) {
+ int delta = 0;
+ struct energy_env eenv = {
+ .util_delta = task_util(p),
+ .src_cpu = prev_cpu,
+ .dst_cpu = target_cpu,
+ .task = p,
+ .trg_cpu = target_cpu,
+ };
+
+#ifdef CONFIG_SCHED_WALT
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util &&
+ p->state == TASK_WAKING)
+ delta = task_util(p);
+#endif
+ /* Not enough spare capacity on previous cpu */
+ if (__cpu_overutilized(prev_cpu, delta)) {
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap);
+ schedstat_inc(this_rq(), eas_stats.secb_insuff_cap);
+ goto unlock;
+ }
+
+ if (energy_diff(&eenv) >= 0) {
+ /* No energy saving for target_cpu, try backup */
+ target_cpu = tmp_backup;
+ eenv.dst_cpu = target_cpu;
+ eenv.trg_cpu = target_cpu;
+ if (tmp_backup < 0 ||
+ tmp_backup == prev_cpu ||
+ energy_diff(&eenv) >= 0) {
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav);
+ schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav);
+ target_cpu = prev_cpu;
+ goto unlock;
+ }
+ }
+
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav);
+ schedstat_inc(this_rq(), eas_stats.secb_nrg_sav);
+ goto unlock;
+ }
+
+ schedstat_inc(p, se.statistics.nr_wakeups_secb_count);
+ schedstat_inc(this_rq(), eas_stats.secb_count);
+
+unlock:
+ rcu_read_unlock();
- return (util >= capacity) ? capacity : util;
+ return target_cpu;
}
/*
* preempt must be disabled.
*/
static int
-select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags,
+ int sibling_count_hint)
{
struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
int cpu = smp_processor_id();
int want_affine = 0;
int sync = wake_flags & WF_SYNC;
- if (sd_flag & SD_BALANCE_WAKE)
- want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+#ifdef CONFIG_SCHED_HMP
+ return select_best_cpu(p, prev_cpu, 0, sync);
+#endif
+
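+	/*
+	 * Sync wakeup fast path: keep the task on this CPU when the hint is
+	 * enabled, wake_cap() reports no capacity issue and the waker is
+	 * about to leave this CPU nearly idle.
+	 */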
+ if (sd_flag & SD_BALANCE_WAKE) {
+ int _wake_cap = wake_cap(p, cpu, prev_cpu);
+
+ if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+ bool about_to_idle = (cpu_rq(cpu)->nr_running < 2);
+
+ if (sysctl_sched_sync_hint_enable && sync &&
+ !_wake_cap && about_to_idle)
+ return cpu;
+ }
+
+ record_wakee(p);
+ want_affine = !wake_wide(p, sibling_count_hint) &&
+ !_wake_cap &&
+ cpumask_test_cpu(cpu, &p->cpus_allowed);
+ }
+
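+	/* While the root domain is not overutilized, use energy-aware placement */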
+ if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
+ return select_energy_cpu_brute(p, prev_cpu, sync);
rcu_read_lock();
for_each_domain(cpu, tmp) {
if (affine_sd) {
sd = NULL; /* Prefer wake_affine over balance flags */
- if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+ if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
new_cpu = cpu;
}
+ if (sd && !(sd_flag & SD_BALANCE_FORK)) {
+ /*
+ * We're going to need the task's util for capacity_spare_wake
+ * in find_idlest_group. Sync it up to prev_cpu's
+ * last_update_time.
+ */
+ sync_entity_load_avg(&p->se);
+ }
+
if (!sd) {
if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
- new_cpu = select_idle_sibling(p, new_cpu);
-
- } else while (sd) {
- struct sched_group *group;
- int weight;
-
- if (!(sd->flags & sd_flag)) {
- sd = sd->child;
- continue;
- }
-
- group = find_idlest_group(sd, p, cpu, sd_flag);
- if (!group) {
- sd = sd->child;
- continue;
- }
-
- new_cpu = find_idlest_cpu(group, p, cpu);
- if (new_cpu == -1 || new_cpu == cpu) {
- /* Now try balancing at a lower domain level of cpu */
- sd = sd->child;
- continue;
- }
+ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
- /* Now try balancing at a lower domain level of new_cpu */
- cpu = new_cpu;
- weight = sd->span_weight;
- sd = NULL;
- for_each_domain(cpu, tmp) {
- if (weight <= tmp->span_weight)
- break;
- if (tmp->flags & sd_flag)
- sd = tmp;
- }
- /* while loop will break here if sd == NULL */
+ } else {
+ new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
}
rcu_read_unlock();
{
remove_entity_load_avg(&p->se);
}
+#else
+#define task_fits_max(p, cpu) true
#endif /* CONFIG_SMP */
static unsigned long
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);
+ rq->misfit_task = !task_fits_max(p, rq->cpu);
+
return p;
simple:
cfs_rq = &rq->cfs;
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);
+ rq->misfit_task = !task_fits_max(p, rq->cpu);
+
return p;
idle:
+ rq->misfit_task = 0;
/*
* This is OK, because current is on_cpu, which avoids it being picked
* for load-balance and preemption/IRQs are still disabled avoiding
enum fbq_type { regular, remote, all };
+enum group_type {
+ group_other = 0,
+ group_misfit_task,
+ group_imbalanced,
+ group_overloaded,
+};
+
#define LBF_ALL_PINNED 0x01
#define LBF_NEED_BREAK 0x02
#define LBF_DST_PINNED 0x04
#define LBF_SOME_PINNED 0x08
+#define LBF_BIG_TASK_ACTIVE_BALANCE 0x80
+#define LBF_IGNORE_BIG_TASKS 0x100
+#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200
+#define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400
struct lb_env {
struct sched_domain *sd;
int new_dst_cpu;
enum cpu_idle_type idle;
long imbalance;
+ unsigned int src_grp_nr_running;
/* The set of CPUs under consideration for load-balancing */
struct cpumask *cpus;
+ unsigned int busiest_grp_capacity;
+ unsigned int busiest_nr_running;
unsigned int flags;
unsigned int loop_max;
enum fbq_type fbq_type;
+ enum group_type busiest_group_type;
struct list_head tasks;
+ enum sched_boost_policy boost_policy;
};
/*
int can_migrate_task(struct task_struct *p, struct lb_env *env)
{
int tsk_cache_hot;
+ int twf, group_cpus;
lockdep_assert_held(&env->src_rq->lock);
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;
+ if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu)) {
+ if (nr_big_tasks(env->src_rq) && !is_big_task(p))
+ return 0;
+
+ if (env->boost_policy == SCHED_BOOST_ON_BIG &&
+ !task_sched_boost(p))
+ return 0;
+ }
+
+ twf = task_will_fit(p, env->dst_cpu);
+
+ /*
+ * Attempt to not pull tasks that don't fit. We may get lucky and find
+ * one that actually fits.
+ */
+ if (env->flags & LBF_IGNORE_BIG_TASKS && !twf)
+ return 0;
+
+ if (env->flags & LBF_IGNORE_PREFERRED_CLUSTER_TASKS &&
+ !preferred_cluster(rq_cluster(cpu_rq(env->dst_cpu)), p))
+ return 0;
+
+ /*
+ * Group imbalance can sometimes cause work to be pulled across groups
+ * even though the group could have managed the imbalance on its own.
+ * Prevent inter-cluster migrations for big tasks when the number of
+ * tasks is lower than the capacity of the group.
+ */
+ group_cpus = DIV_ROUND_UP(env->busiest_grp_capacity,
+ SCHED_CAPACITY_SCALE);
+ if (!twf && env->busiest_nr_running <= group_cpus)
+ return 0;
+
if (task_running(env->src_rq, p)) {
schedstat_inc(p, se.statistics.nr_failed_migrations_running);
return 0;
/*
* Aggressive migration if:
- * 1) destination numa is preferred
- * 2) task is cache cold, or
- * 3) too many balance attempts have failed.
+ * 1) IDLE or NEWLY_IDLE balance.
+ * 2) destination numa is preferred
+ * 3) task is cache cold, or
+ * 4) too many balance attempts have failed.
*/
tsk_cache_hot = migrate_degrades_locality(p, env);
if (tsk_cache_hot == -1)
tsk_cache_hot = task_hot(p, env);
- if (tsk_cache_hot <= 0 ||
+ if (env->idle != CPU_NOT_IDLE || tsk_cache_hot <= 0 ||
env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
if (tsk_cache_hot == 1) {
schedstat_inc(env->sd, lb_hot_gained[env->idle]);
{
lockdep_assert_held(&env->src_rq->lock);
- deactivate_task(env->src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
+ deactivate_task(env->src_rq, p, 0);
+ double_lock_balance(env->src_rq, env->dst_rq);
set_task_cpu(p, env->dst_cpu);
+ if (task_in_related_thread_group(p))
+ env->flags |= LBF_MOVED_RELATED_THREAD_GROUP_TASK;
+ double_unlock_balance(env->src_rq, env->dst_rq);
}
/*
* inside detach_tasks().
*/
schedstat_inc(env->sd, lb_gained[env->idle]);
+
return p;
}
return NULL;
struct task_struct *p;
unsigned long load;
int detached = 0;
+ int orig_loop = env->loop;
lockdep_assert_held(&env->src_rq->lock);
if (env->imbalance <= 0)
return 0;
+ if (!same_cluster(env->dst_cpu, env->src_cpu))
+ env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS;
+
+ if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu))
+ env->flags |= LBF_IGNORE_BIG_TASKS;
+
+redo:
while (!list_empty(tasks)) {
/*
* We don't want to steal all, otherwise we may be treated likewise,
list_move_tail(&p->se.group_node, tasks);
}
+ if (env->flags & (LBF_IGNORE_BIG_TASKS |
+ LBF_IGNORE_PREFERRED_CLUSTER_TASKS) && !detached) {
+ tasks = &env->src_rq->cfs_tasks;
+ env->flags &= ~(LBF_IGNORE_BIG_TASKS |
+ LBF_IGNORE_PREFERRED_CLUSTER_TASKS);
+ env->loop = orig_loop;
+ goto redo;
+ }
+
/*
* Right now, this is one of only two places we collect this stat
* so we can safely collect detach_one_task() stats here rather
lockdep_assert_held(&rq->lock);
BUG_ON(task_rq(p) != rq);
- p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, 0);
}
if (throttled_hierarchy(cfs_rq))
continue;
- if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+ if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq,
+ true))
update_tg_load_avg(cfs_rq, 0);
+
+ /* Propagate pending load changes to the parent */
+ if (cfs_rq->tg->se[cpu])
+ update_load_avg(cfs_rq->tg->se[cpu], 0);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
/********** Helpers for find_busiest_group ************************/
-enum group_type {
- group_other = 0,
- group_imbalanced,
- group_overloaded,
-};
-
/*
* sg_lb_stats - stats of a sched_group required for load_balancing
*/
unsigned long group_capacity;
unsigned long group_util; /* Total utilization of the group */
unsigned int sum_nr_running; /* Nr tasks running in the group */
+#ifdef CONFIG_SCHED_HMP
+ unsigned long sum_nr_big_tasks;
+ u64 group_cpu_load; /* Scaled load of all CPUs of the group */
+#endif
unsigned int idle_cpus;
unsigned int group_weight;
enum group_type group_type;
int group_no_capacity;
+ int group_misfit_task; /* A cpu has a task too big for its capacity */
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
.avg_load = 0UL,
.sum_nr_running = 0,
.group_type = group_other,
+#ifdef CONFIG_SCHED_HMP
+ .sum_nr_big_tasks = 0UL,
+ .group_cpu_load = 0ULL,
+#endif
},
};
}
+#ifdef CONFIG_SCHED_HMP
+
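+/*
+ * Bail out of inter-cluster load balance when the local cluster has a
+ * higher power cost and the busiest group is neither spilling load nor
+ * holding big tasks that belong on a higher-capacity cluster.
+ */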
+static int
+bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds)
+{
+ int local_cpu, busiest_cpu;
+ int local_capacity, busiest_capacity;
+ int local_pwr_cost, busiest_pwr_cost;
+ int nr_cpus;
+ int boost = sched_boost();
+
+ if (!sysctl_sched_restrict_cluster_spill ||
+ boost == FULL_THROTTLE_BOOST || boost == CONSERVATIVE_BOOST)
+ return 0;
+
+ local_cpu = group_first_cpu(sds->local);
+ busiest_cpu = group_first_cpu(sds->busiest);
+
+ local_capacity = cpu_max_possible_capacity(local_cpu);
+ busiest_capacity = cpu_max_possible_capacity(busiest_cpu);
+
+ local_pwr_cost = cpu_max_power_cost(local_cpu);
+ busiest_pwr_cost = cpu_max_power_cost(busiest_cpu);
+
+ if (local_pwr_cost <= busiest_pwr_cost)
+ return 0;
+
+ if (local_capacity > busiest_capacity &&
+ sds->busiest_stat.sum_nr_big_tasks)
+ return 0;
+
+ nr_cpus = cpumask_weight(sched_group_cpus(sds->busiest));
+ if ((sds->busiest_stat.group_cpu_load < nr_cpus * sched_spill_load) &&
+ (sds->busiest_stat.sum_nr_running <
+ nr_cpus * sysctl_sched_spill_nr_run))
+ return 1;
+
+ return 0;
+}
+
+#else /* CONFIG_SCHED_HMP */
+
+static inline int
+bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds)
+{
+ return 0;
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
/**
* get_sd_load_idx - Obtain the load index for a given sched domain.
* @sd: The sched_domain whose load_idx is to be obtained.
used = div_u64(avg, total);
+ /*
+ * deadline bandwidth is defined at system level so we must
+ * weight this bandwidth with the max capacity of the system.
+	 * As a reminder, avg_bw is 20 bits wide and
+	 * scale_cpu_capacity is 10 bits wide.
+ */
+ used += div_u64(rq->dl.avg_bw, arch_scale_cpu_capacity(NULL, cpu));
+
if (likely(used < SCHED_CAPACITY_SCALE))
return SCHED_CAPACITY_SCALE - used;
return 1;
}
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
+{
+ raw_spin_lock_init(&mcc->lock);
+ mcc->val = 0;
+ mcc->cpu = -1;
+}
+
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
+ struct max_cpu_capacity *mcc;
+ unsigned long max_capacity;
+ int max_cap_cpu;
+ unsigned long flags;
cpu_rq(cpu)->cpu_capacity_orig = capacity;
+ mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+ raw_spin_lock_irqsave(&mcc->lock, flags);
+ max_capacity = mcc->val;
+ max_cap_cpu = mcc->cpu;
+
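+	/*
+	 * Update the recorded maximum if this CPU held the previous maximum
+	 * and its capacity dropped, or if its capacity now exceeds the
+	 * recorded value.
+	 */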
+ if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+ (max_capacity < capacity)) {
+ mcc->val = capacity;
+ mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+ printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
+ cpu, capacity);
+ goto skip_unlock;
+#endif
+ }
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
capacity *= scale_rt_capacity(cpu);
capacity >>= SCHED_CAPACITY_SHIFT;
cpu_rq(cpu)->cpu_capacity = capacity;
sdg->sgc->capacity = capacity;
+ sdg->sgc->max_capacity = capacity;
+ sdg->sgc->min_capacity = capacity;
}
void update_group_capacity(struct sched_domain *sd, int cpu)
{
struct sched_domain *child = sd->child;
struct sched_group *group, *sdg = sd->groups;
- unsigned long capacity;
+ unsigned long capacity, max_capacity, min_capacity;
unsigned long interval;
interval = msecs_to_jiffies(sd->balance_interval);
}
capacity = 0;
+ max_capacity = 0;
+ min_capacity = ULONG_MAX;
if (child->flags & SD_OVERLAP) {
/*
struct sched_group_capacity *sgc;
struct rq *rq = cpu_rq(cpu);
+ if (cpumask_test_cpu(cpu, cpu_isolated_mask))
+ continue;
/*
* build_sched_domains() -> init_sched_groups_capacity()
* gets here before we've attached the domains to the
*/
if (unlikely(!rq->sd)) {
capacity += capacity_of(cpu);
- continue;
+ } else {
+ sgc = rq->sd->groups->sgc;
+ capacity += sgc->capacity;
}
- sgc = rq->sd->groups->sgc;
- capacity += sgc->capacity;
+ max_capacity = max(capacity, max_capacity);
+ min_capacity = min(capacity, min_capacity);
}
} else {
/*
group = child->groups;
do {
- capacity += group->sgc->capacity;
+ struct sched_group_capacity *sgc = group->sgc;
+
+ cpumask_t *cpus = sched_group_cpus(group);
+
+ /* Revisit this later. This won't work for MT domain */
+ if (!cpu_isolated(cpumask_first(cpus))) {
+ capacity += sgc->capacity;
+ max_capacity = max(sgc->max_capacity, max_capacity);
+ min_capacity = min(sgc->min_capacity, min_capacity);
+ }
group = group->next;
} while (group != child->groups);
}
sdg->sgc->capacity = capacity;
+ sdg->sgc->max_capacity = max_capacity;
+ sdg->sgc->min_capacity = min_capacity;
}
/*
return false;
}
+
+/*
+ * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * per-cpu capacity than sched_group ref.
+ */
+static inline bool
+group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+ return sg->sgc->max_capacity + capacity_margin - SCHED_LOAD_SCALE <
+ ref->sgc->max_capacity;
+}
+
static inline enum
group_type group_classify(struct sched_group *group,
- struct sg_lb_stats *sgs)
+ struct sg_lb_stats *sgs, struct lb_env *env)
{
if (sgs->group_no_capacity)
return group_overloaded;
if (sg_imbalanced(group))
return group_imbalanced;
+ if (sgs->group_misfit_task)
+ return group_misfit_task;
+
return group_other;
}
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * idle load balancing data
+ * - used by the nohz balance, but we want it available here
+ * so that we can see which CPUs have no tick.
+ */
+static struct {
+ cpumask_var_t idle_cpus_mask;
+ atomic_t nr_cpus;
+ unsigned long next_balance; /* in jiffy units */
+} nohz ____cacheline_aligned;
+
+static inline void update_cpu_stats_if_tickless(struct rq *rq)
+{
+ /* only called from update_sg_lb_stats when irqs are disabled */
+ if (cpumask_test_cpu(rq->cpu, nohz.idle_cpus_mask)) {
+		/* rate limit updates to once per jiffy at most */
+ if (READ_ONCE(jiffies) <= rq->last_load_update_tick)
+ return;
+
+ raw_spin_lock(&rq->lock);
+ update_rq_clock(rq);
+ update_idle_cpu_load(rq);
+ update_cfs_rq_load_avg(rq->clock_task, &rq->cfs, false);
+ raw_spin_unlock(&rq->lock);
+ }
+}
+
+#else
+static inline void update_cpu_stats_if_tickless(struct rq *rq) { }
+#endif
+
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
* @local_group: Does group contain this_cpu.
* @sgs: variable to hold the statistics for this group.
* @overload: Indicate more than one runnable task for any CPU.
+ * @overutilized: Indicate overutilization for any CPU.
*/
static inline void update_sg_lb_stats(struct lb_env *env,
struct sched_group *group, int load_idx,
int local_group, struct sg_lb_stats *sgs,
- bool *overload)
+ bool *overload, bool *overutilized)
{
unsigned long load;
- int i;
+ int i, nr_running;
memset(sgs, 0, sizeof(*sgs));
for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
struct rq *rq = cpu_rq(i);
+ trace_sched_cpu_load_lb(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i),
+ power_cost(i, 0),
+ cpu_temp(i));
+
+ if (cpu_isolated(i))
+ continue;
+
+		/*
+		 * If we are entering idle and there are CPUs with their
+		 * tick stopped, do an update for them.
+		 */
+ if (env->idle == CPU_NEWLY_IDLE)
+ update_cpu_stats_if_tickless(rq);
+
/* Bias balancing toward cpus of our domain */
if (local_group)
load = target_load(i, load_idx);
sgs->group_util += cpu_util(i);
sgs->sum_nr_running += rq->cfs.h_nr_running;
- if (rq->nr_running > 1)
+ nr_running = rq->nr_running;
+ if (nr_running > 1)
*overload = true;
+#ifdef CONFIG_SCHED_HMP
+ sgs->sum_nr_big_tasks += rq->hmp_stats.nr_big_tasks;
+ sgs->group_cpu_load += cpu_load(i);
+#endif
+
#ifdef CONFIG_NUMA_BALANCING
sgs->nr_numa_running += rq->nr_numa_running;
sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
sgs->sum_weighted_load += weighted_cpuload(i);
- if (idle_cpu(i))
+ /*
+ * No need to call idle_cpu() if nr_running is not 0
+ */
+ if (!nr_running && idle_cpu(i))
sgs->idle_cpus++;
+
+ if (energy_aware() && cpu_overutilized(i)) {
+ *overutilized = true;
+ if (!sgs->group_misfit_task && rq->misfit_task)
+ sgs->group_misfit_task = capacity_of(i);
+ }
}
- /* Adjust by relative CPU capacity of the group */
- sgs->group_capacity = group->sgc->capacity;
- sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
+ /* Isolated CPU has no weight */
+ if (!group->group_weight) {
+ sgs->group_capacity = 0;
+ sgs->avg_load = 0;
+ sgs->group_no_capacity = 1;
+ sgs->group_type = group_other;
+ sgs->group_weight = group->group_weight;
+ } else {
+ /* Adjust by relative CPU capacity of the group */
+ sgs->group_capacity = group->sgc->capacity;
+ sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) /
+ sgs->group_capacity;
+
+ sgs->group_weight = group->group_weight;
+
+ sgs->group_no_capacity = group_is_overloaded(env, sgs);
+ sgs->group_type = group_classify(group, sgs, env);
+ }
if (sgs->sum_nr_running)
sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
+}
- sgs->group_weight = group->group_weight;
+#ifdef CONFIG_SCHED_HMP
+static bool update_sd_pick_busiest_active_balance(struct lb_env *env,
+ struct sd_lb_stats *sds,
+ struct sched_group *sg,
+ struct sg_lb_stats *sgs)
+{
+ if (env->idle != CPU_NOT_IDLE &&
+ cpu_capacity(env->dst_cpu) > group_rq_capacity(sg)) {
+ if (sgs->sum_nr_big_tasks >
+ sds->busiest_stat.sum_nr_big_tasks) {
+ env->flags |= LBF_BIG_TASK_ACTIVE_BALANCE;
+ return true;
+ }
+ }
- sgs->group_no_capacity = group_is_overloaded(env, sgs);
- sgs->group_type = group_classify(group, sgs);
+ return false;
+}
+#else
+static bool update_sd_pick_busiest_active_balance(struct lb_env *env,
+ struct sd_lb_stats *sds,
+ struct sched_group *sg,
+ struct sg_lb_stats *sgs)
+{
+ return false;
}
+#endif
/**
* update_sd_pick_busiest - return 1 on busiest group
{
struct sg_lb_stats *busiest = &sds->busiest_stat;
+ if (update_sd_pick_busiest_active_balance(env, sds, sg, sgs))
+ return true;
+
if (sgs->group_type > busiest->group_type)
return true;
if (sgs->group_type < busiest->group_type)
return false;
- if (sgs->avg_load <= busiest->avg_load)
- return false;
+ if (energy_aware()) {
+ /*
+ * Candidate sg doesn't face any serious load-balance problems
+ * so don't pick it if the local sg is already filled up.
+ */
+ if (sgs->group_type == group_other &&
+ !group_has_capacity(env, &sds->local_stat))
+ return false;
+
+ if (sgs->avg_load <= busiest->avg_load)
+ return false;
+ if (!(env->sd->flags & SD_ASYM_CPUCAPACITY))
+ goto asym_packing;
+
+ /*
+ * Candidate sg has no more than one task per CPU and
+ * has higher per-CPU capacity. Migrating tasks to less
+ * capable CPUs may harm throughput. Maximize throughput,
+ * power/energy consequences are not considered.
+ */
+ if (sgs->sum_nr_running <= sgs->group_weight &&
+ group_smaller_cpu_capacity(sds->local, sg))
+ return false;
+ }
+
+asym_packing:
/* This is the busiest node in its class. */
if (!(env->sd->flags & SD_ASYM_PACKING))
return true;
}
#endif /* CONFIG_NUMA_BALANCING */
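+/* True iff @sd has a parent domain containing more than one sched group */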
+#define lb_sd_parent(sd) \
+ (sd->parent && sd->parent->groups != sd->parent->groups->next)
+
/**
* update_sd_lb_stats - Update sched_domain's statistics for load balancing.
* @env: The load balancing environment.
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats tmp_sgs;
int load_idx, prefer_sibling = 0;
- bool overload = false;
+ bool overload = false, overutilized = false;
if (child && child->flags & SD_PREFER_SIBLING)
prefer_sibling = 1;
}
update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
- &overload);
+ &overload, &overutilized);
if (local_group)
goto next_group;
group_has_capacity(env, &sds->local_stat) &&
(sgs->sum_nr_running > 1)) {
sgs->group_no_capacity = 1;
- sgs->group_type = group_classify(sg, sgs);
+ sgs->group_type = group_classify(sg, sgs, env);
}
+ /*
+ * Ignore task groups with misfit tasks if local group has no
+ * capacity or if per-cpu capacity isn't higher.
+ */
+ if (energy_aware() &&
+ sgs->group_type == group_misfit_task &&
+ (!group_has_capacity(env, &sds->local_stat) ||
+ !group_smaller_cpu_capacity(sg, sds->local)))
+ sgs->group_type = group_other;
+
if (update_sd_pick_busiest(env, sds, sg, sgs)) {
sds->busiest = sg;
sds->busiest_stat = *sgs;
+ env->busiest_nr_running = sgs->sum_nr_running;
+ env->busiest_grp_capacity = sgs->group_capacity;
}
next_group:
if (env->sd->flags & SD_NUMA)
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
- if (!env->sd->parent) {
+ env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
+
+ if (!lb_sd_parent(env->sd)) {
/* update overload indicator if we are at root domain */
if (env->dst_rq->rd->overload != overload)
env->dst_rq->rd->overload = overload;
+
+ /* Update over-utilization (tipping point, U >= 0) indicator */
+ if (energy_aware() && env->dst_rq->rd->overutilized != overutilized) {
+ env->dst_rq->rd->overutilized = overutilized;
+ trace_sched_overutilized(overutilized);
+ }
+ } else {
+ if (energy_aware() && !env->dst_rq->rd->overutilized && overutilized) {
+ env->dst_rq->rd->overutilized = true;
+ trace_sched_overutilized(true);
+ }
}
}
*/
if (busiest->avg_load <= sds->avg_load ||
local->avg_load >= sds->avg_load) {
+ if (energy_aware()) {
+ /* Misfitting tasks should be migrated in any case */
+ if (busiest->group_type == group_misfit_task) {
+ env->imbalance = busiest->group_misfit_task;
+ return;
+ }
+
+ /*
+ * Busiest group is overloaded, local is not, use the spare
+ * cycles to maximize throughput
+ */
+ if (busiest->group_type == group_overloaded &&
+ local->group_type <= group_misfit_task) {
+ env->imbalance = busiest->load_per_task;
+ return;
+ }
+ }
+
env->imbalance = 0;
return fix_small_imbalance(env, sds);
}
(sds->avg_load - local->avg_load) * local->group_capacity
) / SCHED_CAPACITY_SCALE;
+ /* Boost imbalance to allow misfit task to be balanced. */
+ if (energy_aware() && busiest->group_type == group_misfit_task)
+ env->imbalance = max_t(long, env->imbalance,
+ busiest->group_misfit_task);
+
/*
* if *imbalance is less than the average load per runnable task
* there is no guarantee that any tasks will be moved so we'll have
* this level.
*/
update_sd_lb_stats(env, &sds);
+
+ if (energy_aware() && !env->dst_rq->rd->overutilized)
+ goto out_balanced;
+
local = &sds.local_stat;
busiest = &sds.busiest_stat;
if (!sds.busiest || busiest->sum_nr_running == 0)
goto out_balanced;
+ if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
+ goto force_balance;
+
+ if (bail_inter_cluster_balance(env, &sds))
+ goto out_balanced;
+
sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load)
/ sds.total_capacity;
if (busiest->group_type == group_imbalanced)
goto force_balance;
- /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
- if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) &&
+ /*
+ * When dst_cpu is idle, prevent SMP nice and/or asymmetric group
+ * capacities from resulting in underutilization due to avg_load.
+ */
+ if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) &&
busiest->group_no_capacity)
goto force_balance;
+ /* Misfitting tasks should be dealt with regardless of the avg load */
+ if (energy_aware() && busiest->group_type == group_misfit_task) {
+ goto force_balance;
+ }
+
/*
* If the local group is busier than the selected busiest group
* don't try and pull any tasks.
* might end up to just move the imbalance on another group
*/
if ((busiest->group_type != group_overloaded) &&
- (local->idle_cpus <= (busiest->idle_cpus + 1)))
+ (local->idle_cpus <= (busiest->idle_cpus + 1)) &&
+ !group_smaller_cpu_capacity(sds.busiest, sds.local))
goto out_balanced;
} else {
/*
goto out_balanced;
}
-force_balance:
- /* Looks like there is an imbalance. Compute it */
- calculate_imbalance(env, &sds);
- return sds.busiest;
+force_balance:
+ env->busiest_group_type = busiest->group_type;
+ /* Looks like there is an imbalance. Compute it */
+ calculate_imbalance(env, &sds);
+ return sds.busiest;
+
+out_balanced:
+ env->imbalance = 0;
+ return NULL;
+}
+
+#ifdef CONFIG_SCHED_HMP
+static struct rq *find_busiest_queue_hmp(struct lb_env *env,
+ struct sched_group *group)
+{
+ struct rq *busiest = NULL, *busiest_big = NULL;
+ u64 max_runnable_avg = 0, max_runnable_avg_big = 0;
+ int max_nr_big = 0, nr_big;
+ bool find_big = !!(env->flags & LBF_BIG_TASK_ACTIVE_BALANCE);
+ int i;
+ cpumask_t cpus;
+
+ cpumask_andnot(&cpus, sched_group_cpus(group), cpu_isolated_mask);
+
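+	/*
+	 * For a big-task active balance prefer the runqueue with the most
+	 * big tasks (ties broken by cumulative runnable average); otherwise
+	 * pick the one with the highest cumulative runnable average.
+	 */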
+ for_each_cpu(i, &cpus) {
+ struct rq *rq = cpu_rq(i);
+ u64 cumulative_runnable_avg =
+ rq->hmp_stats.cumulative_runnable_avg;
+
+ if (!cpumask_test_cpu(i, env->cpus))
+ continue;
+
+ if (find_big) {
+ nr_big = nr_big_tasks(rq);
+ if (nr_big > max_nr_big ||
+ (nr_big > 0 && nr_big == max_nr_big &&
+ cumulative_runnable_avg > max_runnable_avg_big)) {
+ max_runnable_avg_big = cumulative_runnable_avg;
+ busiest_big = rq;
+ max_nr_big = nr_big;
+ continue;
+ }
+ }
+
+ if (cumulative_runnable_avg > max_runnable_avg) {
+ max_runnable_avg = cumulative_runnable_avg;
+ busiest = rq;
+ }
+ }
+
+ if (busiest_big)
+ return busiest_big;
-out_balanced:
- env->imbalance = 0;
+ env->flags &= ~LBF_BIG_TASK_ACTIVE_BALANCE;
+ return busiest;
+}
+#else
+static inline struct rq *find_busiest_queue_hmp(struct lb_env *env,
+ struct sched_group *group)
+{
return NULL;
}
+#endif
/*
* find_busiest_queue - find the busiest runqueue among the cpus in group.
unsigned long busiest_load = 0, busiest_capacity = 1;
int i;
+#ifdef CONFIG_SCHED_HMP
+ return find_busiest_queue_hmp(env, group);
+#endif
+
for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
unsigned long capacity, wl;
enum fbq_type rt;
*/
if (rq->nr_running == 1 && wl > env->imbalance &&
- !check_cpu_capacity(rq, env->sd))
+ !check_cpu_capacity(rq, env->sd) &&
+ env->busiest_group_type != group_misfit_task)
continue;
/*
* Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
* so long as it is large enough.
*/
-#define MAX_PINNED_INTERVAL 512
+#define MAX_PINNED_INTERVAL 16
/* Working cpumask for load_balance and load_balance_newidle. */
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
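+/*
+ * Extra balance failures tolerated beyond cache_nice_tries before an
+ * active balance is forced.
+ */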
+#define NEED_ACTIVE_BALANCE_THRESHOLD 10
+
static int need_active_balance(struct lb_env *env)
{
struct sched_domain *sd = env->sd;
+ if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
+ return 1;
+
if (env->idle == CPU_NEWLY_IDLE) {
/*
return 1;
}
- return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
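+	/*
+	 * With EAS, actively migrate a lone running task off an overutilized
+	 * lower-capacity CPU to a higher-capacity CPU that is not itself
+	 * overutilized.
+	 */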
+ if (energy_aware() &&
+ (capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
+ ((capacity_orig_of(env->src_cpu) < capacity_orig_of(env->dst_cpu))) &&
+ env->src_rq->cfs.h_nr_running == 1 &&
+ cpu_overutilized(env->src_cpu) &&
+ !cpu_overutilized(env->dst_cpu)) {
+ return 1;
+ }
+
+ return unlikely(sd->nr_balance_failed >
+ sd->cache_nice_tries + NEED_ACTIVE_BALANCE_THRESHOLD);
}
-static int active_load_balance_cpu_stop(void *data);
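+/* First CPU in the group's balance mask that is not isolated */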
+static int group_balance_cpu_not_isolated(struct sched_group *sg)
+{
+ cpumask_t cpus;
+
+ cpumask_and(&cpus, sched_group_cpus(sg), sched_group_mask(sg));
+ cpumask_andnot(&cpus, &cpus, cpu_isolated_mask);
+ return cpumask_first(&cpus);
+}
static int should_we_balance(struct lb_env *env)
{
sg_mask = sched_group_mask(sg);
/* Try to find first idle cpu */
for_each_cpu_and(cpu, sg_cpus, env->cpus) {
- if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+ if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu) ||
+ cpu_isolated(cpu))
continue;
balance_cpu = cpu;
}
if (balance_cpu == -1)
- balance_cpu = group_balance_cpu(sg);
+ balance_cpu = group_balance_cpu_not_isolated(sg);
/*
* First idle cpu or the first cpu(busiest) in this sched group
struct sched_domain *sd, enum cpu_idle_type idle,
int *continue_balancing)
{
- int ld_moved, cur_ld_moved, active_balance = 0;
- struct sched_domain *sd_parent = sd->parent;
- struct sched_group *group;
- struct rq *busiest;
+ int ld_moved = 0, cur_ld_moved, active_balance = 0;
+ struct sched_domain *sd_parent = lb_sd_parent(sd) ? sd->parent : NULL;
+ struct sched_group *group = NULL;
+ struct rq *busiest = NULL;
unsigned long flags;
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
struct lb_env env = {
- .sd = sd,
- .dst_cpu = this_cpu,
- .dst_rq = this_rq,
- .dst_grpmask = sched_group_cpus(sd->groups),
- .idle = idle,
- .loop_break = sched_nr_migrate_break,
- .cpus = cpus,
- .fbq_type = all,
- .tasks = LIST_HEAD_INIT(env.tasks),
+ .sd = sd,
+ .dst_cpu = this_cpu,
+ .dst_rq = this_rq,
+ .dst_grpmask = sched_group_cpus(sd->groups),
+ .idle = idle,
+ .loop_break = sched_nr_migrate_break,
+ .cpus = cpus,
+ .fbq_type = all,
+ .tasks = LIST_HEAD_INIT(env.tasks),
+ .imbalance = 0,
+ .flags = 0,
+ .loop = 0,
+ .busiest_nr_running = 0,
+ .busiest_grp_capacity = 0,
+ .boost_policy = sched_boost_policy(),
};
/*
* correctly treated as an imbalance.
*/
env.flags |= LBF_ALL_PINNED;
- env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
more_balance:
raw_spin_lock_irqsave(&busiest->lock, flags);
+ update_rq_clock(busiest);
+
+ /* The world might have changed. Validate assumptions */
+ if (busiest->nr_running <= 1) {
+ raw_spin_unlock_irqrestore(&busiest->lock, flags);
+ env.flags &= ~LBF_ALL_PINNED;
+ goto no_move;
+ }
+
+ /*
+ * Set loop_max when rq's lock is taken to prevent a race.
+ */
+ env.loop_max = min(sysctl_sched_nr_migrate,
+ busiest->nr_running);
/*
* cur_ld_moved - load moved in current iteration
}
}
+no_move:
if (!ld_moved) {
- schedstat_inc(sd, lb_failed[idle]);
+ if (!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
+ schedstat_inc(sd, lb_failed[idle]);
+
/*
* Increment the failure counter only on periodic balance.
* We do not want newidle balance, which can be very
* frequent, pollute the failure counter causing
* excessive cache_hot migrations and active balances.
*/
- if (idle != CPU_NEWLY_IDLE)
- sd->nr_balance_failed++;
+ if (idle != CPU_NEWLY_IDLE &&
+ !(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE)) {
+ if (env.src_grp_nr_running > 1)
+ sd->nr_balance_failed++;
+ }
if (need_active_balance(&env)) {
raw_spin_lock_irqsave(&busiest->lock, flags);
* ->active_balance_work. Once set, it's cleared
* only after active load balance is finished.
*/
- if (!busiest->active_balance) {
+ if (!busiest->active_balance &&
+ !cpu_isolated(cpu_of(busiest))) {
busiest->active_balance = 1;
busiest->push_cpu = this_cpu;
active_balance = 1;
stop_one_cpu_nowait(cpu_of(busiest),
active_load_balance_cpu_stop, busiest,
&busiest->active_balance_work);
+ *continue_balancing = 0;
}
/*
* We've kicked active balancing, reset the failure
* counter.
*/
- sd->nr_balance_failed = sd->cache_nice_tries+1;
+ sd->nr_balance_failed =
+ sd->cache_nice_tries +
+ NEED_ACTIVE_BALANCE_THRESHOLD - 1;
}
- } else
+ } else {
sd->nr_balance_failed = 0;
+ /* Assumes one 'busiest' cpu that we pulled tasks from */
+ if (!same_freq_domain(this_cpu, cpu_of(busiest))) {
+ int check_groups = !!(env.flags &
+ LBF_MOVED_RELATED_THREAD_GROUP_TASK);
+
+ check_for_freq_change(this_rq, false, check_groups);
+ check_for_freq_change(busiest, false, check_groups);
+ } else {
+ check_for_freq_change(this_rq, true, false);
+ }
+ }
if (likely(!active_balance)) {
/* We were unbalanced, so reset the balancing interval */
sd->balance_interval = sd->min_interval;
(sd->balance_interval < sd->max_interval))
sd->balance_interval *= 2;
out:
+ trace_sched_load_balance(this_cpu, idle, *continue_balancing,
+ group ? group->cpumask[0] : 0,
+ busiest ? busiest->nr_running : 0,
+ env.imbalance, env.flags, ld_moved,
+ sd->balance_interval);
return ld_moved;
}
int pulled_task = 0;
u64 curr_cost = 0;
+ if (cpu_isolated(this_cpu))
+ return 0;
+
idle_enter_fair(this_rq);
/*
*/
this_rq->idle_stamp = rq_clock(this_rq);
- if (this_rq->avg_idle < sysctl_sched_migration_cost ||
- !this_rq->rd->overload) {
+ if (!energy_aware() &&
+ (this_rq->avg_idle < sysctl_sched_migration_cost ||
+ !this_rq->rd->overload)) {
rcu_read_lock();
sd = rcu_dereference_check_sched_domain(this_rq->sd);
if (sd)
/*
* Stop searching for tasks to pull if there are
- * now runnable tasks on this rq.
+ * now runnable tasks on the balance rq or if
+ * continue_balancing has been unset (only possible
+ * due to active migration).
*/
- if (pulled_task || this_rq->nr_running > 0)
+ if (pulled_task || this_rq->nr_running > 0 ||
+ !continue_balancing)
break;
}
rcu_read_unlock();
int busiest_cpu = cpu_of(busiest_rq);
int target_cpu = busiest_rq->push_cpu;
struct rq *target_rq = cpu_rq(target_cpu);
- struct sched_domain *sd;
+ struct sched_domain *sd = NULL;
struct task_struct *p = NULL;
+ struct task_struct *push_task = NULL;
+ int push_task_detached = 0;
+ struct lb_env env = {
+ .sd = sd,
+ .dst_cpu = target_cpu,
+ .dst_rq = target_rq,
+ .src_cpu = busiest_rq->cpu,
+ .src_rq = busiest_rq,
+ .idle = CPU_IDLE,
+ .busiest_nr_running = 0,
+ .busiest_grp_capacity = 0,
+ .flags = 0,
+ .loop = 0,
+ .boost_policy = sched_boost_policy(),
+ };
+ bool moved = false;
raw_spin_lock_irq(&busiest_rq->lock);
*/
BUG_ON(busiest_rq == target_rq);
+ push_task = busiest_rq->push_task;
+ target_cpu = busiest_rq->push_cpu;
+ if (push_task) {
+ if (task_on_rq_queued(push_task) &&
+ push_task->state == TASK_RUNNING &&
+ task_cpu(push_task) == busiest_cpu &&
+ cpu_online(target_cpu)) {
+ detach_task(push_task, &env);
+ push_task_detached = 1;
+ moved = true;
+ }
+ goto out_unlock;
+ }
+
/* Search for an sd spanning us and the target CPU. */
rcu_read_lock();
for_each_domain(target_cpu, sd) {
}
if (likely(sd)) {
- struct lb_env env = {
- .sd = sd,
- .dst_cpu = target_cpu,
- .dst_rq = target_rq,
- .src_cpu = busiest_rq->cpu,
- .src_rq = busiest_rq,
- .idle = CPU_IDLE,
- };
-
+ env.sd = sd;
schedstat_inc(sd, alb_count);
+ update_rq_clock(busiest_rq);
p = detach_one_task(&env);
- if (p)
+ if (p) {
schedstat_inc(sd, alb_pushed);
- else
+ moved = true;
+ } else {
schedstat_inc(sd, alb_failed);
+ }
}
rcu_read_unlock();
out_unlock:
busiest_rq->active_balance = 0;
+ push_task = busiest_rq->push_task;
+ target_cpu = busiest_rq->push_cpu;
+
+ if (push_task)
+ busiest_rq->push_task = NULL;
+
raw_spin_unlock(&busiest_rq->lock);
+ if (push_task) {
+ if (push_task_detached)
+ attach_one_task(target_rq, push_task);
+ put_task_struct(push_task);
+ clear_reserved(target_cpu);
+ }
+
if (p)
attach_one_task(target_rq, p);
local_irq_enable();
+ if (moved && !same_freq_domain(busiest_cpu, target_cpu)) {
+ int check_groups = !!(env.flags &
+ LBF_MOVED_RELATED_THREAD_GROUP_TASK);
+ check_for_freq_change(busiest_rq, false, check_groups);
+ check_for_freq_change(target_rq, false, check_groups);
+ } else if (moved) {
+ check_for_freq_change(target_rq, true, false);
+ }
+
return 0;
}
* needed, they will kick the idle load balancer, which then does idle
* load balancing for all the idle CPUs.
*/
-static struct {
- cpumask_var_t idle_cpus_mask;
- atomic_t nr_cpus;
- unsigned long next_balance; /* in jiffy units */
-} nohz ____cacheline_aligned;
-static inline int find_new_ilb(void)
+#ifdef CONFIG_SCHED_HMP
+static inline int find_new_hmp_ilb(int type)
+{
+ int call_cpu = raw_smp_processor_id();
+ struct sched_domain *sd;
+ int ilb;
+
+ rcu_read_lock();
+
+ /* Pick an idle cpu "closest" to call_cpu */
+ for_each_domain(call_cpu, sd) {
+ for_each_cpu_and(ilb, nohz.idle_cpus_mask,
+ sched_domain_span(sd)) {
+ if (idle_cpu(ilb) && (type != NOHZ_KICK_RESTRICT ||
+ cpu_max_power_cost(ilb) <=
+ cpu_max_power_cost(call_cpu))) {
+ rcu_read_unlock();
+ reset_balance_interval(ilb);
+ return ilb;
+ }
+ }
+ }
+
+ rcu_read_unlock();
+ return nr_cpu_ids;
+}
+#else /* CONFIG_SCHED_HMP */
+static inline int find_new_hmp_ilb(int type)
+{
+ return 0;
+}
+#endif /* CONFIG_SCHED_HMP */
+
+static inline int find_new_ilb(int type)
{
- int ilb = cpumask_first(nohz.idle_cpus_mask);
+ int ilb;
+
+#ifdef CONFIG_SCHED_HMP
+ return find_new_hmp_ilb(type);
+#endif
+
+ ilb = cpumask_first(nohz.idle_cpus_mask);
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
* nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
* CPU (if there is one).
*/
-static void nohz_balancer_kick(void)
+static void nohz_balancer_kick(int type)
{
int ilb_cpu;
nohz.next_balance++;
- ilb_cpu = find_new_ilb();
+ ilb_cpu = find_new_ilb(type);
if (ilb_cpu >= nr_cpu_ids)
return;
return;
}
+void nohz_balance_clear_nohz_mask(int cpu)
+{
+ if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) {
+ cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+ atomic_dec(&nohz.nr_cpus);
+ }
+}
+
static inline void nohz_balance_exit_idle(int cpu)
{
if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
/*
* Completely isolated CPUs don't ever set, so we must test.
*/
- if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) {
- cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
- atomic_dec(&nohz.nr_cpus);
- }
+ nohz_balance_clear_nohz_mask(cpu);
clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}
}
/*
* If we're a completely isolated CPU, we don't play.
*/
- if (on_null_domain(cpu_rq(cpu)))
+ if (on_null_domain(cpu_rq(cpu)) || cpu_isolated(cpu))
return;
cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
*/
void update_max_interval(void)
{
- max_load_balance_interval = HZ*num_online_cpus()/10;
+ cpumask_t avail_mask;
+ unsigned int available_cpus;
+
+ cpumask_andnot(&avail_mask, cpu_online_mask, cpu_isolated_mask);
+ available_cpus = cpumask_weight(&avail_mask);
+
+ max_load_balance_interval = HZ*available_cpus/10;
}
/*
/* Earliest time when we have to do rebalance again */
unsigned long next_balance = jiffies + 60*HZ;
int update_next_balance = 0;
+ cpumask_t cpus;
if (idle != CPU_IDLE ||
!test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
goto end;
- for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+ cpumask_andnot(&cpus, nohz.idle_cpus_mask, cpu_isolated_mask);
+
+ for_each_cpu(balance_cpu, &cpus) {
if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
continue;
clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
}
+#ifdef CONFIG_SCHED_HMP
+static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
+{
+ struct sched_domain *sd;
+ int i;
+
+ if (rq->nr_running < 2)
+ return 0;
+
+ if (!sysctl_sched_restrict_cluster_spill ||
+ sched_boost_policy() == SCHED_BOOST_ON_ALL)
+ return 1;
+
+ if (cpu_max_power_cost(cpu) == max_power_cost)
+ return 1;
+
+ rcu_read_lock();
+ sd = rcu_dereference_check_sched_domain(rq->sd);
+ if (!sd) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ for_each_cpu(i, sched_domain_span(sd)) {
+ if (cpu_load(i) < sched_spill_load &&
+ cpu_rq(i)->nr_running <
+ sysctl_sched_spill_nr_run) {
+			/*
+			 * Change the kick type to limit to CPUs that are
+			 * of equal or lower capacity.
+			 */
+ *type = NOHZ_KICK_RESTRICT;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return 1;
+}
+#else
+static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
+{
+ return 0;
+}
+#endif
+
+static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
+{
+ unsigned long now = jiffies;
+
+ /*
+ * None are in tickless mode and hence no need for NOHZ idle load
+ * balancing.
+ */
+ if (likely(!atomic_read(&nohz.nr_cpus)))
+ return 0;
+
+#ifdef CONFIG_SCHED_HMP
+ return _nohz_kick_needed_hmp(rq, cpu, type);
+#endif
+
+ if (time_before(now, nohz.next_balance))
+ return 0;
+
+ if (rq->nr_running >= 2 &&
+ (!energy_aware() || cpu_overutilized(cpu)))
+ return true;
+
+	/* Do idle load balance if there is a misfit task */
+ if (energy_aware())
+ return rq->misfit_task;
+
+ return (rq->nr_running >= 2);
+}
+
/*
* Current heuristic for kicking the idle load balancer in the presence
* of an idle cpu in the system.
* - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
* domain span are idle.
*/
-static inline bool nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq, int *type)
{
- unsigned long now = jiffies;
+#ifndef CONFIG_SCHED_HMP
struct sched_domain *sd;
struct sched_group_capacity *sgc;
- int nr_busy, cpu = rq->cpu;
+ int nr_busy;
+#endif
+ int cpu = rq->cpu;
bool kick = false;
if (unlikely(rq->idle_balance))
set_cpu_sd_state_busy();
nohz_balance_exit_idle(cpu);
- /*
- * None are in tickless mode and hence no need for NOHZ idle load
- * balancing.
- */
- if (likely(!atomic_read(&nohz.nr_cpus)))
- return false;
-
- if (time_before(now, nohz.next_balance))
- return false;
-
- if (rq->nr_running >= 2)
+ if (_nohz_kick_needed(rq, cpu, type))
return true;
+#ifndef CONFIG_SCHED_HMP
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_busy, cpu));
if (sd) {
unlock:
rcu_read_unlock();
+#endif
return kick;
}
#else
*/
void trigger_load_balance(struct rq *rq)
{
- /* Don't need to rebalance while attached to NULL domain */
- if (unlikely(on_null_domain(rq)))
+ int type = NOHZ_KICK_ANY;
+
+	/*
+	 * No need to rebalance while attached to a NULL domain or while
+	 * the cpu is isolated.
+	 */
+ if (unlikely(on_null_domain(rq)) || cpu_isolated(cpu_of(rq)))
return;
if (time_after_eq(jiffies, rq->next_balance))
raise_softirq(SCHED_SOFTIRQ);
#ifdef CONFIG_NO_HZ_COMMON
- if (nohz_kick_needed(rq))
- nohz_balancer_kick();
+ if (nohz_kick_needed(rq, &type))
+ nohz_balancer_kick(type);
#endif
}
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
+
+#ifdef CONFIG_SMP
+ if (energy_aware() &&
+ !rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) {
+ rq->rd->overutilized = true;
+ trace_sched_overutilized(true);
+ }
+
+ rq->misfit_task = !task_fits_max(curr, rq->cpu);
+#endif
+
}
/*
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se, *curr;
- int this_cpu = smp_processor_id();
struct rq *rq = this_rq();
- unsigned long flags;
-
- raw_spin_lock_irqsave(&rq->lock, flags);
+ raw_spin_lock(&rq->lock);
update_rq_clock(rq);
cfs_rq = task_cfs_rq(current);
curr = cfs_rq->curr;
-
- /*
- * Not only the cpu but also the task_group of the parent might have
- * been changed after parent->se.parent,cfs_rq were copied to
- * child->se.parent,cfs_rq. So call __set_task_cpu() to make those
- * of child point to valid ones.
- */
- rcu_read_lock();
- __set_task_cpu(p, this_cpu);
- rcu_read_unlock();
-
- update_curr(cfs_rq);
-
- if (curr)
+ if (curr) {
+ update_curr(cfs_rq);
se->vruntime = curr->vruntime;
+ }
place_entity(cfs_rq, se, 1);
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
}
se->vruntime -= cfs_rq->min_vruntime;
-
- raw_spin_unlock_irqrestore(&rq->lock, flags);
+ raw_spin_unlock(&rq->lock);
}
/*
return false;
}
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Propagate the changes of the sched_entity across the tg tree to make it
+ * visible to the root
+ */
+static void propagate_entity_cfs_rq(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq;
+
+ /* Start to propagate at parent */
+ se = se->parent;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+
+ if (cfs_rq_throttled(cfs_rq))
+ break;
+
+ update_load_avg(se, UPDATE_TG);
+ }
+}
+#else
+static void propagate_entity_cfs_rq(struct sched_entity *se) { }
+#endif
+
+static void detach_entity_cfs_rq(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ /* Catch up with the cfs_rq and remove our load when we leave */
+ update_load_avg(se, 0);
+ detach_entity_load_avg(cfs_rq, se);
+ update_tg_load_avg(cfs_rq, false);
+ propagate_entity_cfs_rq(se);
+}
+
+static void attach_entity_cfs_rq(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * Since the real-depth could have been changed (only FAIR
+ * class maintain depth value), reset depth properly.
+ */
+ se->depth = se->parent ? se->parent->depth + 1 : 0;
+#endif
+
+ /* Synchronize entity with its cfs_rq */
+ update_load_avg(se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
+ attach_entity_load_avg(cfs_rq, se);
+ update_tg_load_avg(cfs_rq, false);
+ propagate_entity_cfs_rq(se);
+}
+
static void detach_task_cfs_rq(struct task_struct *p)
{
struct sched_entity *se = &p->se;
se->vruntime -= cfs_rq->min_vruntime;
}
- /* Catch up with the cfs_rq and remove our load when we leave */
- detach_entity_load_avg(cfs_rq, se);
+ detach_entity_cfs_rq(se);
}
static void attach_task_cfs_rq(struct task_struct *p)
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
-#ifdef CONFIG_FAIR_GROUP_SCHED
- /*
- * Since the real-depth could have been changed (only FAIR
- * class maintain depth value), reset depth properly.
- */
- se->depth = se->parent ? se->parent->depth + 1 : 0;
-#endif
-
- /* Synchronize task with its cfs_rq */
- attach_entity_load_avg(cfs_rq, se);
+ attach_entity_cfs_rq(se);
if (!vruntime_normalized(p))
se->vruntime += cfs_rq->min_vruntime;
cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif
#ifdef CONFIG_SMP
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ cfs_rq->propagate_avg = 0;
+#endif
atomic_long_set(&cfs_rq->removed_load_avg, 0);
atomic_long_set(&cfs_rq->removed_util_avg, 0);
#endif
}
#ifdef CONFIG_FAIR_GROUP_SCHED
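+/* Re-home @p's cfs_rq and parent pointers (and se depth) for its task group */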
+static void task_set_group_fair(struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+
+ set_task_rq(p, task_cpu(p));
+ se->depth = se->parent ? se->parent->depth + 1 : 0;
+}
+
static void task_move_group_fair(struct task_struct *p)
{
detach_task_cfs_rq(p);
attach_task_cfs_rq(p);
}
+static void task_change_group_fair(struct task_struct *p, int type)
+{
+ switch (type) {
+ case TASK_SET_GROUP:
+ task_set_group_fair(p);
+ break;
+
+ case TASK_MOVE_GROUP:
+ task_move_group_fair(p);
+ break;
+ }
+}
+
void free_fair_sched_group(struct task_group *tg)
{
int i;
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
- struct cfs_rq *cfs_rq;
struct sched_entity *se;
+ struct cfs_rq *cfs_rq;
+ struct rq *rq;
int i;
tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
for_each_possible_cpu(i) {
+ rq = cpu_rq(i);
+
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)
init_cfs_rq(cfs_rq);
init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
init_entity_runnable_average(se);
+
+ raw_spin_lock_irq(&rq->lock);
+ post_init_entity_util_avg(se);
+ raw_spin_unlock_irq(&rq->lock);
}
return 1;
/* Possible calls to update_curr() need rq clock */
update_rq_clock(rq);
- for_each_sched_entity(se)
- update_cfs_shares(group_cfs_rq(se));
+ for_each_sched_entity(se) {
+ update_load_avg(se, UPDATE_TG);
+ update_cfs_shares(se);
+ }
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
.update_curr = update_curr_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
- .task_move_group = task_move_group_fair,
+ .task_change_group = task_change_group_fair,
+#endif
+#ifdef CONFIG_SCHED_HMP
+ .inc_hmp_sched_stats = inc_hmp_sched_stats_fair,
+ .dec_hmp_sched_stats = dec_hmp_sched_stats_fair,
+ .fixup_hmp_sched_stats = fixup_hmp_sched_stats_fair,
#endif
};
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
+#include <linux/bug.h>
+#include <linux/delay.h>
#include "workqueue_internal.h"
int id; /* I: pool ID */
unsigned int flags; /* X: flags */
+ unsigned long watchdog_ts; /* L: watchdog timestamp */
+
struct list_head worklist; /* L: list of pending works */
int nr_workers; /* L: total number of workers */
struct pool_workqueue *pwq = get_work_pwq(work);
trace_workqueue_activate_work(work);
+ if (list_empty(&pwq->pool->worklist))
+ pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
pwq->nr_active++;
if (work_is_canceling(work))
return -ENOENT;
cpu_relax();
+ /*
+ * The queueing is in progress in another context. If we keep
+ * taking the pool->lock in a busy loop, the other context may
+ * never get the lock. Give 1 usec delay to avoid this contention.
+ */
+ udelay(1);
return -EAGAIN;
}
trace_workqueue_activate_work(work);
pwq->nr_active++;
worklist = &pwq->pool->worklist;
+ if (list_empty(worklist))
+ pwq->pool->watchdog_ts = jiffies;
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &pwq->delayed_works;
return;
}
- timer_stats_timer_set_start_info(&dwork->timer);
-
dwork->wq = wq;
dwork->cpu = cpu;
timer->expires = jiffies + delay;
current->comm, preempt_count(), task_pid_nr(current),
worker->current_func);
debug_show_held_locks(current);
+ BUG_ON(PANIC_CORRUPTION);
dump_stack();
}
list_first_entry(&pool->worklist,
struct work_struct, entry);
+ pool->watchdog_ts = jiffies;
+
if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
/* optimization path, not strictly necessary */
process_one_work(worker, work);
struct pool_workqueue, mayday_node);
struct worker_pool *pool = pwq->pool;
struct work_struct *work, *n;
+ bool first = true;
__set_current_state(TASK_RUNNING);
list_del_init(&pwq->mayday_node);
* process'em.
*/
WARN_ON_ONCE(!list_empty(scheduled));
- list_for_each_entry_safe(work, n, &pool->worklist, entry)
- if (get_work_pwq(work) == pwq)
+ list_for_each_entry_safe(work, n, &pool->worklist, entry) {
+ if (get_work_pwq(work) == pwq) {
+ if (first)
+ pool->watchdog_ts = jiffies;
move_linked_works(work, scheduled, &n);
+ }
+ first = false;
+ }
if (!list_empty(scheduled)) {
process_scheduled_works(rescuer);
*/
if (need_to_create_worker(pool)) {
spin_lock(&wq_mayday_lock);
- get_pwq(pwq);
- list_move_tail(&pwq->mayday_node, &wq->maydays);
+ /*
+ * Queue iff we aren't racing destruction
+ * and somebody else hasn't queued it already.
+ */
+ if (wq->rescuer && list_empty(&pwq->mayday_node)) {
+ get_pwq(pwq);
+ list_add_tail(&pwq->mayday_node, &wq->maydays);
+ }
spin_unlock(&wq_mayday_lock);
}
}
}
EXPORT_SYMBOL(flush_delayed_work);
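+/*
+ * Common helper for cancel_work() and cancel_delayed_work(): claim the
+ * pending state and clear it without waiting for a running callback to
+ * finish.
+ */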
+static bool __cancel_work(struct work_struct *work, bool is_dwork)
+{
+ unsigned long flags;
+ int ret;
+
+ do {
+ ret = try_to_grab_pending(work, is_dwork, &flags);
+ } while (unlikely(ret == -EAGAIN));
+
+ if (unlikely(ret < 0))
+ return false;
+
+ set_work_pool_and_clear_pending(work, get_work_pool_id(work));
+ local_irq_restore(flags);
+ return ret;
+}
+
+/*
+ * See cancel_delayed_work()
+ */
+bool cancel_work(struct work_struct *work)
+{
+ return __cancel_work(work, false);
+}
+
/**
* cancel_delayed_work - cancel a delayed work
* @dwork: delayed_work to cancel
*/
bool cancel_delayed_work(struct delayed_work *dwork)
{
- unsigned long flags;
- int ret;
-
- do {
- ret = try_to_grab_pending(&dwork->work, true, &flags);
- } while (unlikely(ret == -EAGAIN));
-
- if (unlikely(ret < 0))
- return false;
-
- set_work_pool_and_clear_pending(&dwork->work,
- get_work_pool_id(&dwork->work));
- local_irq_restore(flags);
- return ret;
+ return __cancel_work(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work);
pool->cpu = -1;
pool->node = NUMA_NO_NODE;
pool->flags |= POOL_DISASSOCIATED;
+ pool->watchdog_ts = jiffies;
INIT_LIST_HEAD(&pool->worklist);
INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
struct pool_workqueue *pwq;
int node;
+ /*
+ * Remove it from sysfs first so that sanity check failure doesn't
+ * lead to sysfs name conflicts.
+ */
+ workqueue_sysfs_unregister(wq);
+
/* drain it before proceeding with destruction */
drain_workqueue(wq);
+ /* kill rescuer, if sanity checks fail, leave it w/o rescuer */
+ if (wq->rescuer) {
+ struct worker *rescuer = wq->rescuer;
+
+ /* this prevents new queueing */
+ spin_lock_irq(&wq_mayday_lock);
+ wq->rescuer = NULL;
+ spin_unlock_irq(&wq_mayday_lock);
+
+ /* rescuer will empty maydays list before exiting */
+ kthread_stop(rescuer->task);
+ kfree(rescuer);
+ }
+
/* sanity checks */
mutex_lock(&wq->mutex);
for_each_pwq(pwq, wq) {
list_del_rcu(&wq->list);
mutex_unlock(&wq_pool_mutex);
- workqueue_sysfs_unregister(wq);
-
- if (wq->rescuer)
- kthread_stop(wq->rescuer->task);
-
if (!(wq->flags & WQ_UNBOUND)) {
/*
* The base ref is never dropped on per-cpu pwqs. Directly
pr_info(" pwq %d:", pool->id);
pr_cont_pool_info(pool);
- pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
+ pr_cont(" active=%d/%d refcnt=%d%s\n",
+ pwq->nr_active, pwq->max_active, pwq->refcnt,
!list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
pr_info("pool %d:", pool->id);
pr_cont_pool_info(pool);
- pr_cont(" workers=%d", pool->nr_workers);
+ pr_cont(" hung=%us workers=%d",
+ jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+ pool->nr_workers);
if (pool->manager)
pr_cont(" manager: %d",
task_pid_nr(pool->manager->task));
static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
#endif /* CONFIG_SYSFS */
+/*
+ * Workqueue watchdog.
+ *
+ * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
+ * flush dependency, a concurrency managed work item which stays RUNNING
+ * indefinitely. Workqueue stalls can be very difficult to debug as the
+ * usual warning mechanisms don't trigger and internal workqueue state is
+ * largely opaque.
+ *
+ * Workqueue watchdog monitors all worker pools periodically and dumps
+ * state if some pools failed to make forward progress for a while where
+ * forward progress is defined as the first item on ->worklist changing.
+ *
+ * This mechanism is controlled through the kernel parameter
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
+ * corresponding sysfs parameter file.
+ */
+#ifdef CONFIG_WQ_WATCHDOG
+
+static void wq_watchdog_timer_fn(unsigned long data);
+
+static unsigned long wq_watchdog_thresh = 30;
+static struct timer_list wq_watchdog_timer =
+ TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+
+static void wq_watchdog_reset_touched(void)
+{
+ int cpu;
+
+ wq_watchdog_touched = jiffies;
+ for_each_possible_cpu(cpu)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
+
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+ unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+ bool lockup_detected = false;
+ struct worker_pool *pool;
+ int pi;
+
+ if (!thresh)
+ return;
+
+ rcu_read_lock();
+
+ for_each_pool(pool, pi) {
+ unsigned long pool_ts, touched, ts;
+
+ if (list_empty(&pool->worklist))
+ continue;
+
+ /* get the latest of pool and touched timestamps */
+ pool_ts = READ_ONCE(pool->watchdog_ts);
+ touched = READ_ONCE(wq_watchdog_touched);
+
+ if (time_after(pool_ts, touched))
+ ts = pool_ts;
+ else
+ ts = touched;
+
+ if (pool->cpu >= 0) {
+ unsigned long cpu_touched =
+ READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+ pool->cpu));
+ if (time_after(cpu_touched, ts))
+ ts = cpu_touched;
+ }
+
+ /* did we stall? */
+ if (time_after(jiffies, ts + thresh)) {
+ lockup_detected = true;
+ pr_emerg("BUG: workqueue lockup - pool");
+ pr_cont_pool_info(pool);
+ pr_cont(" stuck for %us!\n",
+ jiffies_to_msecs(jiffies - pool_ts) / 1000);
+ }
+ }
+
+ rcu_read_unlock();
+
+ if (lockup_detected)
+ show_workqueue_state();
+
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
+
+void wq_watchdog_touch(int cpu)
+{
+ if (cpu >= 0)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+ else
+ wq_watchdog_touched = jiffies;
+}
+
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+ wq_watchdog_thresh = 0;
+ del_timer_sync(&wq_watchdog_timer);
+
+ if (thresh) {
+ wq_watchdog_thresh = thresh;
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+ }
+}
+
+static int wq_watchdog_param_set_thresh(const char *val,
+ const struct kernel_param *kp)
+{
+ unsigned long thresh;
+ int ret;
+
+ ret = kstrtoul(val, 0, &thresh);
+ if (ret)
+ return ret;
+
+ if (system_wq)
+ wq_watchdog_set_thresh(thresh);
+ else
+ wq_watchdog_thresh = thresh;
+
+ return 0;
+}
+
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
+ .set = wq_watchdog_param_set_thresh,
+ .get = param_get_ulong,
+};
+
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
+ 0644);
+
+static void wq_watchdog_init(void)
+{
+ wq_watchdog_set_thresh(wq_watchdog_thresh);
+}
+
+#else /* CONFIG_WQ_WATCHDOG */
+
+static inline void wq_watchdog_init(void) { }
+
+#endif /* CONFIG_WQ_WATCHDOG */
+
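The watchdog above is driven purely by timestamps: each pool advances ->watchdog_ts when its worklist makes progress, and wq_watchdog_touch() refreshes the per-CPU "touched" timestamp so code that legitimately keeps a CPU busy is not reported as a stall (in the upstream series this is wired into touch_softlockup_watchdog()). The fragment below is a purely illustrative direct caller, assuming wq_watchdog_touch() is declared in linux/workqueue.h elsewhere in this series; my_hw_ready() is hypothetical. The threshold itself can be set at boot with workqueue.watchdog_thresh=N or, via the module_param_cb() above, at runtime through /sys/module/workqueue/parameters/watchdog_thresh.

#include <linux/workqueue.h>
#include <linux/smp.h>

/* Hypothetical busy-wait that pets the workqueue watchdog so the pool
 * running it is not flagged as stalled. */
static void wait_for_hw(void)
{
	while (!my_hw_ready()) {	/* my_hw_ready() is illustrative */
		cpu_relax();
		/* refresh this CPU's wq_watchdog_touched_cpu timestamp */
		wq_watchdog_touch(raw_smp_processor_id());
	}
}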
static void __init wq_numa_init(void)
{
cpumask_var_t *tbl;
!system_unbound_wq || !system_freezable_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq);
+
+ wq_watchdog_init();
+
return 0;
}
early_initcall(init_workqueues);
copy_highpage(newpage, oldpage);
flush_dcache_page(newpage);
- __set_page_locked(newpage);
+ __SetPageLocked(newpage);
SetPageUptodate(newpage);
SetPageSwapBacked(newpage);
set_page_private(newpage, swap_index);
}
__SetPageSwapBacked(page);
- __set_page_locked(page);
+ __SetPageLocked(page);
if (sgp == SGP_WRITE)
__SetPageReferenced(page);
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
/* i_mutex is held by caller */
- if (unlikely(info->seals)) {
+ if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
if (info->seals & F_SEAL_WRITE)
return -EPERM;
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
}
shmem_falloc.waitq = &shmem_falloc_waitq;
- shmem_falloc.start = unmap_start >> PAGE_SHIFT;
+ shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT;
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
spin_lock(&inode->i_lock);
inode->i_private = &shmem_falloc;
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
+void shmem_set_file(struct vm_area_struct *vma, struct file *file)
+{
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_vm_ops;
+}
+
/**
* shmem_zero_setup - setup a shared anonymous mapping
* @vma: the vma to be mmapped is prepared by do_mmap_pgoff
if (IS_ERR(file))
return PTR_ERR(file);
- if (vma->vm_file)
- fput(vma->vm_file);
- vma->vm_file = file;
- vma->vm_ops = &shmem_vm_ops;
+ shmem_set_file(vma, file);
return 0;
}
return NETDEV_TX_OK;
}
- u64_stats_update_begin(&brstats->syncp);
- brstats->tx_packets++;
- brstats->tx_bytes += skb->len;
- u64_stats_update_end(&brstats->syncp);
-
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
+ u64_stats_update_begin(&brstats->syncp);
+ brstats->tx_packets++;
+ /* Exclude ETH_HLEN from byte stats for consistency with Rx chain */
+ brstats->tx_bytes += skb->len;
+ u64_stats_update_end(&brstats->syncp);
+
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ /* dev_set_mac_addr() can be called by a master device on bridge's
+ * NETDEV_UNREGISTER, but since it's being destroyed do nothing
+ */
+ if (dev->reg_state != NETREG_REGISTERED)
+ return -EBUSY;
+
spin_lock_bh(&br->lock);
if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
/* Mac address will be changed in br_stp_change_bridge_id(). */
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
#include "net-sysfs.h"
static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id = NR_CPUS;
-static DEFINE_HASHTABLE(napi_hash, 8);
+static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs;
+ __be16 src_port = tcp_hdr(skb)->source;
+ __be16 dest_port = tcp_hdr(skb)->dest;
+
+ trace_print_skb_gso(skb, src_port, dest_port);
segs = skb_gso_segment(skb, features);
if (IS_ERR(segs)) {
goto out_kfree_skb;
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
{
- struct sk_buff *next, *head = NULL, *tail;
+ struct sk_buff *next, *head = NULL, *tail = NULL;
for (; skb != NULL; skb = next) {
next = skb->next;
}
out:
+ __this_cpu_add(softnet_data.gro_coalesced, NAPI_GRO_CB(skb)->count > 1);
return netif_receive_skb_internal(skb);
}
unsigned long diffs;
NAPI_GRO_CB(p)->flush = 0;
+ NAPI_GRO_CB(p)->flush_id = 0;
if (hash != skb_get_hash_raw(p)) {
NAPI_GRO_CB(p)->same_flow = 0;
}
EXPORT_SYMBOL(__skb_gro_checksum_complete);
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+ while (remsd) {
+ struct softnet_data *next = remsd->rps_ipi_next;
+
+ if (cpu_online(remsd->cpu)) {
+ smp_call_function_single_async(remsd->cpu, &remsd->csd);
+ } else {
+ rps_lock(remsd);
+ remsd->backlog.state = 0;
+ rps_unlock(remsd);
+ }
+ remsd = next;
+ }
+#endif
+}
+
/*
* net_rps_action_and_irq_enable sends any pending IPI's for rps.
* Note: called with local irq disabled, but exits with local irq enabled.
local_irq_enable();
/* Send pending IPI's to kick RPS processing on remote cpus. */
- while (remsd) {
- struct softnet_data *next = remsd->rps_ipi_next;
-
- if (cpu_online(remsd->cpu))
- smp_call_function_single_async(remsd->cpu,
- &remsd->csd);
- remsd = next;
- }
+ net_rps_send_ipi(remsd);
} else
#endif
local_irq_enable();
local_irq_disable();
input_queue_head_incr(sd);
if (++work >= quota) {
- local_irq_enable();
- return work;
+ goto state_changed;
}
}
napi->state = 0;
rps_unlock(sd);
- break;
+ goto state_changed;
}
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
rps_unlock(sd);
}
+state_changed:
local_irq_enable();
+ napi_gro_flush(napi, false);
+ sd->current_napi = NULL;
return work;
}
void __napi_complete(struct napi_struct *n)
{
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
list_del_init(&n->poll_list);
smp_mb__before_atomic();
+ sd->current_napi = NULL;
clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
}
EXPORT_SYMBOL(netif_napi_del);
+
+struct napi_struct *get_current_napi_context(void)
+{
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+
+ return sd->current_napi;
+}
+EXPORT_SYMBOL(get_current_napi_context);
+
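get_current_napi_context() simply reports the napi_struct that napi_poll() records in softnet_data before invoking ->poll, so it is only meaningful from NAPI poll context on the current CPU. A minimal, purely illustrative consumer (my_priv and its embedded napi are hypothetical, and the declaration of get_current_napi_context() is assumed to be exported via netdevice.h elsewhere in this series):

#include <linux/netdevice.h>

struct my_priv {			/* hypothetical driver private data */
	struct napi_struct napi;
};

/* True when called from within this device's own NAPI poll. */
static bool my_in_own_napi(struct my_priv *priv)
{
	return get_current_napi_context() == &priv->napi;
}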
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
void *have;
*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+
+ sd->current_napi = n;
work = n->poll(n, weight);
trace_napi_poll(n);
}
if (ops->ndo_change_mtu)
return ops->ndo_change_mtu(dev, new_mtu);
- dev->mtu = new_mtu;
+ /* Pairs with all the lockless reads of dev->mtu in the stack */
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
struct sk_buff **list_skb;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
- struct softnet_data *sd, *oldsd;
+ struct softnet_data *sd, *oldsd, *remsd;
if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
return NOTIFY_OK;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+#ifdef CONFIG_RPS
+ remsd = oldsd->rps_ipi_list;
+ oldsd->rps_ipi_list = NULL;
+#endif
+ /* send out pending IPI's on offline CPU */
+ net_rps_send_ipi(remsd);
+
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx_ni(skb);
}
}
- static bool inetdev_valid_mtu(unsigned int mtu)
- {
- return mtu >= IPV4_MIN_MTU;
- }
-
static void inetdev_send_gratuitous_arp(struct net_device *dev,
struct in_device *in_dev)
"igmpv3_unsolicited_report_interval"),
DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
"ignore_routes_with_linkdown"),
+ DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
+ "drop_gratuitous_arp"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
"promote_secondaries"),
DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
"route_localnet"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
+ "drop_unicast_in_l2_multicast"),
+ DEVINET_SYSCTL_RW_ENTRY(NF_IPV4_DEFRAG_SKIP,
+ "nf_ipv4_defrag_skip"),
},
};
rt = *rtp;
if (unlikely(!rt))
return -EFAULT;
- /*
- * We steal reference to this route, caller should not release it
- */
- *rtp = NULL;
+
cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
cork->dst = &rt->dst;
+ /* We stole this route, caller should not release it. */
+ *rtp = NULL;
+
cork->length = 0;
cork->ttl = ipc->ttl;
cork->tos = ipc->tos;
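ip_setup_cork() above now rejects a cork MTU below the IPv4 minimum, while an earlier hunk in this section deletes inetdev_valid_mtu() from its old location. In the upstream form of this fix the helper moves to include/linux/inetdevice.h as a static inline so both call sites can share it; a sketch of the relocated helper, reconstructed from the deleted body, would be:

/* Same logic as the removed function above, now usable from ip_output.c. */
static inline bool inetdev_valid_mtu(unsigned int mtu)
{
	return mtu >= IPV4_MIN_MTU;
}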
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
- tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+ arg->uid);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
* (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
* limit when mss is larger than 1460.
*/
- u32 init_rwnd = TCP_INIT_CWND * 2;
+ u32 init_rwnd = sysctl_tcp_default_init_rwnd;
if (mss > 1460)
init_rwnd = max((1460 * init_rwnd) / mss, 2U);
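As a worked example of the clamp above: assuming sysctl_tcp_default_init_rwnd keeps the earlier default of TCP_INIT_CWND * 2 = 20 segments (the default is set elsewhere in this series) and the path MSS is 9000 bytes, init_rwnd = max((1460 * 20) / 9000, 2) = max(3, 2) = 3 segments, so jumbo-MSS flows still start with a bounded receive window.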
min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
- size += TCPOLEN_SACK_BASE_ALIGNED +
- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+ if (likely(opts->num_sack_blocks))
+ size += TCPOLEN_SACK_BASE_ALIGNED +
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
}
return size;
int sysctl_tcp_orphan_retries __read_mostly;
int sysctl_tcp_thin_linear_timeouts __read_mostly;
+/* Function to reset the tcp_ack related sysctls when the master control is reset */
+void set_tcp_default(void)
+{
+ sysctl_tcp_delack_seg = TCP_DELACK_SEG;
+}
+
+/* sysctl handler for the tcp_ack related master control */
+int tcp_proc_delayed_ack_control(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+	/* ret is 0 when input validation succeeded and the new value was
+	 * written to the sysctl table; otherwise the stack keeps using the
+	 * currently configured value.
+	 */
+ return ret;
+}
+
+/* sysctl handler for the tcp_ack related master control */
+int tcp_use_userconfig_sysctl_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+ if (write && ret == 0) {
+ if (!sysctl_tcp_use_userconfig)
+ set_tcp_default();
+ }
+ return ret;
+}
+
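The two handlers above only take effect once they are listed in the ipv4 sysctl table, which is not part of this hunk. The entries below are a hedged sketch of that wiring: the procnames, bounds and table name are illustrative assumptions, while sysctl_tcp_use_userconfig and sysctl_tcp_delack_seg are the variables the handlers already reference.

#include <linux/sysctl.h>

static int zero;
static int one = 1;

/* Illustrative ctl_table entries (names and bounds assumed, handlers real). */
static struct ctl_table tcp_ack_ctl_sketch[] = {
	{
		.procname	= "tcp_use_userconfig",
		.data		= &sysctl_tcp_use_userconfig,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= tcp_use_userconfig_sysctl_handler,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "tcp_delack_seg",
		.data		= &sysctl_tcp_delack_seg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= tcp_proc_delayed_ack_control,
	},
	{ }
};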
static void tcp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
return;
}
- if (icsk->icsk_probes_out > max_probes) {
+ if (icsk->icsk_probes_out >= max_probes) {
abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
p[strlen(p) - 2] = '\0';
mod->is_dot_o = 1;
}
+ /* strip trailing .lto */
+ if (strends(p, ".lto"))
+ p[strlen(p) - 4] = '\0';
/* add to list */
mod->name = p;
* fromsec = text section
* refsymname = *.constprop.*
*
+ * Pattern 6:
+ * Hide section mismatch warnings for ELF local symbols. The goal
+ * is to eliminate false positive modpost warnings caused by
+ * compiler-generated ELF local symbol names such as ".LANCHOR1".
+ * Autogenerated symbol names bypass modpost's "Pattern 2"
+ * whitelisting, which relies on pattern-matching against symbol
+ * names to work. (One situation where gcc can autogenerate ELF
+ * local symbols is when "-fsection-anchors" is used.)
**/
static int secref_whitelist(const struct sectioncheck *mismatch,
const char *fromsec, const char *fromsym,
match(fromsym, optim_symbols))
return 0;
+ /* Check for pattern 6 */
+ if (strstarts(fromsym, ".L"))
+ return 0;
+
return 1;
}
size_t m = strspn(s + n + 1, "0123456789");
if (m && (s[n + m] == '.' || s[n + m] == 0))
s[n] = 0;
+
+ /* strip trailing .lto */
+ if (strends(s, ".lto"))
+ s[strlen(s) - 4] = '\0';
}
return s;
}
#define trace_hw_ptr_error(substream, reason)
#endif
+#define STRING_LENGTH_OF_INT 12
+#define MAX_USR_CTRL_CNT 128
+
/*
* fill ring buffer with silence
* runtime->silence_start: starting pointer to silence area
* the elapsed time to detect xruns.
*/
jdelta = curr_jiffies - runtime->hw_ptr_jiffies;
- if (jdelta < runtime->hw_ptr_buffer_jiffies / 2)
+ if ((jdelta < runtime->hw_ptr_buffer_jiffies / 2) ||
+ (runtime->hw_ptr_buffer_jiffies <= 0))
goto no_delta_check;
hdelta = jdelta - delta * HZ / runtime->rate;
xrun_threshold = runtime->hw_ptr_buffer_jiffies / 2 + 1;
switch (runtime->access) {
case SNDRV_PCM_ACCESS_MMAP_INTERLEAVED:
case SNDRV_PCM_ACCESS_RW_INTERLEAVED:
+ if ((UINT_MAX/width) < info->channel) {
+			snd_printd("%s: integer overflow in multiplication\n",
+ __func__);
+ return -EINVAL;
+ }
info->first = info->channel * width;
info->step = runtime->channels * width;
break;
case SNDRV_PCM_ACCESS_RW_NONINTERLEAVED:
{
size_t size = runtime->dma_bytes / runtime->channels;
+
+ if ((size > 0) && ((UINT_MAX/(size * 8)) < info->channel)) {
+			snd_printd("%s: integer overflow in multiplication\n",
+ __func__);
+ return -EINVAL;
+ }
info->first = info->channel * size * 8;
info->step = width;
break;
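Both checks added above follow the same idiom: before computing channel * width (or channel * size * 8) in unsigned arithmetic, confirm the second factor does not exceed UINT_MAX divided by the first. Stated on its own as a minimal sketch (this kernel predates the generic check_mul_overflow() helper, hence the division form):

#include <linux/kernel.h>

/* True when a * b would wrap an unsigned int; a == 0 never overflows. */
static inline bool umul_would_overflow(unsigned int a, unsigned int b)
{
	return a && b > UINT_MAX / a;
}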
struct snd_pcm_runtime *runtime;
unsigned long flags;
- if (PCM_RUNTIME_CHECK(substream))
+ if (snd_BUG_ON(!substream))
return;
- runtime = substream->runtime;
snd_pcm_stream_lock_irqsave(substream, flags);
+ if (PCM_RUNTIME_CHECK(substream))
+ goto _unlock;
+ runtime = substream->runtime;
+
if (!snd_pcm_running(substream) ||
snd_pcm_update_hw_ptr0(substream, 1) < 0)
goto _end;
#endif
_end:
kill_fasync(&runtime->fasync, SIGIO, POLL_IN);
+ _unlock:
snd_pcm_stream_unlock_irqrestore(substream, flags);
}
struct snd_pcm_runtime *runtime;
if (PCM_RUNTIME_CHECK(substream))
return -ENXIO;
+	/* TODO: consider returning -EINVAL here */
+ if (substream->hw_no_buffer)
+		snd_printd("%s: warning: this PCM is hostless\n", __func__);
runtime = substream->runtime;
if (snd_BUG_ON(!substream->ops->copy && !runtime->dma_area))
return -EINVAL;
kfree(info);
}
+static int pcm_volume_ctl_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = 0x2000;
+ return 0;
+}
+
+static void pcm_volume_ctl_private_free(struct snd_kcontrol *kcontrol)
+{
+ struct snd_pcm_volume *info = snd_kcontrol_chip(kcontrol);
+ info->pcm->streams[info->stream].vol_kctl = NULL;
+ kfree(info);
+}
+
/**
* snd_pcm_add_chmap_ctls - create channel-mapping control elements
* @pcm: the assigned PCM instance
return 0;
}
EXPORT_SYMBOL_GPL(snd_pcm_add_chmap_ctls);
+
+/**
+ * snd_pcm_add_volume_ctls - create volume control elements
+ * @pcm: the assigned PCM instance
+ * @stream: stream direction
+ * @volume: the volume element(s) assigned to the stream
+ * @max_length: the max length of the volume parameter of the stream
+ * @private_value: the value passed to each kcontrol's private_value field
+ * @info_ret: store struct snd_pcm_volume instance if non-NULL
+ *
+ * Create volume control elements assigned to the given PCM stream(s).
+ * Returns zero on success, or a negative error value.
+ */
+int snd_pcm_add_volume_ctls(struct snd_pcm *pcm, int stream,
+ const struct snd_pcm_volume_elem *volume,
+ int max_length,
+ unsigned long private_value,
+ struct snd_pcm_volume **info_ret)
+{
+ struct snd_pcm_volume *info;
+ struct snd_kcontrol_new knew = {
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+ SNDRV_CTL_ELEM_ACCESS_READWRITE,
+ .info = pcm_volume_ctl_info,
+ };
+ int err;
+ int size;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ info->pcm = pcm;
+ info->stream = stream;
+ info->volume = volume;
+ info->max_length = max_length;
+ size = sizeof("Playback ") + sizeof(" Volume") +
+ STRING_LENGTH_OF_INT*sizeof(char) + 1;
+ knew.name = kzalloc(size, GFP_KERNEL);
+ if (!knew.name) {
+ kfree(info);
+ return -ENOMEM;
+ }
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+ snprintf((char *)knew.name, size, "%s %d %s",
+ "Playback", pcm->device, "Volume");
+ else
+ snprintf((char *)knew.name, size, "%s %d %s",
+ "Capture", pcm->device, "Volume");
+ knew.device = pcm->device;
+ knew.count = pcm->streams[stream].substream_count;
+ knew.private_value = private_value;
+ info->kctl = snd_ctl_new1(&knew, info);
+ if (!info->kctl) {
+ kfree(info);
+ kfree(knew.name);
+ return -ENOMEM;
+ }
+ info->kctl->private_free = pcm_volume_ctl_private_free;
+ err = snd_ctl_add(pcm->card, info->kctl);
+ if (err < 0) {
+ kfree(info);
+ kfree(knew.name);
+ return -ENOMEM;
+ }
+ pcm->streams[stream].vol_kctl = info->kctl;
+ if (info_ret)
+ *info_ret = info;
+ kfree(knew.name);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(snd_pcm_add_volume_ctls);
+
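A hedged sketch of a caller for the new export: a hypothetical ASoC platform driver adding a playback volume control from its pcm_new hook. The element definition (my_vol_elem) is left empty because its callbacks depend on how struct snd_pcm_volume_elem is defined in this tree, and the max_length value passed is an assumption.

#include <sound/pcm.h>
#include <sound/soc.h>

static struct snd_pcm_volume_elem my_vol_elem;	/* fill in per-tree callbacks */

/* Hypothetical pcm_new hook registering a playback volume control. */
static int my_pcm_new(struct snd_soc_pcm_runtime *rtd)
{
	struct snd_pcm_volume *vol_info;

	return snd_pcm_add_volume_ctls(rtd->pcm, SNDRV_PCM_STREAM_PLAYBACK,
				       &my_vol_elem,
				       1,			/* max_length (assumed) */
				       (unsigned long)rtd,	/* private_value */
				       &vol_info);
}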
+static int pcm_usr_ctl_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = MAX_USR_CTRL_CNT;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = INT_MAX;
+ return 0;
+}
+
+static void pcm_usr_ctl_private_free(struct snd_kcontrol *kcontrol)
+{
+ struct snd_pcm_usr *info = snd_kcontrol_chip(kcontrol);
+ info->pcm->streams[info->stream].usr_kctl = NULL;
+ kfree(info);
+}
+
+/**
+ * snd_pcm_add_usr_ctls - create user control elements
+ * @pcm: the assigned PCM instance
+ * @stream: stream direction
+ * @usr: the user control element(s) assigned to the stream
+ * @max_length: the max length of the user parameter of the stream
+ * @max_kctrl_str_len: the max length of the kcontrol name string
+ * @private_value: the value passed to each kcontrol's private_value field
+ * @info_ret: store struct snd_pcm_usr instance if non-NULL
+ *
+ * Create user control elements assigned to the given PCM stream(s).
+ * Returns zero on success, or a negative error value.
+ */
+int snd_pcm_add_usr_ctls(struct snd_pcm *pcm, int stream,
+ const struct snd_pcm_usr_elem *usr,
+ int max_length, int max_kctrl_str_len,
+ unsigned long private_value,
+ struct snd_pcm_usr **info_ret)
+{
+ struct snd_pcm_usr *info;
+ struct snd_kcontrol_new knew = {
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+ .info = pcm_usr_ctl_info,
+ };
+ int err;
+ char *buf;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ pr_err("%s: snd_pcm_usr alloc failed\n", __func__);
+ return -ENOMEM;
+ }
+ info->pcm = pcm;
+ info->stream = stream;
+ info->usr = usr;
+ info->max_length = max_length;
+ buf = kzalloc(max_kctrl_str_len, GFP_KERNEL);
+ if (!buf) {
+ pr_err("%s: buffer allocation failed\n", __func__);
+ kfree(info);
+ return -ENOMEM;
+ }
+ knew.name = buf;
+ if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+ snprintf(buf, max_kctrl_str_len, "%s %d %s",
+ "Playback", pcm->device, "User kcontrol");
+ else
+ snprintf(buf, max_kctrl_str_len, "%s %d %s",
+ "Capture", pcm->device, "User kcontrol");
+ knew.device = pcm->device;
+ knew.count = pcm->streams[stream].substream_count;
+ knew.private_value = private_value;
+ info->kctl = snd_ctl_new1(&knew, info);
+ if (!info->kctl) {
+ kfree(info);
+ kfree(knew.name);
+ pr_err("%s: snd_ctl_new failed\n", __func__);
+ return -ENOMEM;
+ }
+ info->kctl->private_free = pcm_usr_ctl_private_free;
+ err = snd_ctl_add(pcm->card, info->kctl);
+ if (err < 0) {
+ kfree(info);
+ kfree(knew.name);
+ pr_err("%s: snd_ctl_add failed:%d\n", __func__,
+ err);
+ return -ENOMEM;
+ }
+ pcm->streams[stream].usr_kctl = info->kctl;
+ if (info_ret)
+ *info_ret = info;
+ kfree(knew.name);
+ return 0;
+}
+EXPORT_SYMBOL(snd_pcm_add_usr_ctls);