Merge branches 'acpi-scan', 'acpi-sysfs', 'acpi-wdat' and 'acpi-tables'

author Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Fri, 6 Jan 2017 13:36:30 +0000 (14:36 +0100)

committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Fri, 6 Jan 2017 13:36:30 +0000 (14:36 +0100)
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Fri, 6 Jan 2017 13:36:30 +0000 (14:36 +0100)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Fri, 6 Jan 2017 13:36:30 +0000 (14:36 +0100)
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile

index c75e5d6..a6eb7dc 100644 (file)
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml  \
             kernel-api.xml filesystems.xml lsm.xml kgdb.xml \
             gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
             genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
-           80211.xml sh.xml regulator.xml w1.xml \
+           sh.xml regulator.xml w1.xml \
             writing_musb_glue_layer.xml iio.xml
  
  ifeq ($(DOCBOOKS),)
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt

index a445da0..3f76c0c 100644 (file)
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -151,7 +151,7 @@ bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
  #else
         const u16 *a = (const u16 *)addr1;
         const u16 *b = (const u16 *)addr2;
-       return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+       return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
  #endif
  }
  
diff --git a/Makefile b/Makefile

index ec411ba..5470d59 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
  VERSION = 4
  PATCHLEVEL = 10
  SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
  NAME = Roaring Lionus
  
  # *DOCUMENTATION*
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h

new file mode 100644 (file)

index 0000000..df411f3
--- /dev/null
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -0,0 +1,65 @@
+#ifndef __ASM_ASM_UACCESS_H
+#define __ASM_ASM_UACCESS_H
+
+#include <asm/alternative.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/sysreg.h>
+#include <asm/assembler.h>
+
+/*
+ * User access enabling/disabling macros.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+       .macro  __uaccess_ttbr0_disable, tmp1
+       mrs     \tmp1, ttbr1_el1                // swapper_pg_dir
+       add     \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
+       msr     ttbr0_el1, \tmp1                // set reserved TTBR0_EL1
+       isb
+       .endm
+
+       .macro  __uaccess_ttbr0_enable, tmp1
+       get_thread_info \tmp1
+       ldr     \tmp1, [\tmp1, #TSK_TI_TTBR0]   // load saved TTBR0_EL1
+       msr     ttbr0_el1, \tmp1                // set the non-PAN TTBR0_EL1
+       isb
+       .endm
+
+       .macro  uaccess_ttbr0_disable, tmp1
+alternative_if_not ARM64_HAS_PAN
+       __uaccess_ttbr0_disable \tmp1
+alternative_else_nop_endif
+       .endm
+
+       .macro  uaccess_ttbr0_enable, tmp1, tmp2
+alternative_if_not ARM64_HAS_PAN
+       save_and_disable_irq \tmp2              // avoid preemption
+       __uaccess_ttbr0_enable \tmp1
+       restore_irq \tmp2
+alternative_else_nop_endif
+       .endm
+#else
+       .macro  uaccess_ttbr0_disable, tmp1
+       .endm
+
+       .macro  uaccess_ttbr0_enable, tmp1, tmp2
+       .endm
+#endif
+
+/*
+ * These macros are no-ops when UAO is present.
+ */
+       .macro  uaccess_disable_not_uao, tmp1
+       uaccess_ttbr0_disable \tmp1
+alternative_if ARM64_ALT_PAN_NOT_UAO
+       SET_PSTATE_PAN(1)
+alternative_else_nop_endif
+       .endm
+
+       .macro  uaccess_enable_not_uao, tmp1, tmp2
+       uaccess_ttbr0_enable \tmp1, \tmp2
+alternative_if ARM64_ALT_PAN_NOT_UAO
+       SET_PSTATE_PAN(0)
+alternative_else_nop_endif
+       .endm
+
+#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h

index d26750c..46da3ea 100644 (file)
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -22,8 +22,6 @@
  #include <asm/kernel-pgtable.h>
  #include <asm/sysreg.h>
  
-#ifndef __ASSEMBLY__
-
  /*
   * User space memory access functions
   */
@@ -424,66 +422,4 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
  extern __must_check long strlen_user(const char __user *str);
  extern __must_check long strnlen_user(const char __user *str, long n);
  
-#else  /* __ASSEMBLY__ */
-
-#include <asm/assembler.h>
-
-/*
- * User access enabling/disabling macros.
- */
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
-       .macro  __uaccess_ttbr0_disable, tmp1
-       mrs     \tmp1, ttbr1_el1                // swapper_pg_dir
-       add     \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
-       msr     ttbr0_el1, \tmp1                // set reserved TTBR0_EL1
-       isb
-       .endm
-
-       .macro  __uaccess_ttbr0_enable, tmp1
-       get_thread_info \tmp1
-       ldr     \tmp1, [\tmp1, #TSK_TI_TTBR0]   // load saved TTBR0_EL1
-       msr     ttbr0_el1, \tmp1                // set the non-PAN TTBR0_EL1
-       isb
-       .endm
-
-       .macro  uaccess_ttbr0_disable, tmp1
-alternative_if_not ARM64_HAS_PAN
-       __uaccess_ttbr0_disable \tmp1
-alternative_else_nop_endif
-       .endm
-
-       .macro  uaccess_ttbr0_enable, tmp1, tmp2
-alternative_if_not ARM64_HAS_PAN
-       save_and_disable_irq \tmp2              // avoid preemption
-       __uaccess_ttbr0_enable \tmp1
-       restore_irq \tmp2
-alternative_else_nop_endif
-       .endm
-#else
-       .macro  uaccess_ttbr0_disable, tmp1
-       .endm
-
-       .macro  uaccess_ttbr0_enable, tmp1, tmp2
-       .endm
-#endif
-
-/*
- * These macros are no-ops when UAO is present.
- */
-       .macro  uaccess_disable_not_uao, tmp1
-       uaccess_ttbr0_disable \tmp1
-alternative_if ARM64_ALT_PAN_NOT_UAO
-       SET_PSTATE_PAN(1)
-alternative_else_nop_endif
-       .endm
-
-       .macro  uaccess_enable_not_uao, tmp1, tmp2
-       uaccess_ttbr0_enable \tmp1, \tmp2
-alternative_if ARM64_ALT_PAN_NOT_UAO
-       SET_PSTATE_PAN(0)
-alternative_else_nop_endif
-       .endm
-
-#endif /* __ASSEMBLY__ */
-
  #endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

index a7504f4..923841f 100644 (file)
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -31,7 +31,7 @@
  #include <asm/memory.h>
  #include <asm/ptrace.h>
  #include <asm/thread_info.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  #include <asm/unistd.h>
  
  /*
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S

index add4a13..e88fb99 100644 (file)
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,7 +17,7 @@
   */
  #include <linux/linkage.h>
  
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  
         .text
  
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S

index fd6cd05..4b5d826 100644 (file)
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -17,7 +17,7 @@
  #include <linux/linkage.h>
  
  #include <asm/cache.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  
  /*
   * Copy from user space to a kernel buffer (alignment handled by the hardware)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S

index d828540..47184c3 100644 (file)
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -19,7 +19,7 @@
  #include <linux/linkage.h>
  
  #include <asm/cache.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  
  /*
   * Copy from user space to user space (alignment handled by the hardware)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S

index 3e6ae26..351f076 100644 (file)
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -17,7 +17,7 @@
  #include <linux/linkage.h>
  
  #include <asm/cache.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  
  /*
   * Copy to user space from a kernel buffer (alignment handled by the hardware)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S

index 17f422a..83c27b6 100644 (file)
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -23,7 +23,7 @@
  #include <asm/assembler.h>
  #include <asm/cpufeature.h>
  #include <asm/alternative.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  
  /*
   *     flush_icache_range(start,end)
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S

index 47cf3f9..947830a 100644 (file)
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -49,7 +49,7 @@
  
  #include <linux/linkage.h>
  #include <asm/assembler.h>
-#include <linux/uaccess.h>
+#include <asm/asm-uaccess.h>
  #include <xen/interface/xen.h>
  
  
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h

index 68557f5..8540227 100644 (file)
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
         asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
  }
  
+static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+{
+       bool negative;
+       asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+               CC_SET(s)
+               : CC_OUT(s) (negative), ADDR
+               : "ir" ((char) ~(1 << nr)) : "memory");
+       return negative;
+}
+
+// Let everybody know we have it
+#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+
  /*
   * __clear_bit_unlock - Clears a bit in memory
   * @nr: Bit to clear
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c

index ffacfdc..a5fd137 100644 (file)
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1182,6 +1182,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
         const char *name = get_name(bank, NULL);
         int err = 0;
  
+       if (!dev)
+               return -ENODEV;
+
         if (is_shared_bank(bank)) {
                 nb = node_to_amd_nb(amd_get_nb_id(cpu));
  
diff --git a/crypto/testmgr.c b/crypto/testmgr.c

index f616ad7..44e888b 100644 (file)
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1461,16 +1461,25 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
         for (i = 0; i < ctcount; i++) {
                 unsigned int dlen = COMP_BUF_SIZE;
                 int ilen = ctemplate[i].inlen;
+               void *input_vec;
  
+               input_vec = kmalloc(ilen, GFP_KERNEL);
+               if (!input_vec) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               memcpy(input_vec, ctemplate[i].input, ilen);
                 memset(output, 0, dlen);
                 init_completion(&result.completion);
-               sg_init_one(&src, ctemplate[i].input, ilen);
+               sg_init_one(&src, input_vec, ilen);
                 sg_init_one(&dst, output, dlen);
  
                 req = acomp_request_alloc(tfm);
                 if (!req) {
                         pr_err("alg: acomp: request alloc failed for %s\n",
                                algo);
+                       kfree(input_vec);
                         ret = -ENOMEM;
                         goto out;
                 }
@@ -1483,6 +1492,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
                 if (ret) {
                         pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n",
                                i + 1, algo, -ret);
+                       kfree(input_vec);
                         acomp_request_free(req);
                         goto out;
                 }
@@ -1491,6 +1501,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
                         pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n",
                                i + 1, algo, req->dlen);
                         ret = -EINVAL;
+                       kfree(input_vec);
                         acomp_request_free(req);
                         goto out;
                 }
@@ -1500,26 +1511,37 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
                                i + 1, algo);
                         hexdump(output, req->dlen);
                         ret = -EINVAL;
+                       kfree(input_vec);
                         acomp_request_free(req);
                         goto out;
                 }
  
+               kfree(input_vec);
                 acomp_request_free(req);
         }
  
         for (i = 0; i < dtcount; i++) {
                 unsigned int dlen = COMP_BUF_SIZE;
                 int ilen = dtemplate[i].inlen;
+               void *input_vec;
+
+               input_vec = kmalloc(ilen, GFP_KERNEL);
+               if (!input_vec) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
  
+               memcpy(input_vec, dtemplate[i].input, ilen);
                 memset(output, 0, dlen);
                 init_completion(&result.completion);
-               sg_init_one(&src, dtemplate[i].input, ilen);
+               sg_init_one(&src, input_vec, ilen);
                 sg_init_one(&dst, output, dlen);
  
                 req = acomp_request_alloc(tfm);
                 if (!req) {
                         pr_err("alg: acomp: request alloc failed for %s\n",
                                algo);
+                       kfree(input_vec);
                         ret = -ENOMEM;
                         goto out;
                 }
@@ -1532,6 +1554,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
                 if (ret) {
                         pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n",
                                i + 1, algo, -ret);
+                       kfree(input_vec);
                         acomp_request_free(req);
                         goto out;
                 }
@@ -1540,6 +1563,7 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
                         pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n",
                                i + 1, algo, req->dlen);
                         ret = -EINVAL;
+                       kfree(input_vec);
                         acomp_request_free(req);
                         goto out;
                 }
@@ -1549,10 +1573,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
                                i + 1, algo);
                         hexdump(output, req->dlen);
                         ret = -EINVAL;
+                       kfree(input_vec);
                         acomp_request_free(req);
                         goto out;
                 }
  
+               kfree(input_vec);
                 acomp_request_free(req);
         }
  
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c

index 13caebd..8c4e0a1 100644 (file)
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -114,7 +114,7 @@ void __init acpi_watchdog_init(void)
         pdev = platform_device_register_simple("wdat_wdt", PLATFORM_DEVID_NONE,
                                                resources, nresources);
         if (IS_ERR(pdev))
-               pr_err("Failed to create platform device\n");
+               pr_err("Device creation failed: %ld\n", PTR_ERR(pdev));
  
         kfree(resources);
  
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c

index f8d6564..fb19e1c 100644 (file)
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -98,7 +98,15 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
         if (check_children && list_empty(&adev->children))
                 return -ENODEV;
  
-       return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
+       /*
+        * If the device has a _HID (or _CID) returning a valid ACPI/PNP
+        * device ID, it is better to make it look less attractive here, so that
+        * the other device with the same _ADR value (that may not have a valid
+        * device ID) can be matched going forward.  [This means a second spec
+        * violation in a row, so whatever we do here is best effort anyway.]
+        */
+       return sta_present && list_empty(&adev->pnp.ids) ?
+                       FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
  }
  
  struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
@@ -250,7 +258,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev)
         return 0;
  
   err:
-       acpi_dma_deconfigure(dev);
         ACPI_COMPANION_SET(dev, NULL);
         put_device(dev);
         put_device(&acpi_dev->dev);
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h

index a768da7..b7872f6 100644 (file)
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -273,7 +273,8 @@ struct mv_cesa_op_ctx {
  #define CESA_TDMA_SRC_IN_SRAM                  BIT(30)
  #define CESA_TDMA_END_OF_REQ                   BIT(29)
  #define CESA_TDMA_BREAK_CHAIN                  BIT(28)
-#define CESA_TDMA_TYPE_MSK                     GENMASK(27, 0)
+#define CESA_TDMA_SET_STATE                    BIT(27)
+#define CESA_TDMA_TYPE_MSK                     GENMASK(26, 0)
  #define CESA_TDMA_DUMMY                                0
  #define CESA_TDMA_DATA                         1
  #define CESA_TDMA_OP                           2
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c

index 317cf02..77c0fb9 100644 (file)
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -280,13 +280,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req)
         sreq->offset = 0;
  }
  
+static void mv_cesa_ahash_dma_step(struct ahash_request *req)
+{
+       struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+       struct mv_cesa_req *base = &creq->base;
+
+       /* We must explicitly set the digest state. */
+       if (base->chain.first->flags & CESA_TDMA_SET_STATE) {
+               struct mv_cesa_engine *engine = base->engine;
+               int i;
+
+               /* Set the hash state in the IVDIG regs. */
+               for (i = 0; i < ARRAY_SIZE(creq->state); i++)
+                       writel_relaxed(creq->state[i], engine->regs +
+                                      CESA_IVDIG(i));
+       }
+
+       mv_cesa_dma_step(base);
+}
+
  static void mv_cesa_ahash_step(struct crypto_async_request *req)
  {
         struct ahash_request *ahashreq = ahash_request_cast(req);
         struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
  
         if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ)
-               mv_cesa_dma_step(&creq->base);
+               mv_cesa_ahash_dma_step(ahashreq);
         else
                 mv_cesa_ahash_std_step(ahashreq);
  }
@@ -584,12 +603,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
         struct mv_cesa_ahash_dma_iter iter;
         struct mv_cesa_op_ctx *op = NULL;
         unsigned int frag_len;
+       bool set_state = false;
         int ret;
         u32 type;
  
         basereq->chain.first = NULL;
         basereq->chain.last = NULL;
  
+       if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl))
+               set_state = true;
+
         if (creq->src_nents) {
                 ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents,
                                  DMA_TO_DEVICE);
@@ -683,6 +706,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
         if (type != CESA_TDMA_RESULT)
                 basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
  
+       if (set_state) {
+               /*
+                * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to
+                * let the step logic know that the IVDIG registers should be
+                * explicitly set before launching a TDMA chain.
+                */
+               basereq->chain.first->flags |= CESA_TDMA_SET_STATE;
+       }
+
         return 0;
  
  err_free_tdma:
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c

index 4416b88..c76375f 100644 (file)
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine,
                 last->next = dreq->chain.first;
                 engine->chain.last = dreq->chain.last;
  
-               if (!(last->flags & CESA_TDMA_BREAK_CHAIN))
+               /*
+                * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on
+                * the last element of the current chain, or if the request
+                * being queued needs the IV regs to be set before launching
+                * the request.
+                */
+               if (!(last->flags & CESA_TDMA_BREAK_CHAIN) &&
+                   !(dreq->chain.first->flags & CESA_TDMA_SET_STATE))
                         last->next_dma = dreq->chain.first->cur_dma;
         }
  }
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c

index a88576d..8ccbd70 100644 (file)
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -903,8 +903,10 @@ int __init detect_intel_iommu(void)
                 x86_init.iommu.iommu_init = intel_iommu_init;
  #endif
  
-       acpi_put_table(dmar_tbl);
-       dmar_tbl = NULL;
+       if (dmar_tbl) {
+               acpi_put_table(dmar_tbl);
+               dmar_tbl = NULL;
+       }
         up_write(&dmar_global_lock);
  
         return ret ? 1 : -ENODEV;
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c

index cbeea91..8037426 100644 (file)
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work)
                                 DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR,
                                 &lp->rx_dma_regs->dmasm);
  
-       korina_free_ring(dev);
-
         napi_disable(&lp->napi);
  
+       korina_free_ring(dev);
+
         if (korina_init(dev) < 0) {
                 printk(KERN_ERR "%s: cannot restart device\n", dev->name);
                 return;
@@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev)
         tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR;
         writel(tmp, &lp->rx_dma_regs->dmasm);
  
-       korina_free_ring(dev);
-
         napi_disable(&lp->napi);
  
         cancel_work_sync(&lp->restart_task);
  
+       korina_free_ring(dev);
+
         free_irq(lp->rx_irq, dev);
         free_irq(lp->tx_irq, dev);
         free_irq(lp->ovr_irq, dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

index bcd9553..edbe200 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev)
  
         /* Configure tx cq's and rings */
         for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
-               u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
+               u8 num_tx_rings_p_up = t == TX ?
+                       priv->num_tx_rings_p_up : priv->tx_ring_num[t];
  
                 for (i = 0; i < priv->tx_ring_num[t]; i++) {
                         /* Configure cq */
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c

index f9b97f5..44389c9 100644 (file)
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -326,6 +326,7 @@ enum cfg_version {
  static const struct pci_device_id rtl8169_pci_tbl[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8129), 0, 0, RTL_CFG_0 },
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8136), 0, 0, RTL_CFG_2 },
+       { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8161), 0, 0, RTL_CFG_1 },
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8167), 0, 0, RTL_CFG_0 },
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8168), 0, 0, RTL_CFG_1 },
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     0x8169), 0, 0, RTL_CFG_0 },
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c

index fda01f7..b0344c2 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -116,7 +116,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
         unsigned int mii_address = priv->hw->mii.addr;
         unsigned int mii_data = priv->hw->mii.data;
  
-       u32 value = MII_WRITE | MII_BUSY;
+       u32 value = MII_BUSY;
  
         value |= (phyaddr << priv->hw->mii.addr_shift)
                 & priv->hw->mii.addr_mask;
@@ -126,6 +126,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
                 & priv->hw->mii.clk_csr_mask;
         if (priv->plat->has_gmac4)
                 value |= MII_GMAC4_WRITE;
+       else
+               value |= MII_WRITE;
  
         /* Wait until any existing MII operation is complete */
         if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h

index 031093e..dbfbb33 100644 (file)
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -99,6 +99,11 @@ struct ipvl_port {
         int                     count;
  };
  
+struct ipvl_skb_cb {
+       bool tx_pkt;
+};
+#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0]))
+
  static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
  {
         return rcu_dereference(d->rx_handler_data);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c

index b4e9907..83ce74a 100644 (file)
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work)
         unsigned int mac_hash;
         int ret;
         u8 pkt_type;
-       bool hlocal, dlocal;
+       bool tx_pkt;
  
         __skb_queue_head_init(&list);
  
@@ -207,8 +207,11 @@ void ipvlan_process_multicast(struct work_struct *work)
         spin_unlock_bh(&port->backlog.lock);
  
         while ((skb = __skb_dequeue(&list)) != NULL) {
+               struct net_device *dev = skb->dev;
+               bool consumed = false;
+
                 ethh = eth_hdr(skb);
-               hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
+               tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
                 mac_hash = ipvlan_mac_hash(ethh->h_dest);
  
                 if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
@@ -216,41 +219,45 @@ void ipvlan_process_multicast(struct work_struct *work)
                 else
                         pkt_type = PACKET_MULTICAST;
  
-               dlocal = false;
                 rcu_read_lock();
                 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-                       if (hlocal && (ipvlan->dev == skb->dev)) {
-                               dlocal = true;
+                       if (tx_pkt && (ipvlan->dev == skb->dev))
                                 continue;
-                       }
                         if (!test_bit(mac_hash, ipvlan->mac_filters))
                                 continue;
-
+                       if (!(ipvlan->dev->flags & IFF_UP))
+                               continue;
                         ret = NET_RX_DROP;
                         len = skb->len + ETH_HLEN;
                         nskb = skb_clone(skb, GFP_ATOMIC);
-                       if (!nskb)
-                               goto acct;
-
-                       nskb->pkt_type = pkt_type;
-                       nskb->dev = ipvlan->dev;
-                       if (hlocal)
-                               ret = dev_forward_skb(ipvlan->dev, nskb);
-                       else
-                               ret = netif_rx(nskb);
-acct:
+                       local_bh_disable();
+                       if (nskb) {
+                               consumed = true;
+                               nskb->pkt_type = pkt_type;
+                               nskb->dev = ipvlan->dev;
+                               if (tx_pkt)
+                                       ret = dev_forward_skb(ipvlan->dev, nskb);
+                               else
+                                       ret = netif_rx(nskb);
+                       }
                         ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+                       local_bh_enable();
                 }
                 rcu_read_unlock();
  
-               if (dlocal) {
+               if (tx_pkt) {
                         /* If the packet originated here, send it out. */
                         skb->dev = port->dev;
                         skb->pkt_type = pkt_type;
                         dev_queue_xmit(skb);
                 } else {
-                       kfree_skb(skb);
+                       if (consumed)
+                               consume_skb(skb);
+                       else
+                               kfree_skb(skb);
                 }
+               if (dev)
+                       dev_put(dev);
         }
  }
  
@@ -470,15 +477,24 @@ out:
  }
  
  static void ipvlan_multicast_enqueue(struct ipvl_port *port,
-                                    struct sk_buff *skb)
+                                    struct sk_buff *skb, bool tx_pkt)
  {
         if (skb->protocol == htons(ETH_P_PAUSE)) {
                 kfree_skb(skb);
                 return;
         }
  
+       /* Record whether the deferred packet is from the TX or RX path.
+        * Looking at mac-addresses on the packet leads to erroneous decisions.
+        * (This would be true for a loopback-mode on master device or a
+        * hair-pin mode of the switch.)
+        */
+       IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
+
         spin_lock(&port->backlog.lock);
         if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+               if (skb->dev)
+                       dev_hold(skb->dev);
                 __skb_queue_tail(&port->backlog, skb);
                 spin_unlock(&port->backlog.lock);
                 schedule_work(&port->wq);
@@ -537,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
  
         } else if (is_multicast_ether_addr(eth->h_dest)) {
                 ipvlan_skb_crossing_ns(skb, NULL);
-               ipvlan_multicast_enqueue(ipvlan->port, skb);
+               ipvlan_multicast_enqueue(ipvlan->port, skb, true);
                 return NET_XMIT_SUCCESS;
         }
  
@@ -634,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
                          */
                         if (nskb) {
                                 ipvlan_skb_crossing_ns(nskb, NULL);
-                               ipvlan_multicast_enqueue(port, nskb);
+                               ipvlan_multicast_enqueue(port, nskb, false);
                         }
                 }
         } else {
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c

index 693ec5b..8b0f993 100644 (file)
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -135,6 +135,7 @@ err:
  static void ipvlan_port_destroy(struct net_device *dev)
  {
         struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
+       struct sk_buff *skb;
  
         dev->priv_flags &= ~IFF_IPVLAN_MASTER;
         if (port->mode == IPVLAN_MODE_L3S) {
@@ -144,7 +145,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
         }
         netdev_rx_handler_unregister(dev);
         cancel_work_sync(&port->wq);
-       __skb_queue_purge(&port->backlog);
+       while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
+               if (skb->dev)
+                       dev_put(skb->dev);
+               kfree_skb(skb);
+       }
         kfree(port);
  }
  
diff --git a/fs/dax.c b/fs/dax.c

index a8732fb..5c74f60 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
                 __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
  }
  
+static int __dax_invalidate_mapping_entry(struct address_space *mapping,
+                                         pgoff_t index, bool trunc)
+{
+       int ret = 0;
+       void *entry;
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+
+       spin_lock_irq(&mapping->tree_lock);
+       entry = get_unlocked_mapping_entry(mapping, index, NULL);
+       if (!entry || !radix_tree_exceptional_entry(entry))
+               goto out;
+       if (!trunc &&
+           (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+            radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+               goto out;
+       radix_tree_delete(page_tree, index);
+       mapping->nrexceptional--;
+       ret = 1;
+out:
+       put_unlocked_mapping_entry(mapping, index, entry);
+       spin_unlock_irq(&mapping->tree_lock);
+       return ret;
+}
  /*
   * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
   * entry to get unlocked before deleting it.
   */
  int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
  {
-       void *entry;
+       int ret = __dax_invalidate_mapping_entry(mapping, index, true);
  
-       spin_lock_irq(&mapping->tree_lock);
-       entry = get_unlocked_mapping_entry(mapping, index, NULL);
         /*
          * This gets called from truncate / punch_hole path. As such, the caller
          * must hold locks protecting against concurrent modifications of the
@@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
          * caller has seen exceptional entry for this index, we better find it
          * at that index as well...
          */
-       if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) {
-               spin_unlock_irq(&mapping->tree_lock);
-               return 0;
-       }
-       radix_tree_delete(&mapping->page_tree, index);
+       WARN_ON_ONCE(!ret);
+       return ret;
+}
+
+/*
+ * Invalidate exceptional DAX entry if easily possible. This handles DAX
+ * entries for invalidate_inode_pages() so we evict the entry only if we can
+ * do so without blocking.
+ */
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
+{
+       int ret = 0;
+       void *entry, **slot;
+       struct radix_tree_root *page_tree = &mapping->page_tree;
+
+       spin_lock_irq(&mapping->tree_lock);
+       entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
+       if (!entry || !radix_tree_exceptional_entry(entry) ||
+           slot_locked(mapping, slot))
+               goto out;
+       if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+           radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+               goto out;
+       radix_tree_delete(page_tree, index);
         mapping->nrexceptional--;
+       ret = 1;
+out:
         spin_unlock_irq(&mapping->tree_lock);
-       dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+       if (ret)
+               dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+       return ret;
+}
  
-       return 1;
+/*
+ * Invalidate exceptional DAX entry if it is clean.
+ */
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+                                     pgoff_t index)
+{
+       return __dax_invalidate_mapping_entry(mapping, index, false);
  }
  
  /*
@@ -488,15 +539,16 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
   * otherwise it will simply fall out of the page cache under memory
   * pressure without ever having been dirtied.
   */
-static int dax_load_hole(struct address_space *mapping, void *entry,
+static int dax_load_hole(struct address_space *mapping, void **entry,
                          struct vm_fault *vmf)
  {
         struct page *page;
+       int ret;
  
         /* Hole page already exists? Return it...  */
-       if (!radix_tree_exceptional_entry(entry)) {
-               vmf->page = entry;
-               return VM_FAULT_LOCKED;
+       if (!radix_tree_exceptional_entry(*entry)) {
+               page = *entry;
+               goto out;
         }
  
         /* This will replace locked radix tree entry with a hole page */
@@ -504,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
                                    vmf->gfp_mask | __GFP_ZERO);
         if (!page)
                 return VM_FAULT_OOM;
+ out:
         vmf->page = page;
-       return VM_FAULT_LOCKED;
+       ret = finish_fault(vmf);
+       vmf->page = NULL;
+       *entry = page;
+       if (!ret) {
+               /* Grab reference for PTE that is now referencing the page */
+               get_page(page);
+               return VM_FAULT_NOPAGE;
+       }
+       return ret;
  }
  
  static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
@@ -934,6 +995,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
         if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
                 return -EIO;
  
+       /*
+        * Write can allocate block for an area which has a hole page mapped
+        * into page tables. We have to tear down these mappings so that data
+        * written by write(2) is visible in mmap.
+        */
+       if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+               invalidate_inode_pages2_range(inode->i_mapping,
+                                             pos >> PAGE_SHIFT,
+                                             (end - 1) >> PAGE_SHIFT);
+       }
+
         while (pos < end) {
                 unsigned offset = pos & (PAGE_SIZE - 1);
                 struct blk_dax_ctl dax = { 0 };
@@ -992,23 +1064,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
         if (iov_iter_rw(iter) == WRITE)
                 flags |= IOMAP_WRITE;
  
-       /*
-        * Yes, even DAX files can have page cache attached to them:  A zeroed
-        * page is inserted into the pagecache when we have to serve a write
-        * fault on a hole.  It should never be dirtied and can simply be
-        * dropped from the pagecache once we get real data for the page.
-        *
-        * XXX: This is racy against mmap, and there's nothing we can do about
-        * it. We'll eventually need to shift this down even further so that
-        * we can check if we allocated blocks over a hole first.
-        */
-       if (mapping->nrpages) {
-               ret = invalidate_inode_pages2_range(mapping,
-                               pos >> PAGE_SHIFT,
-                               (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
-               WARN_ON_ONCE(ret);
-       }
-
         while (iov_iter_count(iter)) {
                 ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
                                 iter, dax_iomap_actor);
@@ -1023,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
  }
  EXPORT_SYMBOL_GPL(dax_iomap_rw);
  
+static int dax_fault_return(int error)
+{
+       if (error == 0)
+               return VM_FAULT_NOPAGE;
+       if (error == -ENOMEM)
+               return VM_FAULT_OOM;
+       return VM_FAULT_SIGBUS;
+}
+
  /**
   * dax_iomap_fault - handle a page fault on a DAX file
   * @vma: The virtual memory area where the fault occurred
@@ -1055,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
         if (pos >= i_size_read(inode))
                 return VM_FAULT_SIGBUS;
  
-       entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-       if (IS_ERR(entry)) {
-               error = PTR_ERR(entry);
-               goto out;
-       }
-
         if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
                 flags |= IOMAP_WRITE;
  
@@ -1071,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
          */
         error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
         if (error)
-               goto unlock_entry;
+               return dax_fault_return(error);
         if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-               error = -EIO;           /* fs corruption? */
+               vmf_ret = dax_fault_return(-EIO);       /* fs corruption? */
+               goto finish_iomap;
+       }
+
+       entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+       if (IS_ERR(entry)) {
+               vmf_ret = dax_fault_return(PTR_ERR(entry));
                 goto finish_iomap;
         }
  
@@ -1096,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 }
  
                 if (error)
-                       goto finish_iomap;
+                       goto error_unlock_entry;
  
                 __SetPageUptodate(vmf->cow_page);
                 vmf_ret = finish_fault(vmf);
                 if (!vmf_ret)
                         vmf_ret = VM_FAULT_DONE_COW;
-               goto finish_iomap;
+               goto unlock_entry;
         }
  
         switch (iomap.type) {
@@ -1114,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 }
                 error = dax_insert_mapping(mapping, iomap.bdev, sector,
                                 PAGE_SIZE, &entry, vma, vmf);
+               /* -EBUSY is fine, somebody else faulted on the same PTE */
+               if (error == -EBUSY)
+                       error = 0;
                 break;
         case IOMAP_UNWRITTEN:
         case IOMAP_HOLE:
                 if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-                       vmf_ret = dax_load_hole(mapping, entry, vmf);
-                       break;
+                       vmf_ret = dax_load_hole(mapping, &entry, vmf);
+                       goto unlock_entry;
                 }
                 /*FALLTHRU*/
         default:
@@ -1128,31 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 break;
         }
  
+ error_unlock_entry:
+       vmf_ret = dax_fault_return(error) | major;
+ unlock_entry:
+       put_locked_mapping_entry(mapping, vmf->pgoff, entry);
   finish_iomap:
         if (ops->iomap_end) {
-               if (error || (vmf_ret & VM_FAULT_ERROR)) {
-                       /* keep previous error */
-                       ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
-                                       &iomap);
-               } else {
-                       error = ops->iomap_end(inode, pos, PAGE_SIZE,
-                                       PAGE_SIZE, flags, &iomap);
-               }
-       }
- unlock_entry:
-       if (vmf_ret != VM_FAULT_LOCKED || error)
-               put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
-       if (error == -ENOMEM)
-               return VM_FAULT_OOM | major;
-       /* -EBUSY is fine, somebody else faulted on the same PTE */
-       if (error < 0 && error != -EBUSY)
-               return VM_FAULT_SIGBUS | major;
-       if (vmf_ret) {
-               WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
-               return vmf_ret;
+               int copied = PAGE_SIZE;
+
+               if (vmf_ret & VM_FAULT_ERROR)
+                       copied = 0;
+               /*
+                * The fault is done by now and there's no way back (other
+                * thread may be already happily using PTE we have installed).
+                * Just ignore error from ->iomap_end since we cannot do much
+                * with it.
+                */
+               ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
         }
-       return VM_FAULT_NOPAGE | major;
+       return vmf_ret;
  }
  EXPORT_SYMBOL_GPL(dax_iomap_fault);
  
@@ -1277,16 +1338,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 goto fallback;
  
         /*
-        * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-        * PMD or a HZP entry.  If it can't (because a 4k page is already in
-        * the tree, for instance), it will return -EEXIST and we just fall
-        * back to 4k entries.
-        */
-       entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-       if (IS_ERR(entry))
-               goto fallback;
-
-       /*
          * Note that we don't use iomap_apply here.  We aren't doing I/O, only
          * setting up a mapping, so really we're using iomap_begin() as a way
          * to look up our filesystem block.
@@ -1294,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         pos = (loff_t)pgoff << PAGE_SHIFT;
         error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
         if (error)
-               goto unlock_entry;
+               goto fallback;
+
         if (iomap.offset + iomap.length < pos + PMD_SIZE)
                 goto finish_iomap;
  
+       /*
+        * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+        * PMD or a HZP entry.  If it can't (because a 4k page is already in
+        * the tree, for instance), it will return -EEXIST and we just fall
+        * back to 4k entries.
+        */
+       entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+       if (IS_ERR(entry))
+               goto finish_iomap;
+
         vmf.pgoff = pgoff;
         vmf.flags = flags;
         vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
@@ -1310,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         case IOMAP_UNWRITTEN:
         case IOMAP_HOLE:
                 if (WARN_ON_ONCE(write))
-                       goto finish_iomap;
+                       goto unlock_entry;
                 result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
                                 &entry);
                 break;
@@ -1319,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 break;
         }
  
+ unlock_entry:
+       put_locked_mapping_entry(mapping, pgoff, entry);
   finish_iomap:
         if (ops->iomap_end) {
-               if (result == VM_FAULT_FALLBACK) {
-                       ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
-                                       &iomap);
-               } else {
-                       error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
-                                       iomap_flags, &iomap);
-                       if (error)
-                               result = VM_FAULT_FALLBACK;
-               }
+               int copied = PMD_SIZE;
+
+               if (result == VM_FAULT_FALLBACK)
+                       copied = 0;
+               /*
+                * The fault is done by now and there's no way back (other
+                * thread may be already happily using PMD we have installed).
+                * Just ignore error from ->iomap_end since we cannot do much
+                * with it.
+                */
+               ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
+                               &iomap);
         }
- unlock_entry:
-       put_locked_mapping_entry(mapping, pgoff, entry);
   fallback:
         if (result == VM_FAULT_FALLBACK) {
                 split_huge_pmd(vma, pmd, address);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index 0093ea2..f073bfc 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode,
                         mutex_unlock(&ei->truncate_mutex);
                         goto cleanup;
                 }
-       } else {
-               *new = true;
         }
+       *new = true;
  
         ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
         mutex_unlock(&ei->truncate_mutex);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index b5f1844..d663d3d 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -258,7 +258,6 @@ out:
  static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
  {
         int result;
-       handle_t *handle = NULL;
         struct inode *inode = file_inode(vma->vm_file);
         struct super_block *sb = inode->i_sb;
         bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         if (write) {
                 sb_start_pagefault(sb);
                 file_update_time(vma->vm_file);
-               down_read(&EXT4_I(inode)->i_mmap_sem);
-               handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-                                               EXT4_DATA_TRANS_BLOCKS(sb));
-       } else
-               down_read(&EXT4_I(inode)->i_mmap_sem);
-
-       if (IS_ERR(handle))
-               result = VM_FAULT_SIGBUS;
-       else
-               result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
-
-       if (write) {
-               if (!IS_ERR(handle))
-                       ext4_journal_stop(handle);
-               up_read(&EXT4_I(inode)->i_mmap_sem);
+       }
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       if (write)
                 sb_end_pagefault(sb);
-       } else
-               up_read(&EXT4_I(inode)->i_mmap_sem);
  
         return result;
  }
@@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
                                                 pmd_t *pmd, unsigned int flags)
  {
         int result;
-       handle_t *handle = NULL;
         struct inode *inode = file_inode(vma->vm_file);
         struct super_block *sb = inode->i_sb;
         bool write = flags & FAULT_FLAG_WRITE;
@@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
         if (write) {
                 sb_start_pagefault(sb);
                 file_update_time(vma->vm_file);
-               down_read(&EXT4_I(inode)->i_mmap_sem);
-               handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-                               ext4_chunk_trans_blocks(inode,
-                                                       PMD_SIZE / PAGE_SIZE));
-       } else
-               down_read(&EXT4_I(inode)->i_mmap_sem);
-
-       if (IS_ERR(handle))
-               result = VM_FAULT_SIGBUS;
-       else {
-               result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
-                                            &ext4_iomap_ops);
         }
-
-       if (write) {
-               if (!IS_ERR(handle))
-                       ext4_journal_stop(handle);
-               up_read(&EXT4_I(inode)->i_mmap_sem);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+                                    &ext4_iomap_ops);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       if (write)
                 sb_end_pagefault(sb);
-       } else
-               up_read(&EXT4_I(inode)->i_mmap_sem);
  
         return result;
  }
diff --git a/include/linux/dax.h b/include/linux/dax.h

index f97bcfe..24ad711 100644 (file)
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
  int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                         struct iomap_ops *ops);
  int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+                                     pgoff_t index);
  void dax_wake_mapping_entry_waiter(struct address_space *mapping,
                 pgoff_t index, void *entry, bool wake_all);
  
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 7023142..a0934e6 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func);
  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                        const struct bpf_insn *patch, u32 len);
  void bpf_warn_invalid_xdp_action(u32 act);
-void bpf_warn_invalid_xdp_buffer(void);
  
  #ifdef CONFIG_BPF_JIT
  extern int bpf_jit_enable;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h

index c56b398..6b5818d 100644 (file)
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,13 +73,13 @@
   */
  enum pageflags {
         PG_locked,              /* Page is locked. Don't touch. */
-       PG_waiters,             /* Page has waiters, check its waitqueue */
         PG_error,
         PG_referenced,
         PG_uptodate,
         PG_dirty,
         PG_lru,
         PG_active,
+       PG_waiters,             /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
         PG_slab,
         PG_owner_priv_1,        /* Owner use. If pagecache, fs may use*/
         PG_arch_1,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h

index f0cf5a1..0378e88 100644 (file)
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -110,6 +110,7 @@ struct netns_ipv4 {
         int sysctl_tcp_orphan_retries;
         int sysctl_tcp_fin_timeout;
         unsigned int sysctl_tcp_notsent_lowat;
+       int sysctl_tcp_tw_reuse;
  
         int sysctl_igmp_max_memberships;
         int sysctl_igmp_max_msf;
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 207147b..6061963 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3];
  extern int sysctl_tcp_rmem[3];
  extern int sysctl_tcp_app_win;
  extern int sysctl_tcp_adv_win_scale;
-extern int sysctl_tcp_tw_reuse;
  extern int sysctl_tcp_frto;
  extern int sysctl_tcp_low_latency;
  extern int sysctl_tcp_nometrics_save;
diff --git a/kernel/cpu.c b/kernel/cpu.c

index 042fd7e..f75c4d0 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1471,6 +1471,7 @@ int __cpuhp_setup_state(enum cpuhp_state state,
                         bool multi_instance)
  {
         int cpu, ret = 0;
+       bool dynstate;
  
         if (cpuhp_cb_check(state) || !name)
                 return -EINVAL;
@@ -1480,6 +1481,12 @@ int __cpuhp_setup_state(enum cpuhp_state state,
         ret = cpuhp_store_callbacks(state, name, startup, teardown,
                                     multi_instance);
  
+       dynstate = state == CPUHP_AP_ONLINE_DYN;
+       if (ret > 0 && dynstate) {
+               state = ret;
+               ret = 0;
+       }
+
         if (ret || !invoke || !startup)
                 goto out;
  
@@ -1508,7 +1515,7 @@ out:
          * If the requested state is CPUHP_AP_ONLINE_DYN, return the
          * dynamically allocated state in case of success.
          */
-       if (!ret && state == CPUHP_AP_ONLINE_DYN)
+       if (!ret && dynstate)
                 return state;
         return ret;
  }
diff --git a/mm/filemap.c b/mm/filemap.c

index 82f26cd..d0e4d10 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
  }
  EXPORT_SYMBOL_GPL(add_page_wait_queue);
  
+#ifndef clear_bit_unlock_is_negative_byte
+
+/*
+ * PG_waiters is the high bit in the same byte as PG_locked.
+ *
+ * On x86 (and on many other architectures), we can clear PG_locked and
+ * test the sign bit at the same time. But if the architecture does
+ * not support that special operation, we just do this all by hand
+ * instead.
+ *
+ * The read of PG_waiters has to be after (or concurrently with) PG_locked
+ * being cleared, but a memory barrier should be unnecessary since it is
+ * in the same byte as PG_locked.
+ */
+static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
+{
+       clear_bit_unlock(nr, mem);
+       /* smp_mb__after_atomic(); */
+       return test_bit(PG_waiters, mem);
+}
+
+#endif
+
  /**
   * unlock_page - unlock a locked page
   * @page: the page
@@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue);
   * mechanism between PageLocked pages and PageWriteback pages is shared.
   * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
   *
- * The mb is necessary to enforce ordering between the clear_bit and the read
- * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
+ * Note that this depends on PG_waiters being the sign bit in the byte
+ * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
+ * clear the PG_locked bit and test PG_waiters at the same time fairly
+ * portably (architectures that do LL/SC can test any bit, while x86 can
+ * test the sign bit).
   */
  void unlock_page(struct page *page)
  {
+       BUILD_BUG_ON(PG_waiters != 7);
         page = compound_head(page);
         VM_BUG_ON_PAGE(!PageLocked(page), page);
-       clear_bit_unlock(PG_locked, &page->flags);
-       smp_mb__after_atomic();
-       wake_up_page(page, PG_locked);
+       if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
+               wake_up_page_bit(page, PG_locked);
  }
  EXPORT_SYMBOL(unlock_page);
  
diff --git a/mm/truncate.c b/mm/truncate.c

index fd97f1d..dd7b24e 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -24,20 +24,12 @@
  #include <linux/rmap.h>
  #include "internal.h"
  
-static void clear_exceptional_entry(struct address_space *mapping,
-                                   pgoff_t index, void *entry)
+static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
+                              void *entry)
  {
         struct radix_tree_node *node;
         void **slot;
  
-       /* Handled by shmem itself */
-       if (shmem_mapping(mapping))
-               return;
-
-       if (dax_mapping(mapping)) {
-               dax_delete_mapping_entry(mapping, index);
-               return;
-       }
         spin_lock_irq(&mapping->tree_lock);
         /*
          * Regular page slots are stabilized by the page lock even
@@ -55,6 +47,56 @@ unlock:
         spin_unlock_irq(&mapping->tree_lock);
  }
  
+/*
+ * Unconditionally remove exceptional entry. Usually called from truncate path.
+ */
+static void truncate_exceptional_entry(struct address_space *mapping,
+                                      pgoff_t index, void *entry)
+{
+       /* Handled by shmem itself */
+       if (shmem_mapping(mapping))
+               return;
+
+       if (dax_mapping(mapping)) {
+               dax_delete_mapping_entry(mapping, index);
+               return;
+       }
+       clear_shadow_entry(mapping, index, entry);
+}
+
+/*
+ * Invalidate exceptional entry if easily possible. This handles exceptional
+ * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
+ * clean entries.
+ */
+static int invalidate_exceptional_entry(struct address_space *mapping,
+                                       pgoff_t index, void *entry)
+{
+       /* Handled by shmem itself */
+       if (shmem_mapping(mapping))
+               return 1;
+       if (dax_mapping(mapping))
+               return dax_invalidate_mapping_entry(mapping, index);
+       clear_shadow_entry(mapping, index, entry);
+       return 1;
+}
+
+/*
+ * Invalidate exceptional entry if clean. This handles exceptional entries for
+ * invalidate_inode_pages2() so for DAX it evicts only clean entries.
+ */
+static int invalidate_exceptional_entry2(struct address_space *mapping,
+                                        pgoff_t index, void *entry)
+{
+       /* Handled by shmem itself */
+       if (shmem_mapping(mapping))
+               return 1;
+       if (dax_mapping(mapping))
+               return dax_invalidate_mapping_entry_sync(mapping, index);
+       clear_shadow_entry(mapping, index, entry);
+       return 1;
+}
+
  /**
   * do_invalidatepage - invalidate part or all of a page
   * @page: the page which is affected
@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
                                 break;
  
                         if (radix_tree_exceptional_entry(page)) {
-                               clear_exceptional_entry(mapping, index, page);
+                               truncate_exceptional_entry(mapping, index,
+                                                          page);
                                 continue;
                         }
  
@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
                         }
  
                         if (radix_tree_exceptional_entry(page)) {
-                               clear_exceptional_entry(mapping, index, page);
+                               truncate_exceptional_entry(mapping, index,
+                                                          page);
                                 continue;
                         }
  
@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                 break;
  
                         if (radix_tree_exceptional_entry(page)) {
-                               clear_exceptional_entry(mapping, index, page);
+                               invalidate_exceptional_entry(mapping, index,
+                                                            page);
                                 continue;
                         }
  
@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                                 break;
  
                         if (radix_tree_exceptional_entry(page)) {
-                               clear_exceptional_entry(mapping, index, page);
+                               if (!invalidate_exceptional_entry2(mapping,
+                                                                  index, page))
+                                       ret = -EBUSY;
                                 continue;
                         }
  
diff --git a/net/core/filter.c b/net/core/filter.c

index e6c412b..1969b3f 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act)
  }
  EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
  
-void bpf_warn_invalid_xdp_buffer(void)
-{
-       WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n");
-}
-EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
-
  static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                                         int src_reg, int ctx_off,
                                         struct bpf_insn *insn_buf,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c

index 80bc36b..22cbd61 100644 (file)
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -433,13 +433,6 @@ static struct ctl_table ipv4_table[] = {
                 .extra2         = &tcp_adv_win_scale_max,
         },
         {
-               .procname       = "tcp_tw_reuse",
-               .data           = &sysctl_tcp_tw_reuse,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
-       {
                 .procname       = "tcp_frto",
                 .data           = &sysctl_tcp_frto,
                 .maxlen         = sizeof(int),
@@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = {
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec,
         },
+       {
+               .procname       = "tcp_tw_reuse",
+               .data           = &init_net.ipv4.sysctl_tcp_tw_reuse,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
  #ifdef CONFIG_IP_ROUTE_MULTIPATH
         {
                 .procname       = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 30d81f5..fe9da4f 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,7 +84,6 @@
  #include <crypto/hash.h>
  #include <linux/scatterlist.h>
  
-int sysctl_tcp_tw_reuse __read_mostly;
  int sysctl_tcp_low_latency __read_mostly;
  
  #ifdef CONFIG_TCP_MD5SIG
@@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
            and use initial timestamp retrieved from peer table.
          */
         if (tcptw->tw_ts_recent_stamp &&
-           (!twp || (sysctl_tcp_tw_reuse &&
+           (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
                              get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                 if (tp->write_seq == 0)
@@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net)
         net->ipv4.sysctl_tcp_orphan_retries = 0;
         net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
         net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
+       net->ipv4.sysctl_tcp_tw_reuse = 0;
  
         return 0;
  fail:
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c

index 2d4c4d3..9c62b63 100644 (file)
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
         rcu_assign_pointer(flow->sf_acts, acts);
         packet->priority = flow->key.phy.priority;
         packet->mark = flow->key.phy.skb_mark;
-       packet->protocol = flow->key.eth.type;
  
         rcu_read_lock();
         dp = get_dp_rcu(net, ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c

index 08aa926..2c0a00f 100644 (file)
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
   * Returns 0 if it encounters a non-vlan or incomplete packet.
   * Returns 1 after successfully parsing vlan tag.
   */
-static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
+                         bool untag_vlan)
  {
         struct vlan_head *vh = (struct vlan_head *)skb->data;
  
@@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
         key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
         key_vh->tpid = vh->tpid;
  
-       __skb_pull(skb, sizeof(struct vlan_head));
+       if (unlikely(untag_vlan)) {
+               int offset = skb->data - skb_mac_header(skb);
+               u16 tci;
+               int err;
+
+               __skb_push(skb, offset);
+               err = __skb_vlan_pop(skb, &tci);
+               __skb_pull(skb, offset);
+               if (err)
+                       return err;
+               __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
+       } else {
+               __skb_pull(skb, sizeof(struct vlan_head));
+       }
         return 1;
  }
  
@@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
                 key->eth.vlan.tpid = skb->vlan_proto;
         } else {
                 /* Parse outer vlan tag in the non-accelerated case. */
-               res = parse_vlan_tag(skb, &key->eth.vlan);
+               res = parse_vlan_tag(skb, &key->eth.vlan, true);
                 if (res <= 0)
                         return res;
         }
  
         /* Parse inner vlan tag. */
-       res = parse_vlan_tag(skb, &key->eth.cvlan);
+       res = parse_vlan_tag(skb, &key->eth.cvlan, false);
         if (res <= 0)
                 return res;
  
@@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
         if (err)
                 return err;
  
-       if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
-               /* key_extract assumes that skb->protocol is set-up for
-                * layer 3 packets which is the case for other callers,
-                * in particular packets recieved from the network stack.
-                * Here the correct value can be set from the metadata
-                * extracted above.
-                */
-               skb->protocol = key->eth.type;
-       } else {
-               struct ethhdr *eth;
-
-               skb_reset_mac_header(skb);
-               eth = eth_hdr(skb);
-
-               /* Normally, setting the skb 'protocol' field would be
-                * handled by a call to eth_type_trans(), but it assumes
-                * there's a sending device, which we may not have.
-                */
-               if (eth_proto_is_802_3(eth->h_proto))
-                       skb->protocol = eth->h_proto;
-               else
-                       skb->protocol = htons(ETH_P_802_2);
-       }
+       /* key_extract assumes that skb->protocol is set-up for
+        * layer 3 packets which is the case for other callers,
+        * in particular packets received from the network stack.
+        * Here the correct value can be set from the metadata
+        * extracted above.
+        * For L2 packet key eth type would be zero. skb protocol
+        * would be set to correct value later during key-extract.
+        */
  
+       skb->protocol = key->eth.type;
         return key_extract(skb, key);
  }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c

index 3fbba79..1ecdf80 100644 (file)
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
         unsigned long cl;
         unsigned long fh;
         int err;
-       int tp_created = 0;
+       int tp_created;
  
         if ((n->nlmsg_type != RTM_GETTFILTER) &&
             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                 return -EPERM;
  
  replay:
+       tp_created = 0;
+
         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
         if (err < 0)
                 return err;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c

index 333c5da..800caaa 100644 (file)
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error)
         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                 if (TIPC_SKB_CB(skb)->bytes_read) {
                         kfree_skb(skb);
-               } else {
-                       if (!tipc_sk_type_connectionless(sk) &&
-                           sk->sk_state != TIPC_DISCONNECTING) {
-                               tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-                               tipc_node_remove_conn(net, dnode, tsk->portid);
-                       }
-                       tipc_sk_respond(sk, skb, error);
+                       continue;
+               }
+               if (!tipc_sk_type_connectionless(sk) &&
+                   sk->sk_state != TIPC_DISCONNECTING) {
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+                       tipc_node_remove_conn(net, dnode, tsk->portid);
                 }
+               tipc_sk_respond(sk, skb, error);
         }
+
+       if (tipc_sk_type_connectionless(sk))
+               return;
+
         if (sk->sk_state != TIPC_DISCONNECTING) {
                 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
                                       TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
@@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error)
                                       tsk->portid, error);
                 if (skb)
                         tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
-               if (!tipc_sk_type_connectionless(sk)) {
-                       tipc_node_remove_conn(net, dnode, tsk->portid);
-                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-               }
+               tipc_node_remove_conn(net, dnode, tsk->portid);
+               tipc_set_sk_state(sk, TIPC_DISCONNECTING);
         }
  }
author	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
	Fri, 6 Jan 2017 13:36:30 +0000 (14:36 +0100)
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
	Fri, 6 Jan 2017 13:36:30 +0000 (14:36 +0100)
Documentation/DocBook/Makefile		patch \| blob \| history
Documentation/unaligned-memory-access.txt		patch \| blob \| history
Makefile		patch \| blob \| history
arch/arm64/include/asm/asm-uaccess.h	[new file with mode: 0644]	patch \| blob
arch/arm64/include/asm/uaccess.h		patch \| blob \| history
arch/arm64/kernel/entry.S		patch \| blob \| history
arch/arm64/lib/clear_user.S		patch \| blob \| history
arch/arm64/lib/copy_from_user.S		patch \| blob \| history
arch/arm64/lib/copy_in_user.S		patch \| blob \| history
arch/arm64/lib/copy_to_user.S		patch \| blob \| history
arch/arm64/mm/cache.S		patch \| blob \| history
arch/arm64/xen/hypercall.S		patch \| blob \| history
arch/x86/include/asm/bitops.h		patch \| blob \| history
arch/x86/kernel/cpu/mcheck/mce_amd.c		patch \| blob \| history
crypto/testmgr.c		patch \| blob \| history
drivers/acpi/acpi_watchdog.c		patch \| blob \| history
drivers/acpi/glue.c		patch \| blob \| history
drivers/crypto/marvell/cesa.h		patch \| blob \| history
drivers/crypto/marvell/hash.c		patch \| blob \| history
drivers/crypto/marvell/tdma.c		patch \| blob \| history
drivers/iommu/dmar.c		patch \| blob \| history
drivers/net/ethernet/korina.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx4/en_netdev.c		patch \| blob \| history
drivers/net/ethernet/realtek/r8169.c		patch \| blob \| history
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c		patch \| blob \| history
drivers/net/ipvlan/ipvlan.h		patch \| blob \| history
drivers/net/ipvlan/ipvlan_core.c		patch \| blob \| history
drivers/net/ipvlan/ipvlan_main.c		patch \| blob \| history
fs/dax.c		patch \| blob \| history
fs/ext2/inode.c		patch \| blob \| history
fs/ext4/file.c		patch \| blob \| history
include/linux/dax.h		patch \| blob \| history
include/linux/filter.h		patch \| blob \| history
include/linux/page-flags.h		patch \| blob \| history
include/net/netns/ipv4.h		patch \| blob \| history
include/net/tcp.h		patch \| blob \| history
kernel/cpu.c		patch \| blob \| history
mm/filemap.c		patch \| blob \| history
mm/truncate.c		patch \| blob \| history
net/core/filter.c		patch \| blob \| history
net/ipv4/sysctl_net_ipv4.c		patch \| blob \| history
net/ipv4/tcp_ipv4.c		patch \| blob \| history
net/openvswitch/datapath.c		patch \| blob \| history
net/openvswitch/flow.c		patch \| blob \| history
net/sched/cls_api.c		patch \| blob \| history
net/tipc/socket.c		patch \| blob \| history