From e275d2109cdaea8b4554b9eb8a828bdb8f8ba068 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 26 Oct 2020 10:08:47 +0200
Subject: bus: ti-sysc: Fix reset status check for modules with quirks

Commit d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and
wait for softreset bit") started showing a "OCP softreset timed out"
warning on enable if the interconnect target module is not out of reset.
This caused the warning to be often triggered for i2c and hdq while the
devices are working properly.

Turns out that some interconnect target modules seem to have an unusable
reset status bits unless the module specific reset quirks are activated.

Let's just skip the reset status check for those modules as we only want
to activate the reset quirks when doing a reset, and not on enable. This
way we don't see the bogus "OCP softreset timed out" warnings during boot.

Fixes: d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit")
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/ti-sysc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index c59999ce044e..240dce553a0b 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -50,6 +50,7 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
+#define SYSC_MODULE_QUIRK_ENA_RESETDONE	BIT(25)
 #define SYSC_MODULE_QUIRK_PRUSS		BIT(24)
 #define SYSC_MODULE_QUIRK_DSS_RESET	BIT(23)
 #define SYSC_MODULE_QUIRK_RTC_UNLOCK	BIT(22)
-- 
cgit v1.2.3


From f51778db088b2407ec177f2f4da0f6290602aa3f Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 2 Nov 2020 12:43:27 +1100
Subject: swiotlb: using SIZE_MAX needs limits.h included

After merging the drm-misc tree, linux-next build (arm
multi_v7_defconfig) failed like this:

In file included from drivers/gpu/drm/nouveau/nouveau_ttm.c:26:
include/linux/swiotlb.h: In function 'swiotlb_max_mapping_size':
include/linux/swiotlb.h:99:9: error: 'SIZE_MAX' undeclared (first use in this function)
   99 |  return SIZE_MAX;
      |         ^~~~~~~~
include/linux/swiotlb.h:7:1: note: 'SIZE_MAX' is defined in header '<stdint.h>'; did you forget to '#include <stdint.h>'?
    6 | #include <linux/init.h>
  +++ |+#include <stdint.h>
    7 | #include <linux/types.h>
include/linux/swiotlb.h:99:9: note: each undeclared identifier is reported only once for each function it appears in
   99 |  return SIZE_MAX;
      |         ^~~~~~~~

Caused by commit

  abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()")

but only exposed by commit "drm/nouveu: fix swiotlb include"

Fix it by including linux/limits.h as appropriate.

Fixes: abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/r/20201102124327.2f82b2a7@canb.auug.org.au
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/swiotlb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 046bb94bd4d6..fa5122c6711e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -5,6 +5,7 @@
 #include <linux/dma-direction.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/limits.h>
 
 struct device;
 struct page;
-- 
cgit v1.2.3


From 93bd813c17763177cf87e96c2313bd4dd747d234 Mon Sep 17 00:00:00 2001
From: Jack Yu <jack.yu@realtek.com>
Date: Thu, 5 Nov 2020 11:08:04 +0800
Subject: ASoC: rt1015: add delay to fix pop noise from speaker

Add delay to fix pop noise from speaker.

Signed-off-by: Jack Yu <jack.yu@realtek.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20201105030804.31115-1-jack.yu@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/rt1015.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 include/sound/rt1015.h

(limited to 'include')

diff --git a/include/sound/rt1015.h b/include/sound/rt1015.h
new file mode 100644
index 000000000000..70a7538d4c89
--- /dev/null
+++ b/include/sound/rt1015.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/sound/rt1015.h -- Platform data for RT1015
+ *
+ * Copyright 2020 Realtek Microelectronics
+ */
+
+#ifndef __LINUX_SND_RT1015_H
+#define __LINUX_SND_RT1015_H
+
+struct rt1015_platform_data {
+	unsigned int power_up_delay_ms;
+};
+
+#endif
-- 
cgit v1.2.3


From 3084db0e0d5076cd48408274ab0911cd3ccdae88 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Mon, 2 Nov 2020 15:23:04 -0800
Subject: kunit: fix display of failed expectations for strings

Currently the following expectation
  KUNIT_EXPECT_STREQ(test, "hi", "bye");
will produce:
  Expected "hi" == "bye", but
      "hi" == 1625079497
      "bye" == 1625079500

After this patch:
  Expected "hi" == "bye", but
      "hi" == hi
      "bye" == bye

KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() was written but just mistakenly
not actually used by KUNIT_EXPECT_STREQ() and friends.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index db1b0ae666c4..df60be7e22ca 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -1105,7 +1105,7 @@ do {									       \
 	KUNIT_ASSERTION(test,						       \
 			strcmp(__left, __right) op 0,			       \
 			kunit_binary_str_assert,			       \
-			KUNIT_INIT_BINARY_ASSERT_STRUCT(test,		       \
+			KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test,	       \
 							assert_type,	       \
 							#op,		       \
 							#left,		       \
-- 
cgit v1.2.3


From 5e844cc37a5cbaa460e68f9a989d321d63088a89 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Wed, 11 Nov 2020 20:07:10 +0100
Subject: spi: Introduce device-managed SPI controller allocation

SPI driver probing currently comprises two steps, whereas removal
comprises only one step:

    spi_alloc_master()
    spi_register_controller()

    spi_unregister_controller()

That's because spi_unregister_controller() calls device_unregister()
instead of device_del(), thereby releasing the reference on the
spi_controller which was obtained by spi_alloc_master().

An SPI driver's private data is contained in the same memory allocation
as the spi_controller struct.  Thus, once spi_unregister_controller()
has been called, the private data is inaccessible.  But some drivers
need to access it after spi_unregister_controller() to perform further
teardown steps.

Introduce devm_spi_alloc_master() and devm_spi_alloc_slave(), which
release a reference on the spi_controller struct only after the driver
has unbound, thereby keeping the memory allocation accessible.  Change
spi_unregister_controller() to not release a reference if the
spi_controller was allocated by one of these new devm functions.

The present commit is small enough to be backportable to stable.
It allows fixing drivers which use the private data in their ->remove()
hook after it's been freed.  It also allows fixing drivers which neglect
to release a reference on the spi_controller in the probe error path.

Long-term, most SPI drivers shall be moved over to the devm functions
introduced herein.  The few that can't shall be changed in a treewide
commit to explicitly release the last reference on the controller.
That commit shall amend spi_unregister_controller() to no longer release
a reference, thereby completing the migration.

As a result, the behaviour will be less surprising and more consistent
with subsystems such as IIO, which also includes the private data in the
allocation of the generic iio_dev struct, but calls device_del() in
iio_device_unregister().

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/272bae2ef08abd21388c98e23729886663d19192.1605121038.git.lukas@wunner.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 99380c0825db..b390fdac1587 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -734,6 +734,25 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host,
 	return __spi_alloc_controller(host, size, true);
 }
 
+struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
+						   unsigned int size,
+						   bool slave);
+
+static inline struct spi_controller *devm_spi_alloc_master(struct device *dev,
+							   unsigned int size)
+{
+	return __devm_spi_alloc_controller(dev, size, false);
+}
+
+static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev,
+							  unsigned int size)
+{
+	if (!IS_ENABLED(CONFIG_SPI_SLAVE))
+		return NULL;
+
+	return __devm_spi_alloc_controller(dev, size, true);
+}
+
 extern int spi_register_controller(struct spi_controller *ctlr);
 extern int devm_spi_register_controller(struct device *dev,
 					struct spi_controller *ctlr);
-- 
cgit v1.2.3


From c3213d260a23e263ef85ba21ac68c9e7578020b5 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Thu, 12 Nov 2020 15:17:32 -0500
Subject: SUNRPC: Fix oops in the rpc_xdr_buf event class

Backchannel rpc tasks don't have task->tk_client set, so it's necessary
to check it for NULL before dereferencing.

Fixes: c509f15a5801 ("SUNRPC: Split the xdr_buf event class")
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/trace/events/sunrpc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 2477014e3fa6..2a03263b5f9d 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -68,7 +68,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class,
 
 	TP_fast_assign(
 		__entry->task_id = task->tk_pid;
-		__entry->client_id = task->tk_client->cl_clid;
+		__entry->client_id = task->tk_client ?
+				     task->tk_client->cl_clid : -1;
 		__entry->head_base = xdr->head[0].iov_base;
 		__entry->head_len = xdr->head[0].iov_len;
 		__entry->tail_base = xdr->tail[0].iov_base;
-- 
cgit v1.2.3


From 8cf8821e15cd553339a5b48ee555a0439c2b2742 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@akamai.com>
Date: Thu, 12 Nov 2020 20:58:15 -0500
Subject: net: Exempt multicast addresses from five-second neighbor lifetime

Commit 58956317c8de ("neighbor: Improve garbage collection")
guarantees neighbour table entries a five-second lifetime.  Processes
which make heavy use of multicast can fill the neighour table with
multicast addresses in five seconds.  At that point, neighbour entries
can't be GC-ed because they aren't five seconds old yet, the kernel
log starts to fill up with "neighbor table overflow!" messages, and
sends start to fail.

This patch allows multicast addresses to be thrown out before they've
lived out their five seconds.  This makes room for non-multicast
addresses and makes messages to all addresses more reliable in these
circumstances.

Fixes: 58956317c8de ("neighbor: Improve garbage collection")
Signed-off-by: Jeff Dike <jdike@akamai.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20201113015815.31397-1-jdike@akamai.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 81ee17594c32..22ced1381ede 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -204,6 +204,7 @@ struct neigh_table {
 	int			(*pconstructor)(struct pneigh_entry *);
 	void			(*pdestructor)(struct pneigh_entry *);
 	void			(*proxy_redo)(struct sk_buff *skb);
+	int			(*is_multicast)(const void *pkey);
 	bool			(*allow_add)(const struct net_device *dev,
 					     struct netlink_ext_ack *extack);
 	char			*id;
-- 
cgit v1.2.3


From 9c2e14b48119b39446031d29d994044ae958d8fc Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Tue, 10 Nov 2020 16:16:40 -0800
Subject: ip_tunnels: Set tunnel option flag when tunnel metadata is present

Currently, we may set the tunnel option flag when the size of metadata
is zero.  For example, we set TUNNEL_GENEVE_OPT in the receive function
no matter the geneve option is present or not.  As this may result in
issues on the tunnel flags consumers, this patch fixes the issue.

Related discussion:
* https://lore.kernel.org/netdev/1604448694-19351-1-git-send-email-yihung.wei@gmail.com/T/#u

Fixes: 256c87c17c53 ("net: check tunnel option type in tunnel flags")
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Link: https://lore.kernel.org/r/1605053800-74072-1-git-send-email-yihung.wei@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_tunnels.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 02ccd32542d0..61620677b034 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -478,9 +478,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 					   const void *from, int len,
 					   __be16 flags)
 {
-	memcpy(ip_tunnel_info_opts(info), from, len);
 	info->options_len = len;
-	info->key.tun_flags |= flags;
+	if (len > 0) {
+		memcpy(ip_tunnel_info_opts(info), from, len);
+		info->key.tun_flags |= flags;
+	}
 }
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -526,7 +528,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 					   __be16 flags)
 {
 	info->options_len = 0;
-	info->key.tun_flags |= flags;
 }
 
 #endif /* CONFIG_INET */
-- 
cgit v1.2.3


From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Nov 2020 17:52:58 +0100
Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed

Stefan Agner reported a bug when using zsram on 32-bit Arm machines
with RAM above the 4GB address boundary:

  Unable to handle kernel NULL pointer dereference at virtual address 00000000
  pgd = a27bd01c
  [00000000] *pgd=236a0003, *pmd=1ffa64003
  Internal error: Oops: 207 [#1] SMP ARM
  Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet
  CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1
  Hardware name: BCM2711
  PC is at zs_map_object+0x94/0x338
  LR is at zram_bvec_rw.constprop.0+0x330/0xa64
  pc : [<c0602b38>]    lr : [<c0bda6a0>]    psr: 60000013
  sp : e376bbe0  ip : 00000000  fp : c1e2921c
  r10: 00000002  r9 : c1dda730  r8 : 00000000
  r7 : e8ff7a00  r6 : 00000000  r5 : 02f9ffa0  r4 : e3710000
  r3 : 000fdffe  r2 : c1e0ce80  r1 : ebf979a0  r0 : 00000000
  Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
  Control: 30c5383d  Table: 235c2a80  DAC: fffffffd
  Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6)
  Stack: (0xe376bbe0 to 0xe376c000)

As it turns out, zsram needs to know the maximum memory size, which
is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in
MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture.

The same problem will be hit on all 32-bit architectures that have a
physical address space larger than 4GB and happen to not enable sparsemem
and include asm/sparsemem.h from asm/pgtable.h.

After the initial discussion, I suggested just always defining
MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is
set, or provoking a build error otherwise. This addresses all
configurations that can currently have this runtime bug, but
leaves all other configurations unchanged.

I looked up the possible number of bits in source code and
datasheets, here is what I found:

 - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used
 - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never
   support more than 32 bits, even though supersections in theory allow
   up to 40 bits as well.
 - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5
   XPA supports up to 60 bits in theory, but 40 bits are more than
   anyone will ever ship
 - On PowerPC, there are three different implementations of 36 bit
   addressing, but 32-bit is used without CONFIG_PTE_64BIT
 - On RISC-V, the normal page table format can support 34 bit
   addressing. There is no highmem support on RISC-V, so anything
   above 2GB is unused, but it might be useful to eventually support
   CONFIG_ZRAM for high pages.

Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library")
Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS")
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/pgtable.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 71125a4676c4..e237004d498d 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask;
 
 #endif /* !__ASSEMBLY__ */
 
+#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+/*
+ * ZSMALLOC needs to know the highest PFN on 32-bit architectures
+ * with physical address space extension, but falls back to
+ * BITS_PER_LONG otherwise.
+ */
+#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
+#else
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+#endif
+
 #ifndef has_transparent_hugepage
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define has_transparent_hugepage() 1
-- 
cgit v1.2.3


From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 10 Nov 2020 17:29:32 +0800
Subject: PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter

In many case, we need to check return value of pm_runtime_get_sync, but
it brings a trouble to the usage counter processing. Many callers forget
to decrease the usage counter when it failed, which could resulted in
reference leak. It has been discussed a lot[0][1]. So we add a function
to deal with the usage counter for better coding.

[0]https://lkml.org/lkml/2020/6/14/88
[1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Acked-by: Rafael J. Wysocki  <rafael.j.wysocki@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pm_runtime.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 4b708f4e8eed..b492ae00cc90 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev)
 	return __pm_runtime_resume(dev, RPM_GET_PUT);
 }
 
+/**
+ * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it.
+ * @dev: Target device.
+ *
+ * Resume @dev synchronously and if that is successful, increment its runtime
+ * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been
+ * incremented or a negative error code otherwise.
+ */
+static inline int pm_runtime_resume_and_get(struct device *dev)
+{
+	int ret;
+
+	ret = __pm_runtime_resume(dev, RPM_GET_PUT);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0.
  * @dev: Target device.
-- 
cgit v1.2.3


From 9d9e937b1c8be97b424e3e11938e183fcde905c0 Mon Sep 17 00:00:00 2001
From: Georg Kohmann <geokohma@cisco.com>
Date: Wed, 11 Nov 2020 12:50:25 +0100
Subject: ipv6/netfilter: Discard first fragment not including all headers

Packets are processed even though the first fragment don't include all
headers through the upper layer header. This breaks TAHI IPv6 Core
Conformance Test v6LC.1.3.6.

Referring to RFC8200 SECTION 4.5: "If the first fragment does not include
all headers through an Upper-Layer header, then that fragment should be
discarded and an ICMP Parameter Problem, Code 3, message should be sent to
the source of the fragment, with the Pointer field set to zero."

The fragment needs to be validated the same way it is done in
commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't
include all headers") for ipv6. Wrap the validation into a common function,
ipv6_frag_thdr_truncated() to check for truncation in the upper layer
header. This validation does not fullfill all aspects of RFC 8200,
section 4.5, but is at the moment sufficient to pass mentioned TAHI test.

In netfilter, utilize the fragment offset returned by find_prev_fhdr() to
let ipv6_frag_thdr_truncated() start it's traverse from the fragment
header.

Return 0 to drop the fragment in the netfilter. This is the same behaviour
as used on other protocol errors in this function, e.g. when
nf_ct_frag6_queue() returns -EPROTO. The Fragment will later be picked up
by ipv6_frag_rcv() in reassembly.c. ipv6_frag_rcv() will then send an
appropriate ICMP Parameter Problem message back to the source.

References commit 2efdaaaf883a ("IPv6: reply ICMP error if the first
fragment don't include all headers")

Signed-off-by: Georg Kohmann <geokohma@cisco.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Link: https://lore.kernel.org/r/20201111115025.28879-1-geokohma@cisco.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ipv6.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index bd1f396cc9c7..637cc6dd12b7 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1064,6 +1064,8 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp,
 
 bool ipv6_ext_hdr(u8 nexthdr);
 
+bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp);
+
 enum {
 	IP6_FH_F_FRAG		= (1 << 0),
 	IP6_FH_F_AUTH		= (1 << 1),
-- 
cgit v1.2.3


From fe0a8a95e7134d0b44cd407bc0085b9ba8d8fe31 Mon Sep 17 00:00:00 2001
From: Lee Duncan <lduncan@suse.com>
Date: Fri, 6 Nov 2020 11:33:17 -0800
Subject: scsi: libiscsi: Fix NOP race condition

iSCSI NOPs are sometimes "lost", mistakenly sent to the user-land iscsid
daemon instead of handled in the kernel, as they should be, resulting in a
message from the daemon like:

  iscsid: Got nop in, but kernel supports nop handling.

This can occur because of the new forward- and back-locks, and the fact
that an iSCSI NOP response can occur before processing of the NOP send is
complete. This can result in "conn->ping_task" being NULL in
iscsi_nop_out_rsp(), when the pointer is actually in the process of being
set.

To work around this, we add a new state to the "ping_task" pointer. In
addition to NULL (not assigned) and a pointer (assigned), we add the state
"being set", which is signaled with an INVALID pointer (using "-1").

Link: https://lore.kernel.org/r/20201106193317.16993-1-leeman.duncan@gmail.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libiscsi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index c25fb86ffae9..b3bbd10eb3f0 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -132,6 +132,9 @@ struct iscsi_task {
 	void			*dd_data;	/* driver/transport data */
 };
 
+/* invalid scsi_task pointer */
+#define	INVALID_SCSI_TASK	(struct iscsi_task *)-1l
+
 static inline int iscsi_task_has_unsol_data(struct iscsi_task *task)
 {
 	return task->unsol_r2t.data_length > task->unsol_r2t.sent;
-- 
cgit v1.2.3


From f97bb5272d9e95d400d6c8643ebb146b3e3e7842 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 09:08:41 +0100
Subject: sched: Fix data-race in wakeup

Mel reported that on some ARM64 platforms loadavg goes bananas and
Will tracked it down to the following race:

  CPU0					CPU1

  schedule()
    prev->sched_contributes_to_load = X;
    deactivate_task(prev);

					try_to_wake_up()
					  if (p->on_rq &&) // false
					  if (smp_load_acquire(&p->on_cpu) && // true
					      ttwu_queue_wakelist())
					        p->sched_remote_wakeup = Y;

    smp_store_release(prev->on_cpu, 0);

where both p->sched_contributes_to_load and p->sched_remote_wakeup are
in the same word, and thus the stores X and Y race (and can clobber
one another's data).

Whereas prior to commit c6e7bd7afaeb ("sched/core: Optimize ttwu()
spinning on p->on_cpu") the p->on_cpu handoff serialized access to
p->sched_remote_wakeup (just as it still does with
p->sched_contributes_to_load) that commit broke that by calling
ttwu_queue_wakelist() with p->on_cpu != 0.

However, due to

  p->XXX = X			ttwu()
  schedule()			  if (p->on_rq && ...) // false
    smp_mb__after_spinlock()	  if (smp_load_acquire(&p->on_cpu) &&
    deactivate_task()		      ttwu_queue_wakelist())
      p->on_rq = 0;		        p->sched_remote_wakeup = Y;

We can be sure any 'current' store is complete and 'current' is
guaranteed asleep. Therefore we can move p->sched_remote_wakeup into
the current flags word.

Note: while the observed failure was loadavg accounting gone wrong due
to ttwu() cobbering p->sched_contributes_to_load, the reverse problem
is also possible where schedule() clobbers p->sched_remote_wakeup,
this could result in enqueue_entity() wrecking ->vruntime and causing
scheduling artifacts.

Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Debugged-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201117083016.GK3121392@hirez.programming.kicks-ass.net
---
 include/linux/sched.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d383cf09e78f..0e91b451d2a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -769,7 +769,6 @@ struct task_struct {
 	unsigned			sched_reset_on_fork:1;
 	unsigned			sched_contributes_to_load:1;
 	unsigned			sched_migrated:1;
-	unsigned			sched_remote_wakeup:1;
 #ifdef CONFIG_PSI
 	unsigned			sched_psi_wake_requeue:1;
 #endif
@@ -779,6 +778,21 @@ struct task_struct {
 
 	/* Unserialized, strictly 'current' */
 
+	/*
+	 * This field must not be in the scheduler word above due to wakelist
+	 * queueing no longer being serialized by p->on_cpu. However:
+	 *
+	 * p->XXX = X;			ttwu()
+	 * schedule()			  if (p->on_rq && ..) // false
+	 *   smp_mb__after_spinlock();	  if (smp_load_acquire(&p->on_cpu) && //true
+	 *   deactivate_task()		      ttwu_queue_wakelist())
+	 *     p->on_rq = 0;			p->sched_remote_wakeup = Y;
+	 *
+	 * guarantees all stores of 'current' are visible before
+	 * ->sched_remote_wakeup gets used, so it can be in this word.
+	 */
+	unsigned			sched_remote_wakeup:1;
+
 	/* Bit to tell LSMs we're in execve(): */
 	unsigned			in_execve:1;
 	unsigned			in_iowait:1;
-- 
cgit v1.2.3


From 2279f540ea7d05f22d2f0c4224319330228586bc Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@redhat.com>
Date: Tue, 17 Nov 2020 07:14:32 +0100
Subject: sched/deadline: Fix priority inheritance with multiple scheduling
 classes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Glenn reported that "an application [he developed produces] a BUG in
deadline.c when a SCHED_DEADLINE task contends with CFS tasks on nested
PTHREAD_PRIO_INHERIT mutexes.  I believe the bug is triggered when a CFS
task that was boosted by a SCHED_DEADLINE task boosts another CFS task
(nested priority inheritance).

 ------------[ cut here ]------------
 kernel BUG at kernel/sched/deadline.c:1462!
 invalid opcode: 0000 [#1] PREEMPT SMP
 CPU: 12 PID: 19171 Comm: dl_boost_bug Tainted: ...
 Hardware name: ...
 RIP: 0010:enqueue_task_dl+0x335/0x910
 Code: ...
 RSP: 0018:ffffc9000c2bbc68 EFLAGS: 00010002
 RAX: 0000000000000009 RBX: ffff888c0af94c00 RCX: ffffffff81e12500
 RDX: 000000000000002e RSI: ffff888c0af94c00 RDI: ffff888c10b22600
 RBP: ffffc9000c2bbd08 R08: 0000000000000009 R09: 0000000000000078
 R10: ffffffff81e12440 R11: ffffffff81e1236c R12: ffff888bc8932600
 R13: ffff888c0af94eb8 R14: ffff888c10b22600 R15: ffff888bc8932600
 FS:  00007fa58ac55700(0000) GS:ffff888c10b00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fa58b523230 CR3: 0000000bf44ab003 CR4: 00000000007606e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 PKRU: 55555554
 Call Trace:
  ? intel_pstate_update_util_hwp+0x13/0x170
  rt_mutex_setprio+0x1cc/0x4b0
  task_blocks_on_rt_mutex+0x225/0x260
  rt_spin_lock_slowlock_locked+0xab/0x2d0
  rt_spin_lock_slowlock+0x50/0x80
  hrtimer_grab_expiry_lock+0x20/0x30
  hrtimer_cancel+0x13/0x30
  do_nanosleep+0xa0/0x150
  hrtimer_nanosleep+0xe1/0x230
  ? __hrtimer_init_sleeper+0x60/0x60
  __x64_sys_nanosleep+0x8d/0xa0
  do_syscall_64+0x4a/0x100
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
 RIP: 0033:0x7fa58b52330d
 ...
 ---[ end trace 0000000000000002 ]—

He also provided a simple reproducer creating the situation below:

 So the execution order of locking steps are the following
 (N1 and N2 are non-deadline tasks. D1 is a deadline task. M1 and M2
 are mutexes that are enabled * with priority inheritance.)

 Time moves forward as this timeline goes down:

 N1              N2               D1
 |               |                |
 |               |                |
 Lock(M1)        |                |
 |               |                |
 |             Lock(M2)           |
 |               |                |
 |               |              Lock(M2)
 |               |                |
 |             Lock(M1)           |
 |             (!!bug triggered!) |

Daniel reported a similar situation as well, by just letting ksoftirqd
run with DEADLINE (and eventually block on a mutex).

Problem is that boosted entities (Priority Inheritance) use static
DEADLINE parameters of the top priority waiter. However, there might be
cases where top waiter could be a non-DEADLINE entity that is currently
boosted by a DEADLINE entity from a different lock chain (i.e., nested
priority chains involving entities of non-DEADLINE classes). In this
case, top waiter static DEADLINE parameters could be null (initialized
to 0 at fork()) and replenish_dl_entity() would hit a BUG().

Fix this by keeping track of the original donor and using its parameters
when a task is boosted.

Reported-by: Glenn Elliott <glenn@aurora.tech>
Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Link: https://lkml.kernel.org/r/20201117061432.517340-1-juri.lelli@redhat.com
---
 include/linux/sched.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0e91b451d2a2..095fdec07b38 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -551,7 +551,6 @@ struct sched_dl_entity {
 	 * overruns.
 	 */
 	unsigned int			dl_throttled      : 1;
-	unsigned int			dl_boosted        : 1;
 	unsigned int			dl_yielded        : 1;
 	unsigned int			dl_non_contending : 1;
 	unsigned int			dl_overrun	  : 1;
@@ -570,6 +569,15 @@ struct sched_dl_entity {
 	 * time.
 	 */
 	struct hrtimer inactive_timer;
+
+#ifdef CONFIG_RT_MUTEXES
+	/*
+	 * Priority Inheritance. When a DEADLINE scheduling entity is boosted
+	 * pi_se points to the donor, otherwise points to the dl_se it belongs
+	 * to (the original one/itself).
+	 */
+	struct sched_dl_entity *pi_se;
+#endif
 };
 
 #ifdef CONFIG_UCLAMP_TASK
-- 
cgit v1.2.3


From 138559b9f99d3b6b1d5e75c78facc067a23871c6 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Sun, 15 Nov 2020 15:14:48 +0200
Subject: net/tls: Fix wrong record sn in async mode of device resync

In async_resync mode, we log the TCP seq of records until the async request
is completed.  Later, in case one of the logged seqs matches the resync
request, we return it, together with its record serial number.  Before this
fix, we mistakenly returned the serial number of the current record
instead.

Fixes: ed9b7646b06a ("net/tls: Add asynchronous resync")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Boris Pismenny <borisp@nvidia.com>
Link: https://lore.kernel.org/r/20201115131448.2702-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index baf1e99d8193..cf1473099453 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -300,7 +300,8 @@ enum tls_offload_sync_type {
 #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX		13
 struct tls_offload_resync_async {
 	atomic64_t req;
-	u32 loglen;
+	u16 loglen;
+	u16 rcd_delta;
 	u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
 };
 
@@ -471,6 +472,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
 	return (i == -1);
 }
 
+static inline void tls_bigint_subtract(unsigned char *seq, int  n)
+{
+	u64 rcd_sn;
+	__be64 *p;
+
+	BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8);
+
+	p = (__be64 *)seq;
+	rcd_sn = be64_to_cpu(*p);
+	*p = cpu_to_be64(rcd_sn - n);
+}
+
 static inline struct tls_context *tls_get_ctx(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -639,6 +652,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
 	atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
 		     ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
 	rx_ctx->resync_async->loglen = 0;
+	rx_ctx->resync_async->rcd_delta = 0;
 }
 
 static inline void
-- 
cgit v1.2.3


From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@gmail.com>
Date: Tue, 10 Nov 2020 15:19:08 +0800
Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system

"intel_iommu=off" command line is used to disable iommu but iommu is force
enabled in a tboot system for security reason.

However for better performance on high speed network device, a new option
"intel_iommu=tboot_noforce" is introduced to disable the force on.

By default kernel should panic if iommu init fail in tboot for security
reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off".

Fix the code setting force_on and move intel_iommu_tboot_noforce
from tboot code to intel iommu code.

Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@gmail.com>
Tested-by: Lukasz Hawrylko <lukasz.hawrylko@linux.intel.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/intel-iommu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index fbf5b3e7707e..d956987ed032 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 extern int dmar_disabled;
 extern int intel_iommu_enabled;
-extern int intel_iommu_tboot_noforce;
 extern int intel_iommu_gfx_mapped;
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
-- 
cgit v1.2.3


From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:40 +0900
Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4

Align the bootconfig applied initrd image size to 4. To fill the gap,
the bootconfig command uses null characters in between the bootconfig
data and the footer. This will expands the footer size but don't change
the checksum.
Thus the block image of the initrd file with bootconfig is as follows.

[initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"]

Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 9903088891fa..2696eb0fc149 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -12,6 +12,9 @@
 
 #define BOOTCONFIG_MAGIC	"#BOOTCONFIG\n"
 #define BOOTCONFIG_MAGIC_LEN	12
+#define BOOTCONFIG_ALIGN_SHIFT	2
+#define BOOTCONFIG_ALIGN	(1 << BOOTCONFIG_ALIGN_SHIFT)
+#define BOOTCONFIG_ALIGN_MASK	(BOOTCONFIG_ALIGN - 1)
 
 /* XBC tree node */
 struct xbc_node {
-- 
cgit v1.2.3


From 2d8f6481c17db9fa5238b277cdbc392084060b09 Mon Sep 17 00:00:00 2001
From: Georg Kohmann <geokohma@cisco.com>
Date: Thu, 19 Nov 2020 10:58:33 +0100
Subject: ipv6: Remove dependency of ipv6_frag_thdr_truncated on ipv6 module

IPV6=m
NF_DEFRAG_IPV6=y

ld: net/ipv6/netfilter/nf_conntrack_reasm.o: in function
`nf_ct_frag6_gather':
net/ipv6/netfilter/nf_conntrack_reasm.c:462: undefined reference to
`ipv6_frag_thdr_truncated'

Netfilter is depending on ipv6 symbol ipv6_frag_thdr_truncated. This
dependency is forcing IPV6=y.

Remove this dependency by moving ipv6_frag_thdr_truncated out of ipv6. This
is the same solution as used with a similar issues: Referring to
commit 70b095c843266 ("ipv6: remove dependency of nf_defrag_ipv6 on ipv6
module")

Fixes: 9d9e937b1c8b ("ipv6/netfilter: Discard first fragment not including all headers")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Georg Kohmann <geokohma@cisco.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Link: https://lore.kernel.org/r/20201119095833.8409-1-geokohma@cisco.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ipv6.h      |  2 --
 include/net/ipv6_frag.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 637cc6dd12b7..bd1f396cc9c7 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1064,8 +1064,6 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp,
 
 bool ipv6_ext_hdr(u8 nexthdr);
 
-bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp);
-
 enum {
 	IP6_FH_F_FRAG		= (1 << 0),
 	IP6_FH_F_AUTH		= (1 << 1),
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index a21e8b1381a1..851029ecff13 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -108,5 +108,35 @@ out_rcu_unlock:
 	rcu_read_unlock();
 	inet_frag_put(&fq->q);
 }
+
+/* Check if the upper layer header is truncated in the first fragment. */
+static inline bool
+ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp)
+{
+	u8 nexthdr = *nexthdrp;
+	__be16 frag_off;
+	int offset;
+
+	offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off);
+	if (offset < 0 || (frag_off & htons(IP6_OFFSET)))
+		return false;
+	switch (nexthdr) {
+	case NEXTHDR_TCP:
+		offset += sizeof(struct tcphdr);
+		break;
+	case NEXTHDR_UDP:
+		offset += sizeof(struct udphdr);
+		break;
+	case NEXTHDR_ICMP:
+		offset += sizeof(struct icmp6hdr);
+		break;
+	default:
+		offset += 1;
+	}
+	if (offset > skb->len)
+		return true;
+	return false;
+}
+
 #endif
 #endif
-- 
cgit v1.2.3


From 2bf31d94423c8ae3ff58e38a115b177df6940399 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 16 Nov 2020 11:18:08 +0100
Subject: jbd2: fix kernel-doc markups

Kernel-doc markup should use this format:
        identifier - description

They should not have any type before that, as otherwise
the parser won't do the right thing.

Also, some identifiers have different names between their
prototypes and the kernel-doc markup.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 1c49fd62ff2e..578ff196b3ce 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
 #define JI_WAIT_DATA (1 << __JI_WAIT_DATA)
 
 /**
- * struct jbd_inode - The jbd_inode type is the structure linking inodes in
+ * struct jbd2_inode - The jbd_inode type is the structure linking inodes in
  * ordered mode present in a transaction so that we can sync them during commit.
  */
 struct jbd2_inode {
-- 
cgit v1.2.3


From 537cf4e3cc2f6cc9088dcd6162de573f603adc29 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 20 Nov 2020 12:53:39 +0100
Subject: xsk: Fix umem cleanup bug at socket destruct

Fix a bug that is triggered when a partially setup socket is
destroyed. For a fully setup socket, a socket that has been bound to a
device, the cleanup of the umem is performed at the end of the buffer
pool's cleanup work queue item. This has to be performed in a work
queue, and not in RCU cleanup, as it is doing a vunmap that cannot
execute in interrupt context. However, when a socket has only been
partially set up so that a umem has been created but the buffer pool
has not, the code erroneously directly calls the umem cleanup function
instead of using a work queue, and this leads to a BUG_ON() in
vunmap().

As there in this case is no buffer pool, we cannot use its work queue,
so we need to introduce a work queue for the umem and schedule this for
the cleanup. So in the case there is no pool, we are going to use the
umem's own work queue to schedule the cleanup. But if there is a
pool, the cleanup of the umem is still being performed by the pool's
work queue, as it is important that the umem is cleaned up after the
pool.

Fixes: e5e1a4bc916d ("xsk: Fix possible memory leak at socket close")
Reported-by: Marek Majtyka <marekx.majtyka@intel.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Marek Majtyka <marekx.majtyka@intel.com>
Link: https://lore.kernel.org/bpf/1605873219-21629-1-git-send-email-magnus.karlsson@gmail.com
---
 include/net/xdp_sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 1a9559c0cbdd..4f4e93bf814c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -31,6 +31,7 @@ struct xdp_umem {
 	struct page **pgs;
 	int id;
 	struct list_head xsk_dma_list;
+	struct work_struct work;
 };
 
 struct xsk_map {
-- 
cgit v1.2.3


From b9ad3e9f5a7a760ab068e33e1f18d240ba32ce92 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@nuviainc.com>
Date: Fri, 20 Nov 2020 14:28:27 +0000
Subject: bonding: wait for sysfs kobject destruction before freeing struct
 slave

syzkaller found that with CONFIG_DEBUG_KOBJECT_RELEASE=y, releasing a
struct slave device could result in the following splat:

  kobject: 'bonding_slave' (00000000cecdd4fe): kobject_release, parent 0000000074ceb2b2 (delayed 1000)
  bond0 (unregistering): (slave bond_slave_1): Releasing backup interface
  ------------[ cut here ]------------
  ODEBUG: free active (active state 0) object type: timer_list hint: workqueue_select_cpu_near kernel/workqueue.c:1549 [inline]
  ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x98 kernel/workqueue.c:1600
  WARNING: CPU: 1 PID: 842 at lib/debugobjects.c:485 debug_print_object+0x180/0x240 lib/debugobjects.c:485
  Kernel panic - not syncing: panic_on_warn set ...
  CPU: 1 PID: 842 Comm: kworker/u4:4 Tainted: G S                5.9.0-rc8+ #96
  Hardware name: linux,dummy-virt (DT)
  Workqueue: netns cleanup_net
  Call trace:
   dump_backtrace+0x0/0x4d8 include/linux/bitmap.h:239
   show_stack+0x34/0x48 arch/arm64/kernel/traps.c:142
   __dump_stack lib/dump_stack.c:77 [inline]
   dump_stack+0x174/0x1f8 lib/dump_stack.c:118
   panic+0x360/0x7a0 kernel/panic.c:231
   __warn+0x244/0x2ec kernel/panic.c:600
   report_bug+0x240/0x398 lib/bug.c:198
   bug_handler+0x50/0xc0 arch/arm64/kernel/traps.c:974
   call_break_hook+0x160/0x1d8 arch/arm64/kernel/debug-monitors.c:322
   brk_handler+0x30/0xc0 arch/arm64/kernel/debug-monitors.c:329
   do_debug_exception+0x184/0x340 arch/arm64/mm/fault.c:864
   el1_dbg+0x48/0xb0 arch/arm64/kernel/entry-common.c:65
   el1_sync_handler+0x170/0x1c8 arch/arm64/kernel/entry-common.c:93
   el1_sync+0x80/0x100 arch/arm64/kernel/entry.S:594
   debug_print_object+0x180/0x240 lib/debugobjects.c:485
   __debug_check_no_obj_freed lib/debugobjects.c:967 [inline]
   debug_check_no_obj_freed+0x200/0x430 lib/debugobjects.c:998
   slab_free_hook mm/slub.c:1536 [inline]
   slab_free_freelist_hook+0x190/0x210 mm/slub.c:1577
   slab_free mm/slub.c:3138 [inline]
   kfree+0x13c/0x460 mm/slub.c:4119
   bond_free_slave+0x8c/0xf8 drivers/net/bonding/bond_main.c:1492
   __bond_release_one+0xe0c/0xec8 drivers/net/bonding/bond_main.c:2190
   bond_slave_netdev_event drivers/net/bonding/bond_main.c:3309 [inline]
   bond_netdev_event+0x8f0/0xa70 drivers/net/bonding/bond_main.c:3420
   notifier_call_chain+0xf0/0x200 kernel/notifier.c:83
   __raw_notifier_call_chain kernel/notifier.c:361 [inline]
   raw_notifier_call_chain+0x44/0x58 kernel/notifier.c:368
   call_netdevice_notifiers_info+0xbc/0x150 net/core/dev.c:2033
   call_netdevice_notifiers_extack net/core/dev.c:2045 [inline]
   call_netdevice_notifiers net/core/dev.c:2059 [inline]
   rollback_registered_many+0x6a4/0xec0 net/core/dev.c:9347
   unregister_netdevice_many.part.0+0x2c/0x1c0 net/core/dev.c:10509
   unregister_netdevice_many net/core/dev.c:10508 [inline]
   default_device_exit_batch+0x294/0x338 net/core/dev.c:10992
   ops_exit_list.isra.0+0xec/0x150 net/core/net_namespace.c:189
   cleanup_net+0x44c/0x888 net/core/net_namespace.c:603
   process_one_work+0x96c/0x18c0 kernel/workqueue.c:2269
   worker_thread+0x3f0/0xc30 kernel/workqueue.c:2415
   kthread+0x390/0x498 kernel/kthread.c:292
   ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:925

This is a potential use-after-free if the sysfs nodes are being accessed
whilst removing the struct slave, so wait for the object destruction to
complete before freeing the struct slave itself.

Fixes: 07699f9a7c8d ("bonding: add sysfs /slave dir for bond slave devices.")
Fixes: a068aab42258 ("bonding: Fix reference count leak in bond_sysfs_slave_add.")
Cc: Qiushi Wu <wu000273@umn.edu>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20201120142827.879226-1-jamie@nuviainc.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/bonding.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/bonding.h b/include/net/bonding.h
index 7d132cc1e584..d9d0ff3b0ad3 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -185,6 +185,11 @@ struct slave {
 	struct rtnl_link_stats64 slave_stats;
 };
 
+static inline struct slave *to_slave(struct kobject *kobj)
+{
+	return container_of(kobj, struct slave, kobj);
+}
+
 struct bond_up_slave {
 	unsigned int	count;
 	struct rcu_head rcu;
@@ -750,6 +755,9 @@ extern struct bond_parm_tbl ad_select_tbl[];
 /* exported from bond_netlink.c */
 extern struct rtnl_link_ops bond_link_ops;
 
+/* exported from bond_sysfs_slave.c */
+extern const struct sysfs_ops slave_sysfs_ops;
+
 static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
 {
 	atomic_long_inc(&dev->tx_dropped);
-- 
cgit v1.2.3


From bc2dc4406c463174613047d8b7946e12c8808cda Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Sat, 21 Nov 2020 22:17:01 -0800
Subject: compiler-clang: remove version check for BPF Tracing

bpftrace parses the kernel headers and uses Clang under the hood.

Remove the version check when __BPF_TRACING__ is defined (as bpftrace
does) so that this tool can continue to parse kernel headers, even with
older clang sources.

Fixes: commit 1f7a44f63e6c ("compiler-clang: add build check for clang 10.0.1")
Reported-by: Chen Yu <yu.chen.surf@gmail.com>
Reported-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Jarkko Sakkinen <jarkko@kernel.org>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Nathan Chancellor <natechancellor@gmail.com>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://lkml.kernel.org/r/20201104191052.390657-1-ndesaulniers@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index dd7233c48bf3..98cff1b4b088 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -8,8 +8,10 @@
 		     + __clang_patchlevel__)
 
 #if CLANG_VERSION < 100001
+#ifndef __BPF_TRACING__
 # error Sorry, your version of Clang is too old - please use 10.0.1 or newer.
 #endif
+#endif
 
 /* Compiler specific definitions for Clang compiler */
 
-- 
cgit v1.2.3


From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 21 Nov 2020 22:17:05 -0800
Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid()
 exports

The core-mm has a default __weak implementation of phys_to_target_node()
to mirror the weak definition of memory_add_physaddr_to_nid().  That
symbol is exported for modules.  However, while the export in
mm/memory_hotplug.c exported the symbol in the configuration cases of:

	CONFIG_NUMA_KEEP_MEMINFO=y
	CONFIG_MEMORY_HOTPLUG=y

...and:

	CONFIG_NUMA_KEEP_MEMINFO=n
	CONFIG_MEMORY_HOTPLUG=y

...it failed to export the symbol in the case of:

	CONFIG_NUMA_KEEP_MEMINFO=y
	CONFIG_MEMORY_HOTPLUG=n

Not only is that broken, but Christoph points out that the kernel should
not be exporting any __weak symbol, which means that
memory_add_physaddr_to_nid() example that phys_to_target_node() copied
is broken too.

Rework the definition of phys_to_target_node() and
memory_add_physaddr_to_nid() to not require weak symbols.  Move to the
common arch override design-pattern of an asm header defining a symbol
to replace the default implementation.

The only common header that all memory_add_physaddr_to_nid() producing
architectures implement is asm/sparsemem.h.  In fact, powerpc already
defines its memory_add_physaddr_to_nid() helper in sparsemem.h.
Double-down on that observation and define phys_to_target_node() where
necessary in asm/sparsemem.h.  An alternate consideration that was
discarded was to put this override in asm/numa.h, but that entangles
with the definition of MAX_NUMNODES relative to the inclusion of
linux/nodemask.h, and requires powerpc to grow a new header.

The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid
now that the symbol is properly exported / stubbed in all combinations
of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG.

[dan.j.williams@intel.com: v4]
  Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com
[dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning]
  Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com

Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 14 --------------
 include/linux/numa.h           | 30 +++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d65c6fdc5cfc..551093b74596 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void)
 }
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
-#ifdef CONFIG_NUMA
-extern int memory_add_physaddr_to_nid(u64 start);
-extern int phys_to_target_node(u64 start);
-#else
-static inline int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-static inline int phys_to_target_node(u64 start)
-{
-	return 0;
-}
-#endif
-
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 /*
  * pgdat resizing functions
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 8cb33ccfb671..cb44cfe2b725 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -21,13 +21,41 @@
 #endif
 
 #ifdef CONFIG_NUMA
+#include <linux/printk.h>
+#include <asm/sparsemem.h>
+
 /* Generic implementation available */
 int numa_map_to_online_node(int node);
-#else
+
+#ifndef memory_add_physaddr_to_nid
+static inline int memory_add_physaddr_to_nid(u64 start)
+{
+	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
+			start);
+	return 0;
+}
+#endif
+#ifndef phys_to_target_node
+static inline int phys_to_target_node(u64 start)
+{
+	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
+			start);
+	return 0;
+}
+#endif
+#else /* !CONFIG_NUMA */
 static inline int numa_map_to_online_node(int node)
 {
 	return NUMA_NO_NODE;
 }
+static inline int memory_add_physaddr_to_nid(u64 start)
+{
+	return 0;
+}
+static inline int phys_to_target_node(u64 start)
+{
+	return 0;
+}
 #endif
 
 #endif /* _LINUX_NUMA_H */
-- 
cgit v1.2.3


From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 21 Nov 2020 22:17:08 -0800
Subject: mm: fix readahead_page_batch for retry entries

Both btrfs and fuse have reported faults caused by seeing a retry entry
instead of the page they were looking for.  This was caused by a missing
check in the iterator.

As can be seen in the below panic log, the accessing 0x402 causes a
panic.  In the xarray.h, 0x402 means RETRY_ENTRY.

  BUG: kernel NULL pointer dereference, address: 0000000000000402
  CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1
  Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020
  RIP: 0010:fuse_readahead+0x152/0x470 [fuse]
  Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01
  RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246
  RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002
  RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60
  RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0
  R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68
  FS:  00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0
  Call Trace:
    read_pages+0x83/0x270
    page_cache_readahead_unbounded+0x197/0x230
    generic_file_buffered_read+0x57a/0xa20
    new_sync_read+0x112/0x1a0
    vfs_read+0xf8/0x180
    ksys_read+0x5f/0xe0
    do_syscall_64+0x33/0x80
    entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 042124cc64c3 ("mm: add new readahead_control API")
Reported-by: David Sterba <dsterba@suse.com>
Reported-by: Wonhyuk Yang <vvghjk1234@gmail.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e1e19c1f9ec9..d5570deff400 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
 	xas_set(&xas, rac->_index);
 	rcu_read_lock();
 	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
+		if (xas_retry(&xas, page))
+			continue;
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 		VM_BUG_ON_PAGE(PageTail(page), page);
 		array[i++] = page;
-- 
cgit v1.2.3


From 01770a166165738a6e05c3d911fb4609cc4eb416 Mon Sep 17 00:00:00 2001
From: Ricardo Dias <rdias@singlestore.com>
Date: Fri, 20 Nov 2020 11:11:33 +0000
Subject: tcp: fix race condition when creating child sockets from syncookies

When the TCP stack is in SYN flood mode, the server child socket is
created from the SYN cookie received in a TCP packet with the ACK flag
set.

The child socket is created when the server receives the first TCP
packet with a valid SYN cookie from the client. Usually, this packet
corresponds to the final step of the TCP 3-way handshake, the ACK
packet. But is also possible to receive a valid SYN cookie from the
first TCP data packet sent by the client, and thus create a child socket
from that SYN cookie.

Since a client socket is ready to send data as soon as it receives the
SYN+ACK packet from the server, the client can send the ACK packet (sent
by the TCP stack code), and the first data packet (sent by the userspace
program) almost at the same time, and thus the server will equally
receive the two TCP packets with valid SYN cookies almost at the same
instant.

When such event happens, the TCP stack code has a race condition that
occurs between the momement a lookup is done to the established
connections hashtable to check for the existence of a connection for the
same client, and the moment that the child socket is added to the
established connections hashtable. As a consequence, this race condition
can lead to a situation where we add two child sockets to the
established connections hashtable and deliver two sockets to the
userspace program to the same client.

This patch fixes the race condition by checking if an existing child
socket exists for the same client when we are adding the second child
socket to the established connections socket. If an existing child
socket exists, we drop the packet and discard the second child socket
to the same client.

Signed-off-by: Ricardo Dias <rdias@singlestore.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20201120111133.GA67501@rdias-suse-pc.lan
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_hashtables.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 92560974ea67..ca6a3ea9057e 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
 			 unsigned long high_limit);
 int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
 
-bool inet_ehash_insert(struct sock *sk, struct sock *osk);
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
+			 bool *found_dup_sk);
 int __inet_hash(struct sock *sk, struct sock *osk);
 int inet_hash(struct sock *sk);
 void inet_unhash(struct sock *sk);
-- 
cgit v1.2.3


From d549699048b4b5c22dd710455bcdb76966e55aa3 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Sat, 21 Nov 2020 08:28:17 +0200
Subject: net/packet: fix packet receive on L3 devices without visible hard
 header

In the patchset merged by commit b9fcf0a0d826
("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which
did not have header_ops were given one for the purpose of protocol parsing
on af_packet transmit path.

That change made af_packet receive path regard these devices as having a
visible L3 header and therefore aligned incoming skb->data to point to the
skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not
reset their mac_header prior to ingress and therefore their incoming
packets became malformed.

Ideally these devices would reset their mac headers, or af_packet would be
able to rely on dev->hard_header_len being 0 for such cases, but it seems
this is not the case.

Fix by changing af_packet RX ll visibility criteria to include the
existence of a '.create()' header operation, which is used when creating
a device hard header - via dev_hard_header() - by upper layers, and does
not exist in these L3 devices.

As this predicate may be useful in other situations, add it as a common
dev_has_header() helper in netdevice.h.

Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'")
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 964b494b0e8d..fa275a054f46 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3137,6 +3137,11 @@ static inline bool dev_validate_header(const struct net_device *dev,
 	return false;
 }
 
+static inline bool dev_has_header(const struct net_device *dev)
+{
+	return dev->header_ops && dev->header_ops->create;
+}
+
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
 			   int len, int size);
 int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
-- 
cgit v1.2.3


From acfdd18591eaac25446e976a0c0d190f8b3dbfb1 Mon Sep 17 00:00:00 2001
From: Amit Sunil Dhamne <amit.sunil.dhamne@xilinx.com>
Date: Mon, 23 Nov 2020 21:52:41 -0800
Subject: firmware: xilinx: Use hash-table for api feature check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bi