From e275d2109cdaea8b4554b9eb8a828bdb8f8ba068 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 26 Oct 2020 10:08:47 +0200 Subject: bus: ti-sysc: Fix reset status check for modules with quirks Commit d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") started showing a "OCP softreset timed out" warning on enable if the interconnect target module is not out of reset. This caused the warning to be often triggered for i2c and hdq while the devices are working properly. Turns out that some interconnect target modules seem to have an unusable reset status bits unless the module specific reset quirks are activated. Let's just skip the reset status check for those modules as we only want to activate the reset quirks when doing a reset, and not on enable. This way we don't see the bogus "OCP softreset timed out" warnings during boot. Fixes: d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index c59999ce044e..240dce553a0b 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) #define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) -- cgit v1.2.3 From f51778db088b2407ec177f2f4da0f6290602aa3f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 2 Nov 2020 12:43:27 +1100 Subject: swiotlb: using SIZE_MAX needs limits.h included After merging the drm-misc tree, linux-next build (arm multi_v7_defconfig) failed like this: In file included from drivers/gpu/drm/nouveau/nouveau_ttm.c:26: include/linux/swiotlb.h: In function 'swiotlb_max_mapping_size': include/linux/swiotlb.h:99:9: error: 'SIZE_MAX' undeclared (first use in this function) 99 | return SIZE_MAX; | ^~~~~~~~ include/linux/swiotlb.h:7:1: note: 'SIZE_MAX' is defined in header ''; did you forget to '#include '? 6 | #include +++ |+#include 7 | #include include/linux/swiotlb.h:99:9: note: each undeclared identifier is reported only once for each function it appears in 99 | return SIZE_MAX; | ^~~~~~~~ Caused by commit abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") but only exposed by commit "drm/nouveu: fix swiotlb include" Fix it by including linux/limits.h as appropriate. Fixes: abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") Signed-off-by: Stephen Rothwell Link: https://lore.kernel.org/r/20201102124327.2f82b2a7@canb.auug.org.au Signed-off-by: Michael S. Tsirkin --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 046bb94bd4d6..fa5122c6711e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -5,6 +5,7 @@ #include #include #include +#include struct device; struct page; -- cgit v1.2.3 From 93bd813c17763177cf87e96c2313bd4dd747d234 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Thu, 5 Nov 2020 11:08:04 +0800 Subject: ASoC: rt1015: add delay to fix pop noise from speaker Add delay to fix pop noise from speaker. Signed-off-by: Jack Yu Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20201105030804.31115-1-jack.yu@realtek.com Signed-off-by: Mark Brown --- include/sound/rt1015.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/sound/rt1015.h (limited to 'include') diff --git a/include/sound/rt1015.h b/include/sound/rt1015.h new file mode 100644 index 000000000000..70a7538d4c89 --- /dev/null +++ b/include/sound/rt1015.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/sound/rt1015.h -- Platform data for RT1015 + * + * Copyright 2020 Realtek Microelectronics + */ + +#ifndef __LINUX_SND_RT1015_H +#define __LINUX_SND_RT1015_H + +struct rt1015_platform_data { + unsigned int power_up_delay_ms; +}; + +#endif -- cgit v1.2.3 From 3084db0e0d5076cd48408274ab0911cd3ccdae88 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 2 Nov 2020 15:23:04 -0800 Subject: kunit: fix display of failed expectations for strings Currently the following expectation KUNIT_EXPECT_STREQ(test, "hi", "bye"); will produce: Expected "hi" == "bye", but "hi" == 1625079497 "bye" == 1625079500 After this patch: Expected "hi" == "bye", but "hi" == hi "bye" == bye KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() was written but just mistakenly not actually used by KUNIT_EXPECT_STREQ() and friends. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index db1b0ae666c4..df60be7e22ca 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1105,7 +1105,7 @@ do { \ KUNIT_ASSERTION(test, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \ + KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ assert_type, \ #op, \ #left, \ -- cgit v1.2.3 From 5e844cc37a5cbaa460e68f9a989d321d63088a89 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:10 +0100 Subject: spi: Introduce device-managed SPI controller allocation SPI driver probing currently comprises two steps, whereas removal comprises only one step: spi_alloc_master() spi_register_controller() spi_unregister_controller() That's because spi_unregister_controller() calls device_unregister() instead of device_del(), thereby releasing the reference on the spi_controller which was obtained by spi_alloc_master(). An SPI driver's private data is contained in the same memory allocation as the spi_controller struct. Thus, once spi_unregister_controller() has been called, the private data is inaccessible. But some drivers need to access it after spi_unregister_controller() to perform further teardown steps. Introduce devm_spi_alloc_master() and devm_spi_alloc_slave(), which release a reference on the spi_controller struct only after the driver has unbound, thereby keeping the memory allocation accessible. Change spi_unregister_controller() to not release a reference if the spi_controller was allocated by one of these new devm functions. The present commit is small enough to be backportable to stable. It allows fixing drivers which use the private data in their ->remove() hook after it's been freed. It also allows fixing drivers which neglect to release a reference on the spi_controller in the probe error path. Long-term, most SPI drivers shall be moved over to the devm functions introduced herein. The few that can't shall be changed in a treewide commit to explicitly release the last reference on the controller. That commit shall amend spi_unregister_controller() to no longer release a reference, thereby completing the migration. As a result, the behaviour will be less surprising and more consistent with subsystems such as IIO, which also includes the private data in the allocation of the generic iio_dev struct, but calls device_del() in iio_device_unregister(). Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/272bae2ef08abd21388c98e23729886663d19192.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 99380c0825db..b390fdac1587 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -734,6 +734,25 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +struct spi_controller *__devm_spi_alloc_controller(struct device *dev, + unsigned int size, + bool slave); + +static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, + unsigned int size) +{ + return __devm_spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __devm_spi_alloc_controller(dev, size, true); +} + extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); -- cgit v1.2.3 From c3213d260a23e263ef85ba21ac68c9e7578020b5 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 12 Nov 2020 15:17:32 -0500 Subject: SUNRPC: Fix oops in the rpc_xdr_buf event class Backchannel rpc tasks don't have task->tk_client set, so it's necessary to check it for NULL before dereferencing. Fixes: c509f15a5801 ("SUNRPC: Split the xdr_buf event class") Signed-off-by: Scott Mayhew Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2477014e3fa6..2a03263b5f9d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -68,7 +68,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, TP_fast_assign( __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; __entry->head_base = xdr->head[0].iov_base; __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; -- cgit v1.2.3 From 8cf8821e15cd553339a5b48ee555a0439c2b2742 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 12 Nov 2020 20:58:15 -0500 Subject: net: Exempt multicast addresses from five-second neighbor lifetime Commit 58956317c8de ("neighbor: Improve garbage collection") guarantees neighbour table entries a five-second lifetime. Processes which make heavy use of multicast can fill the neighour table with multicast addresses in five seconds. At that point, neighbour entries can't be GC-ed because they aren't five seconds old yet, the kernel log starts to fill up with "neighbor table overflow!" messages, and sends start to fail. This patch allows multicast addresses to be thrown out before they've lived out their five seconds. This makes room for non-multicast addresses and makes messages to all addresses more reliable in these circumstances. Fixes: 58956317c8de ("neighbor: Improve garbage collection") Signed-off-by: Jeff Dike Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20201113015815.31397-1-jdike@akamai.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 81ee17594c32..22ced1381ede 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -204,6 +204,7 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; -- cgit v1.2.3 From 9c2e14b48119b39446031d29d994044ae958d8fc Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 10 Nov 2020 16:16:40 -0800 Subject: ip_tunnels: Set tunnel option flag when tunnel metadata is present Currently, we may set the tunnel option flag when the size of metadata is zero. For example, we set TUNNEL_GENEVE_OPT in the receive function no matter the geneve option is present or not. As this may result in issues on the tunnel flags consumers, this patch fixes the issue. Related discussion: * https://lore.kernel.org/netdev/1604448694-19351-1-git-send-email-yihung.wei@gmail.com/T/#u Fixes: 256c87c17c53 ("net: check tunnel option type in tunnel flags") Signed-off-by: Yi-Hung Wei Link: https://lore.kernel.org/r/1605053800-74072-1-git-send-email-yihung.wei@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 02ccd32542d0..61620677b034 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -478,9 +478,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { - memcpy(ip_tunnel_info_opts(info), from, len); info->options_len = len; - info->key.tun_flags |= flags; + if (len > 0) { + memcpy(ip_tunnel_info_opts(info), from, len); + info->key.tun_flags |= flags; + } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) @@ -526,7 +528,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, __be16 flags) { info->options_len = 0; - info->key.tun_flags |= flags; } #endif /* CONFIG_INET */ -- cgit v1.2.3 From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Nov 2020 17:52:58 +0100 Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed Stefan Agner reported a bug when using zsram on 32-bit Arm machines with RAM above the 4GB address boundary: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = a27bd01c [00000000] *pgd=236a0003, *pmd=1ffa64003 Internal error: Oops: 207 [#1] SMP ARM Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1 Hardware name: BCM2711 PC is at zs_map_object+0x94/0x338 LR is at zram_bvec_rw.constprop.0+0x330/0xa64 pc : [] lr : [] psr: 60000013 sp : e376bbe0 ip : 00000000 fp : c1e2921c r10: 00000002 r9 : c1dda730 r8 : 00000000 r7 : e8ff7a00 r6 : 00000000 r5 : 02f9ffa0 r4 : e3710000 r3 : 000fdffe r2 : c1e0ce80 r1 : ebf979a0 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5383d Table: 235c2a80 DAC: fffffffd Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6) Stack: (0xe376bbe0 to 0xe376c000) As it turns out, zsram needs to know the maximum memory size, which is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture. The same problem will be hit on all 32-bit architectures that have a physical address space larger than 4GB and happen to not enable sparsemem and include asm/sparsemem.h from asm/pgtable.h. After the initial discussion, I suggested just always defining MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is set, or provoking a build error otherwise. This addresses all configurations that can currently have this runtime bug, but leaves all other configurations unchanged. I looked up the possible number of bits in source code and datasheets, here is what I found: - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never support more than 32 bits, even though supersections in theory allow up to 40 bits as well. - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5 XPA supports up to 60 bits in theory, but 40 bits are more than anyone will ever ship - On PowerPC, there are three different implementations of 36 bit addressing, but 32-bit is used without CONFIG_PTE_64BIT - On RISC-V, the normal page table format can support 34 bit addressing. There is no highmem support on RISC-V, so anything above 2GB is unused, but it might be useful to eventually support CONFIG_ZRAM for high pages. Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library") Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS") Acked-by: Thomas Bogendoerfer Reviewed-by: Stefan Agner Tested-by: Stefan Agner Acked-by: Mike Rapoport Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/ Signed-off-by: Arnd Bergmann --- include/linux/pgtable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..e237004d498d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ +#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) +#ifdef CONFIG_PHYS_ADDR_T_64BIT +/* + * ZSMALLOC needs to know the highest PFN on 32-bit architectures + * with physical address space extension, but falls back to + * BITS_PER_LONG otherwise. + */ +#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition +#else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#endif +#endif + #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit v1.2.3 From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 10 Nov 2020 17:29:32 +0800 Subject: PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter In many case, we need to check return value of pm_runtime_get_sync, but it brings a trouble to the usage counter processing. Many callers forget to decrease the usage counter when it failed, which could resulted in reference leak. It has been discussed a lot[0][1]. So we add a function to deal with the usage counter for better coding. [0]https://lkml.org/lkml/2020/6/14/88 [1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139 Signed-off-by: Zhang Qilong Acked-by: Rafael J. Wysocki Signed-off-by: Jakub Kicinski --- include/linux/pm_runtime.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4b708f4e8eed..b492ae00cc90 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +/** + * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. + * @dev: Target device. + * + * Resume @dev synchronously and if that is successful, increment its runtime + * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been + * incremented or a negative error code otherwise. + */ +static inline int pm_runtime_resume_and_get(struct device *dev) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. -- cgit v1.2.3 From 9d9e937b1c8be97b424e3e11938e183fcde905c0 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Wed, 11 Nov 2020 12:50:25 +0100 Subject: ipv6/netfilter: Discard first fragment not including all headers Packets are processed even though the first fragment don't include all headers through the upper layer header. This breaks TAHI IPv6 Core Conformance Test v6LC.1.3.6. Referring to RFC8200 SECTION 4.5: "If the first fragment does not include all headers through an Upper-Layer header, then that fragment should be discarded and an ICMP Parameter Problem, Code 3, message should be sent to the source of the fragment, with the Pointer field set to zero." The fragment needs to be validated the same way it is done in commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't include all headers") for ipv6. Wrap the validation into a common function, ipv6_frag_thdr_truncated() to check for truncation in the upper layer header. This validation does not fullfill all aspects of RFC 8200, section 4.5, but is at the moment sufficient to pass mentioned TAHI test. In netfilter, utilize the fragment offset returned by find_prev_fhdr() to let ipv6_frag_thdr_truncated() start it's traverse from the fragment header. Return 0 to drop the fragment in the netfilter. This is the same behaviour as used on other protocol errors in this function, e.g. when nf_ct_frag6_queue() returns -EPROTO. The Fragment will later be picked up by ipv6_frag_rcv() in reassembly.c. ipv6_frag_rcv() will then send an appropriate ICMP Parameter Problem message back to the source. References commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't include all headers") Signed-off-by: Georg Kohmann Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20201111115025.28879-1-geokohma@cisco.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd1f396cc9c7..637cc6dd12b7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1064,6 +1064,8 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, bool ipv6_ext_hdr(u8 nexthdr); +bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp); + enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), -- cgit v1.2.3 From fe0a8a95e7134d0b44cd407bc0085b9ba8d8fe31 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 6 Nov 2020 11:33:17 -0800 Subject: scsi: libiscsi: Fix NOP race condition iSCSI NOPs are sometimes "lost", mistakenly sent to the user-land iscsid daemon instead of handled in the kernel, as they should be, resulting in a message from the daemon like: iscsid: Got nop in, but kernel supports nop handling. This can occur because of the new forward- and back-locks, and the fact that an iSCSI NOP response can occur before processing of the NOP send is complete. This can result in "conn->ping_task" being NULL in iscsi_nop_out_rsp(), when the pointer is actually in the process of being set. To work around this, we add a new state to the "ping_task" pointer. In addition to NULL (not assigned) and a pointer (assigned), we add the state "being set", which is signaled with an INVALID pointer (using "-1"). Link: https://lore.kernel.org/r/20201106193317.16993-1-leeman.duncan@gmail.com Reviewed-by: Mike Christie Signed-off-by: Lee Duncan Signed-off-by: Martin K. Petersen --- include/scsi/libiscsi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c25fb86ffae9..b3bbd10eb3f0 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -132,6 +132,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent; -- cgit v1.2.3 From f97bb5272d9e95d400d6c8643ebb146b3e3e7842 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 09:08:41 +0100 Subject: sched: Fix data-race in wakeup Mel reported that on some ARM64 platforms loadavg goes bananas and Will tracked it down to the following race: CPU0 CPU1 schedule() prev->sched_contributes_to_load = X; deactivate_task(prev); try_to_wake_up() if (p->on_rq &&) // false if (smp_load_acquire(&p->on_cpu) && // true ttwu_queue_wakelist()) p->sched_remote_wakeup = Y; smp_store_release(prev->on_cpu, 0); where both p->sched_contributes_to_load and p->sched_remote_wakeup are in the same word, and thus the stores X and Y race (and can clobber one another's data). Whereas prior to commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the p->on_cpu handoff serialized access to p->sched_remote_wakeup (just as it still does with p->sched_contributes_to_load) that commit broke that by calling ttwu_queue_wakelist() with p->on_cpu != 0. However, due to p->XXX = X ttwu() schedule() if (p->on_rq && ...) // false smp_mb__after_spinlock() if (smp_load_acquire(&p->on_cpu) && deactivate_task() ttwu_queue_wakelist()) p->on_rq = 0; p->sched_remote_wakeup = Y; We can be sure any 'current' store is complete and 'current' is guaranteed asleep. Therefore we can move p->sched_remote_wakeup into the current flags word. Note: while the observed failure was loadavg accounting gone wrong due to ttwu() cobbering p->sched_contributes_to_load, the reverse problem is also possible where schedule() clobbers p->sched_remote_wakeup, this could result in enqueue_entity() wrecking ->vruntime and causing scheduling artifacts. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Mel Gorman Debugged-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117083016.GK3121392@hirez.programming.kicks-ass.net --- include/linux/sched.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d383cf09e78f..0e91b451d2a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -769,7 +769,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; #ifdef CONFIG_PSI unsigned sched_psi_wake_requeue:1; #endif @@ -779,6 +778,21 @@ struct task_struct { /* Unserialized, strictly 'current' */ + /* + * This field must not be in the scheduler word above due to wakelist + * queueing no longer being serialized by p->on_cpu. However: + * + * p->XXX = X; ttwu() + * schedule() if (p->on_rq && ..) // false + * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true + * deactivate_task() ttwu_queue_wakelist()) + * p->on_rq = 0; p->sched_remote_wakeup = Y; + * + * guarantees all stores of 'current' are visible before + * ->sched_remote_wakeup gets used, so it can be in this word. + */ + unsigned sched_remote_wakeup:1; + /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; -- cgit v1.2.3 From 2279f540ea7d05f22d2f0c4224319330228586bc Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 17 Nov 2020 07:14:32 +0100 Subject: sched/deadline: Fix priority inheritance with multiple scheduling classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glenn reported that "an application [he developed produces] a BUG in deadline.c when a SCHED_DEADLINE task contends with CFS tasks on nested PTHREAD_PRIO_INHERIT mutexes. I believe the bug is triggered when a CFS task that was boosted by a SCHED_DEADLINE task boosts another CFS task (nested priority inheritance). ------------[ cut here ]------------ kernel BUG at kernel/sched/deadline.c:1462! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 12 PID: 19171 Comm: dl_boost_bug Tainted: ... Hardware name: ... RIP: 0010:enqueue_task_dl+0x335/0x910 Code: ... RSP: 0018:ffffc9000c2bbc68 EFLAGS: 00010002 RAX: 0000000000000009 RBX: ffff888c0af94c00 RCX: ffffffff81e12500 RDX: 000000000000002e RSI: ffff888c0af94c00 RDI: ffff888c10b22600 RBP: ffffc9000c2bbd08 R08: 0000000000000009 R09: 0000000000000078 R10: ffffffff81e12440 R11: ffffffff81e1236c R12: ffff888bc8932600 R13: ffff888c0af94eb8 R14: ffff888c10b22600 R15: ffff888bc8932600 FS: 00007fa58ac55700(0000) GS:ffff888c10b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa58b523230 CR3: 0000000bf44ab003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? intel_pstate_update_util_hwp+0x13/0x170 rt_mutex_setprio+0x1cc/0x4b0 task_blocks_on_rt_mutex+0x225/0x260 rt_spin_lock_slowlock_locked+0xab/0x2d0 rt_spin_lock_slowlock+0x50/0x80 hrtimer_grab_expiry_lock+0x20/0x30 hrtimer_cancel+0x13/0x30 do_nanosleep+0xa0/0x150 hrtimer_nanosleep+0xe1/0x230 ? __hrtimer_init_sleeper+0x60/0x60 __x64_sys_nanosleep+0x8d/0xa0 do_syscall_64+0x4a/0x100 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa58b52330d ... ---[ end trace 0000000000000002 ]— He also provided a simple reproducer creating the situation below: So the execution order of locking steps are the following (N1 and N2 are non-deadline tasks. D1 is a deadline task. M1 and M2 are mutexes that are enabled * with priority inheritance.) Time moves forward as this timeline goes down: N1 N2 D1 | | | | | | Lock(M1) | | | | | | Lock(M2) | | | | | | Lock(M2) | | | | Lock(M1) | | (!!bug triggered!) | Daniel reported a similar situation as well, by just letting ksoftirqd run with DEADLINE (and eventually block on a mutex). Problem is that boosted entities (Priority Inheritance) use static DEADLINE parameters of the top priority waiter. However, there might be cases where top waiter could be a non-DEADLINE entity that is currently boosted by a DEADLINE entity from a different lock chain (i.e., nested priority chains involving entities of non-DEADLINE classes). In this case, top waiter static DEADLINE parameters could be null (initialized to 0 at fork()) and replenish_dl_entity() would hit a BUG(). Fix this by keeping track of the original donor and using its parameters when a task is boosted. Reported-by: Glenn Elliott Reported-by: Daniel Bristot de Oliveira Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Tested-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201117061432.517340-1-juri.lelli@redhat.com --- include/linux/sched.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0e91b451d2a2..095fdec07b38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -551,7 +551,6 @@ struct sched_dl_entity { * overruns. */ unsigned int dl_throttled : 1; - unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; @@ -570,6 +569,15 @@ struct sched_dl_entity { * time. */ struct hrtimer inactive_timer; + +#ifdef CONFIG_RT_MUTEXES + /* + * Priority Inheritance. When a DEADLINE scheduling entity is boosted + * pi_se points to the donor, otherwise points to the dl_se it belongs + * to (the original one/itself). + */ + struct sched_dl_entity *pi_se; +#endif }; #ifdef CONFIG_UCLAMP_TASK -- cgit v1.2.3 From 138559b9f99d3b6b1d5e75c78facc067a23871c6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 15 Nov 2020 15:14:48 +0200 Subject: net/tls: Fix wrong record sn in async mode of device resync In async_resync mode, we log the TCP seq of records until the async request is completed. Later, in case one of the logged seqs matches the resync request, we return it, together with its record serial number. Before this fix, we mistakenly returned the serial number of the current record instead. Fixes: ed9b7646b06a ("net/tls: Add asynchronous resync") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Link: https://lore.kernel.org/r/20201115131448.2702-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index baf1e99d8193..cf1473099453 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -300,7 +300,8 @@ enum tls_offload_sync_type { #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13 struct tls_offload_resync_async { atomic64_t req; - u32 loglen; + u16 loglen; + u16 rcd_delta; u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; @@ -471,6 +472,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) return (i == -1); } +static inline void tls_bigint_subtract(unsigned char *seq, int n) +{ + u64 rcd_sn; + __be64 *p; + + BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8); + + p = (__be64 *)seq; + rcd_sn = be64_to_cpu(*p); + *p = cpu_to_be64(rcd_sn - n); +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -639,6 +652,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); rx_ctx->resync_async->loglen = 0; + rx_ctx->resync_async->rcd_delta = 0; } static inline void -- cgit v1.2.3 From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 10 Nov 2020 15:19:08 +0800 Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system "intel_iommu=off" command line is used to disable iommu but iommu is force enabled in a tboot system for security reason. However for better performance on high speed network device, a new option "intel_iommu=tboot_noforce" is introduced to disable the force on. By default kernel should panic if iommu init fail in tboot for security reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off". Fix the code setting force_on and move intel_iommu_tboot_noforce from tboot code to intel iommu code. Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on") Signed-off-by: Zhenzhong Duan Tested-by: Lukasz Hawrylko Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com Signed-off-by: Will Deacon --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fbf5b3e7707e..d956987ed032 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -- cgit v1.2.3 From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:40 +0900 Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4 Align the bootconfig applied initrd image size to 4. To fill the gap, the bootconfig command uses null characters in between the bootconfig data and the footer. This will expands the footer size but don't change the checksum. Thus the block image of the initrd file with bootconfig is as follows. [initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"] Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 9903088891fa..2696eb0fc149 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -12,6 +12,9 @@ #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 +#define BOOTCONFIG_ALIGN_SHIFT 2 +#define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) +#define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) /* XBC tree node */ struct xbc_node { -- cgit v1.2.3 From 2d8f6481c17db9fa5238b277cdbc392084060b09 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Thu, 19 Nov 2020 10:58:33 +0100 Subject: ipv6: Remove dependency of ipv6_frag_thdr_truncated on ipv6 module IPV6=m NF_DEFRAG_IPV6=y ld: net/ipv6/netfilter/nf_conntrack_reasm.o: in function `nf_ct_frag6_gather': net/ipv6/netfilter/nf_conntrack_reasm.c:462: undefined reference to `ipv6_frag_thdr_truncated' Netfilter is depending on ipv6 symbol ipv6_frag_thdr_truncated. This dependency is forcing IPV6=y. Remove this dependency by moving ipv6_frag_thdr_truncated out of ipv6. This is the same solution as used with a similar issues: Referring to commit 70b095c843266 ("ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module") Fixes: 9d9e937b1c8b ("ipv6/netfilter: Discard first fragment not including all headers") Reported-by: Randy Dunlap Reported-by: kernel test robot Signed-off-by: Georg Kohmann Acked-by: Pablo Neira Ayuso Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20201119095833.8409-1-geokohma@cisco.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 -- include/net/ipv6_frag.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 637cc6dd12b7..bd1f396cc9c7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1064,8 +1064,6 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, bool ipv6_ext_hdr(u8 nexthdr); -bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp); - enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index a21e8b1381a1..851029ecff13 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -108,5 +108,35 @@ out_rcu_unlock: rcu_read_unlock(); inet_frag_put(&fq->q); } + +/* Check if the upper layer header is truncated in the first fragment. */ +static inline bool +ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp) +{ + u8 nexthdr = *nexthdrp; + __be16 frag_off; + int offset; + + offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off); + if (offset < 0 || (frag_off & htons(IP6_OFFSET))) + return false; + switch (nexthdr) { + case NEXTHDR_TCP: + offset += sizeof(struct tcphdr); + break; + case NEXTHDR_UDP: + offset += sizeof(struct udphdr); + break; + case NEXTHDR_ICMP: + offset += sizeof(struct icmp6hdr); + break; + default: + offset += 1; + } + if (offset > skb->len) + return true; + return false; +} + #endif #endif -- cgit v1.2.3 From 2bf31d94423c8ae3ff58e38a115b177df6940399 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:08 +0100 Subject: jbd2: fix kernel-doc markups Kernel-doc markup should use this format: identifier - description They should not have any type before that, as otherwise the parser won't do the right thing. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Reviewed-by: Jan Kara Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1c49fd62ff2e..578ff196b3ce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { -- cgit v1.2.3 From 537cf4e3cc2f6cc9088dcd6162de573f603adc29 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 20 Nov 2020 12:53:39 +0100 Subject: xsk: Fix umem cleanup bug at socket destruct Fix a bug that is triggered when a partially setup socket is destroyed. For a fully setup socket, a socket that has been bound to a device, the cleanup of the umem is performed at the end of the buffer pool's cleanup work queue item. This has to be performed in a work queue, and not in RCU cleanup, as it is doing a vunmap that cannot execute in interrupt context. However, when a socket has only been partially set up so that a umem has been created but the buffer pool has not, the code erroneously directly calls the umem cleanup function instead of using a work queue, and this leads to a BUG_ON() in vunmap(). As there in this case is no buffer pool, we cannot use its work queue, so we need to introduce a work queue for the umem and schedule this for the cleanup. So in the case there is no pool, we are going to use the umem's own work queue to schedule the cleanup. But if there is a pool, the cleanup of the umem is still being performed by the pool's work queue, as it is important that the umem is cleaned up after the pool. Fixes: e5e1a4bc916d ("xsk: Fix possible memory leak at socket close") Reported-by: Marek Majtyka Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Marek Majtyka Link: https://lore.kernel.org/bpf/1605873219-21629-1-git-send-email-magnus.karlsson@gmail.com --- include/net/xdp_sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1a9559c0cbdd..4f4e93bf814c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -31,6 +31,7 @@ struct xdp_umem { struct page **pgs; int id; struct list_head xsk_dma_list; + struct work_struct work; }; struct xsk_map { -- cgit v1.2.3 From b9ad3e9f5a7a760ab068e33e1f18d240ba32ce92 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 20 Nov 2020 14:28:27 +0000 Subject: bonding: wait for sysfs kobject destruction before freeing struct slave syzkaller found that with CONFIG_DEBUG_KOBJECT_RELEASE=y, releasing a struct slave device could result in the following splat: kobject: 'bonding_slave' (00000000cecdd4fe): kobject_release, parent 0000000074ceb2b2 (delayed 1000) bond0 (unregistering): (slave bond_slave_1): Releasing backup interface ------------[ cut here ]------------ ODEBUG: free active (active state 0) object type: timer_list hint: workqueue_select_cpu_near kernel/workqueue.c:1549 [inline] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x98 kernel/workqueue.c:1600 WARNING: CPU: 1 PID: 842 at lib/debugobjects.c:485 debug_print_object+0x180/0x240 lib/debugobjects.c:485 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 842 Comm: kworker/u4:4 Tainted: G S 5.9.0-rc8+ #96 Hardware name: linux,dummy-virt (DT) Workqueue: netns cleanup_net Call trace: dump_backtrace+0x0/0x4d8 include/linux/bitmap.h:239 show_stack+0x34/0x48 arch/arm64/kernel/traps.c:142 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x174/0x1f8 lib/dump_stack.c:118 panic+0x360/0x7a0 kernel/panic.c:231 __warn+0x244/0x2ec kernel/panic.c:600 report_bug+0x240/0x398 lib/bug.c:198 bug_handler+0x50/0xc0 arch/arm64/kernel/traps.c:974 call_break_hook+0x160/0x1d8 arch/arm64/kernel/debug-monitors.c:322 brk_handler+0x30/0xc0 arch/arm64/kernel/debug-monitors.c:329 do_debug_exception+0x184/0x340 arch/arm64/mm/fault.c:864 el1_dbg+0x48/0xb0 arch/arm64/kernel/entry-common.c:65 el1_sync_handler+0x170/0x1c8 arch/arm64/kernel/entry-common.c:93 el1_sync+0x80/0x100 arch/arm64/kernel/entry.S:594 debug_print_object+0x180/0x240 lib/debugobjects.c:485 __debug_check_no_obj_freed lib/debugobjects.c:967 [inline] debug_check_no_obj_freed+0x200/0x430 lib/debugobjects.c:998 slab_free_hook mm/slub.c:1536 [inline] slab_free_freelist_hook+0x190/0x210 mm/slub.c:1577 slab_free mm/slub.c:3138 [inline] kfree+0x13c/0x460 mm/slub.c:4119 bond_free_slave+0x8c/0xf8 drivers/net/bonding/bond_main.c:1492 __bond_release_one+0xe0c/0xec8 drivers/net/bonding/bond_main.c:2190 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3309 [inline] bond_netdev_event+0x8f0/0xa70 drivers/net/bonding/bond_main.c:3420 notifier_call_chain+0xf0/0x200 kernel/notifier.c:83 __raw_notifier_call_chain kernel/notifier.c:361 [inline] raw_notifier_call_chain+0x44/0x58 kernel/notifier.c:368 call_netdevice_notifiers_info+0xbc/0x150 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] rollback_registered_many+0x6a4/0xec0 net/core/dev.c:9347 unregister_netdevice_many.part.0+0x2c/0x1c0 net/core/dev.c:10509 unregister_netdevice_many net/core/dev.c:10508 [inline] default_device_exit_batch+0x294/0x338 net/core/dev.c:10992 ops_exit_list.isra.0+0xec/0x150 net/core/net_namespace.c:189 cleanup_net+0x44c/0x888 net/core/net_namespace.c:603 process_one_work+0x96c/0x18c0 kernel/workqueue.c:2269 worker_thread+0x3f0/0xc30 kernel/workqueue.c:2415 kthread+0x390/0x498 kernel/kthread.c:292 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:925 This is a potential use-after-free if the sysfs nodes are being accessed whilst removing the struct slave, so wait for the object destruction to complete before freeing the struct slave itself. Fixes: 07699f9a7c8d ("bonding: add sysfs /slave dir for bond slave devices.") Fixes: a068aab42258 ("bonding: Fix reference count leak in bond_sysfs_slave_add.") Cc: Qiushi Wu Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jamie Iles Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201120142827.879226-1-jamie@nuviainc.com Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index 7d132cc1e584..d9d0ff3b0ad3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -185,6 +185,11 @@ struct slave { struct rtnl_link_stats64 slave_stats; }; +static inline struct slave *to_slave(struct kobject *kobj) +{ + return container_of(kobj, struct slave, kobj); +} + struct bond_up_slave { unsigned int count; struct rcu_head rcu; @@ -750,6 +755,9 @@ extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; +/* exported from bond_sysfs_slave.c */ +extern const struct sysfs_ops slave_sysfs_ops; + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { atomic_long_inc(&dev->tx_dropped); -- cgit v1.2.3 From bc2dc4406c463174613047d8b7946e12c8808cda Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 21 Nov 2020 22:17:01 -0800 Subject: compiler-clang: remove version check for BPF Tracing bpftrace parses the kernel headers and uses Clang under the hood. Remove the version check when __BPF_TRACING__ is defined (as bpftrace does) so that this tool can continue to parse kernel headers, even with older clang sources. Fixes: commit 1f7a44f63e6c ("compiler-clang: add build check for clang 10.0.1") Reported-by: Chen Yu Reported-by: Jarkko Sakkinen Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Acked-by: Song Liu Acked-by: Nathan Chancellor Acked-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20201104191052.390657-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index dd7233c48bf3..98cff1b4b088 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -8,8 +8,10 @@ + __clang_patchlevel__) #if CLANG_VERSION < 100001 +#ifndef __BPF_TRACING__ # error Sorry, your version of Clang is too old - please use 10.0.1 or newer. #endif +#endif /* Compiler specific definitions for Clang compiler */ -- cgit v1.2.3 From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 21 Nov 2020 22:17:05 -0800 Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid() exports The core-mm has a default __weak implementation of phys_to_target_node() to mirror the weak definition of memory_add_physaddr_to_nid(). That symbol is exported for modules. However, while the export in mm/memory_hotplug.c exported the symbol in the configuration cases of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=y ...and: CONFIG_NUMA_KEEP_MEMINFO=n CONFIG_MEMORY_HOTPLUG=y ...it failed to export the symbol in the case of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=n Not only is that broken, but Christoph points out that the kernel should not be exporting any __weak symbol, which means that memory_add_physaddr_to_nid() example that phys_to_target_node() copied is broken too. Rework the definition of phys_to_target_node() and memory_add_physaddr_to_nid() to not require weak symbols. Move to the common arch override design-pattern of an asm header defining a symbol to replace the default implementation. The only common header that all memory_add_physaddr_to_nid() producing architectures implement is asm/sparsemem.h. In fact, powerpc already defines its memory_add_physaddr_to_nid() helper in sparsemem.h. Double-down on that observation and define phys_to_target_node() where necessary in asm/sparsemem.h. An alternate consideration that was discarded was to put this override in asm/numa.h, but that entangles with the definition of MAX_NUMNODES relative to the inclusion of linux/nodemask.h, and requires powerpc to grow a new header. The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid now that the symbol is properly exported / stubbed in all combinations of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG. [dan.j.williams@intel.com: v4] Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com [dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning] Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation") Reported-by: Randy Dunlap Reported-by: Thomas Gleixner Reported-by: kernel test robot Reported-by: Christoph Hellwig Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Cc: Joao Martins Cc: Tony Luck Cc: Fenghua Yu Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vishal Verma Cc: Stephen Rothwell Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 14 -------------- include/linux/numa.h | 30 +++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..551093b74596 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -extern int phys_to_target_node(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -static inline int phys_to_target_node(u64 start) -{ - return 0; -} -#endif - #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index 8cb33ccfb671..cb44cfe2b725 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -21,13 +21,41 @@ #endif #ifdef CONFIG_NUMA +#include +#include + /* Generic implementation available */ int numa_map_to_online_node(int node); -#else + +#ifndef memory_add_physaddr_to_nid +static inline int memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#ifndef phys_to_target_node +static inline int phys_to_target_node(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} #endif #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 21 Nov 2020 22:17:08 -0800 Subject: mm: fix readahead_page_batch for retry entries Both btrfs and fuse have reported faults caused by seeing a retry entry instead of the page they were looking for. This was caused by a missing check in the iterator. As can be seen in the below panic log, the accessing 0x402 causes a panic. In the xarray.h, 0x402 means RETRY_ENTRY. BUG: kernel NULL pointer dereference, address: 0000000000000402 CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1 Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020 RIP: 0010:fuse_readahead+0x152/0x470 [fuse] Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01 RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246 RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60 RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0 R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68 FS: 00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0 Call Trace: read_pages+0x83/0x270 page_cache_readahead_unbounded+0x197/0x230 generic_file_buffered_read+0x57a/0xa20 new_sync_read+0x112/0x1a0 vfs_read+0xf8/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 042124cc64c3 ("mm: add new readahead_control API") Reported-by: David Sterba Reported-by: Wonhyuk Yang Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1e19c1f9ec9..d5570deff400 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, page)) + continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; -- cgit v1.2.3 From 01770a166165738a6e05c3d911fb4609cc4eb416 Mon Sep 17 00:00:00 2001 From: Ricardo Dias Date: Fri, 20 Nov 2020 11:11:33 +0000 Subject: tcp: fix race condition when creating child sockets from syncookies When the TCP stack is in SYN flood mode, the server child socket is created from the SYN cookie received in a TCP packet with the ACK flag set. The child socket is created when the server receives the first TCP packet with a valid SYN cookie from the client. Usually, this packet corresponds to the final step of the TCP 3-way handshake, the ACK packet. But is also possible to receive a valid SYN cookie from the first TCP data packet sent by the client, and thus create a child socket from that SYN cookie. Since a client socket is ready to send data as soon as it receives the SYN+ACK packet from the server, the client can send the ACK packet (sent by the TCP stack code), and the first data packet (sent by the userspace program) almost at the same time, and thus the server will equally receive the two TCP packets with valid SYN cookies almost at the same instant. When such event happens, the TCP stack code has a race condition that occurs between the momement a lookup is done to the established connections hashtable to check for the existence of a connection for the same client, and the moment that the child socket is added to the established connections hashtable. As a consequence, this race condition can lead to a situation where we add two child sockets to the established connections hashtable and deliver two sockets to the userspace program to the same client. This patch fixes the race condition by checking if an existing child socket exists for the same client when we are adding the second child socket to the established connections socket. If an existing child socket exists, we drop the packet and discard the second child socket to the same client. Signed-off-by: Ricardo Dias Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201120111133.GA67501@rdias-suse-pc.lan Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 92560974ea67..ca6a3ea9057e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long high_limit); int inet_hashinfo2_init_mod(struct inet_hashinfo *h); -bool inet_ehash_insert(struct sock *sk, struct sock *osk); -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, + bool *found_dup_sk); int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); -- cgit v1.2.3 From d549699048b4b5c22dd710455bcdb76966e55aa3 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 21 Nov 2020 08:28:17 +0200 Subject: net/packet: fix packet receive on L3 devices without visible hard header In the patchset merged by commit b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which did not have header_ops were given one for the purpose of protocol parsing on af_packet transmit path. That change made af_packet receive path regard these devices as having a visible L3 header and therefore aligned incoming skb->data to point to the skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not reset their mac_header prior to ingress and therefore their incoming packets became malformed. Ideally these devices would reset their mac headers, or af_packet would be able to rely on dev->hard_header_len being 0 for such cases, but it seems this is not the case. Fix by changing af_packet RX ll visibility criteria to include the existence of a '.create()' header operation, which is used when creating a device hard header - via dev_hard_header() - by upper layers, and does not exist in these L3 devices. As this predicate may be useful in other situations, add it as a common dev_has_header() helper in netdevice.h. Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") Signed-off-by: Eyal Birger Acked-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..fa275a054f46 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3137,6 +3137,11 @@ static inline bool dev_validate_header(const struct net_device *dev, return false; } +static inline bool dev_has_header(const struct net_device *dev) +{ + return dev->header_ops && dev->header_ops->create; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len, int size); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -- cgit v1.2.3 From acfdd18591eaac25446e976a0c0d190f8b3dbfb1 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 23 Nov 2020 21:52:41 -0800 Subject: firmware: xilinx: Use hash-table for api feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bi