From 0cbc2659123e6946ba926c5ec3871efe310123e8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:28:34 +0100 Subject: arm64: vdso32: Remove a bunch of #ifdef CONFIG_COMPAT_VDSO guards Most of the compat vDSO code can be built and guarded using IS_ENABLED, so drop the unnecessary #ifdefs. Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index d4202a32abc9..1501b22a3cf6 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -30,15 +30,11 @@ #include extern char vdso_start[], vdso_end[]; -#ifdef CONFIG_COMPAT_VDSO extern char vdso32_start[], vdso32_end[]; -#endif /* CONFIG_COMPAT_VDSO */ enum vdso_abi { VDSO_ABI_AA64, -#ifdef CONFIG_COMPAT_VDSO VDSO_ABI_AA32, -#endif /* CONFIG_COMPAT_VDSO */ }; enum vvar_pages { @@ -284,21 +280,17 @@ up_fail: /* * Create and map the vectors page for AArch32 tasks. */ -#ifdef CONFIG_COMPAT_VDSO static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { return __vdso_remap(VDSO_ABI_AA32, sm, new_vma); } -#endif /* CONFIG_COMPAT_VDSO */ enum aarch32_map { AA32_MAP_VECTORS, /* kuser helpers */ -#ifdef CONFIG_COMPAT_VDSO + AA32_MAP_SIGPAGE, AA32_MAP_VVAR, AA32_MAP_VDSO, -#endif - AA32_MAP_SIGPAGE }; static struct page *aarch32_vectors_page __ro_after_init; @@ -309,7 +301,10 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vectors]", /* ABI */ .pages = &aarch32_vectors_page, }, -#ifdef CONFIG_COMPAT_VDSO + [AA32_MAP_SIGPAGE] = { + .name = "[sigpage]", /* ABI */ + .pages = &aarch32_sig_page, + }, [AA32_MAP_VVAR] = { .name = "[vvar]", .fault = vvar_fault, @@ -319,11 +314,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, -#endif /* CONFIG_COMPAT_VDSO */ - [AA32_MAP_SIGPAGE] = { - .name = "[sigpage]", /* ABI */ - .pages = &aarch32_sig_page, - }, }; static int aarch32_alloc_kuser_vdso_page(void) @@ -362,25 +352,25 @@ static int aarch32_alloc_sigpage(void) return 0; } -#ifdef CONFIG_COMPAT_VDSO static int __aarch32_alloc_vdso_pages(void) { + + if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) + return 0; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; return __vdso_init(VDSO_ABI_AA32); } -#endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { int ret; -#ifdef CONFIG_COMPAT_VDSO ret = __aarch32_alloc_vdso_pages(); if (ret) return ret; -#endif ret = aarch32_alloc_sigpage(); if (ret) @@ -449,14 +439,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; -#ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(VDSO_ABI_AA32, - mm, - bprm, - uses_interp); - if (ret) - goto out; -#endif /* CONFIG_COMPAT_VDSO */ + if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { + ret = __setup_additional_pages(VDSO_ABI_AA32, + mm, + bprm, + uses_interp); + if (ret) + goto out; + } ret = aarch32_sigreturn_setup(mm); out: -- cgit v1.2.3 From 2a30aca81a72d71e128eb692cc4755f33e317670 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:37:09 +0100 Subject: arm64: vdso: Fix unusual formatting in *setup_additional_pages() There's really no need to put every parameter on a new line when calling a function with a long name, so reformat the *setup_additional_pages() functions in the vDSO setup code to follow the usual conventions. Acked-by: Mark Rutland Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 1501b22a3cf6..debb8995d57f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -440,9 +440,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto out; if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { - ret = __setup_additional_pages(VDSO_ABI_AA32, - mm, - bprm, + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, uses_interp); if (ret) goto out; @@ -487,8 +485,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; int ret; @@ -496,11 +493,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (mmap_write_lock_killable(mm)) return -EINTR; - ret = __setup_additional_pages(VDSO_ABI_AA64, - mm, - bprm, - uses_interp); - + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); mmap_write_unlock(mm); return ret; -- cgit v1.2.3 From b4c971245925db1a6e0898878ad410f8f17ec59a Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 4 Aug 2020 16:53:47 +0800 Subject: arm64: traps: Add str of description to panic() in die() Currently, there are different description strings in die() such as die("Oops",,), die("Oops - BUG",,). And panic() called by die() will always show "Fatal exception" or "Fatal exception in interrupt". Note that panic() will run any panic handler via panic_notifier_list. And the string above will be formatted and placed in static buf[] which will be passed to handler. So panic handler can not distinguish which Oops it is from the buf if we want to do some things like reserve the string in memory or panic statistics. It's not benefit to debug. We need to add more codes to troubleshoot. Let's utilize existing resource to make debug much simpler. Signed-off-by: Yue Hu Link: https://lore.kernel.org/r/20200804085347.10720-1-zbestahu@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..af25cedf54e9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -200,9 +200,9 @@ void die(const char *str, struct pt_regs *regs, int err) oops_exit(); if (in_interrupt()) - panic("Fatal exception in interrupt"); + panic("%s: Fatal exception in interrupt", str); if (panic_on_oops) - panic("Fatal exception"); + panic("%s: Fatal exception", str); raw_spin_unlock_irqrestore(&die_lock, flags); -- cgit v1.2.3 From ffdbd3d83553beb0fda00081293d5640cba477a2 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 11 Aug 2020 13:35:05 +0800 Subject: arm64: perf: Add general hardware LLC events for PMUv3 This patch is to add the general hardware last level cache (LLC) events for PMUv3: one event is for LLC access and another is for LLC miss. With this change, perf tool can support last level cache profiling, below is an example to demonstrate the usage on Arm64: $ perf stat -e LLC-load-misses -e LLC-loads -- \ perf bench mem memcpy -s 1024MB -l 100 -f default [...] Performance counter stats for 'perf bench mem memcpy -s 1024MB -l 100 -f default': 35,534,262 LLC-load-misses # 2.16% of all LL-cache hits 1,643,946,443 LLC-loads [...] Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200811053505.21223-1-leo.yan@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 462f9a9cc44b..86e2328b0c2e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -69,6 +69,9 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; -- cgit v1.2.3 From d51eb416fa111c1129c46ea3320faab36bf4c99c Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 4 Sep 2020 10:21:37 +0800 Subject: drivers/perf: hisi: Add missing include of linux/module.h MODULE_*** is used in HiSilicon uncore PMU drivers and is provided by linux/module.h, but the header file is not directly included. Add the missing include. Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1599186097-18599-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 25b0c97b3eb0..b59ec22169ab 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From ad14c19242b5a5f87aa86c4c930e668121de2809 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 28 Aug 2020 11:18:22 +0800 Subject: arm64: fix some spelling mistakes in the comments by codespell arch/arm64/include/asm/cpu_ops.h:24: necesary ==> necessary arch/arm64/include/asm/kvm_arm.h:69: maintainance ==> maintenance arch/arm64/include/asm/cpufeature.h:361: capabilties ==> capabilities arch/arm64/kernel/perf_regs.c:19: compatability ==> compatibility arch/arm64/kernel/smp_spin_table.c:86: endianess ==> endianness arch/arm64/kernel/smp_spin_table.c:88: endianess ==> endianness arch/arm64/kvm/vgic/vgic-mmio-v3.c:1004: targetting ==> targeting arch/arm64/kvm/vgic/vgic-mmio-v3.c:1005: targetting ==> targeting Signed-off-by: Xiaoming Ni Link: https://lore.kernel.org/r/20200828031822.35928-1-nixiaoming@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu_ops.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kernel/perf_regs.c | 2 +- arch/arm64/kernel/smp_spin_table.c | 4 ++-- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index d28e8f37d3b4..e95c4df83911 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,7 +21,7 @@ * mechanism for doing so, tests whether it is possible to boot * the given CPU. * @cpu_boot: Boots a cpu into the kernel. - * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary * synchronisation. Called from the cpu being booted. * @cpu_can_disable: Determines whether a CPU can be disabled based on * mechanism-specific information. diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89b4f0142c28..3a42dc8e697c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -358,7 +358,7 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) } /* - * Generic helper for handling capabilties with multiple (match,enable) pairs + * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. * Iterate over each entry to see if at least one matches. */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1da8e3dc4455..32acf95d49c7 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -66,7 +66,7 @@ * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain - * FB: Force broadcast of all maintainance operations + * FB: Force broadcast of all maintenance operations * AMO: Override CPSR.A and enable signaling with VA * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 666b225aeb3a..94e8718e7229 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -16,7 +16,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) /* * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but - * we're stuck with it for ABI compatability reasons. + * we're stuck with it for ABI compatibility reasons. * * For a 32-bit consumer inspecting a 32-bit task, then it will look at * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c8a3fee00c11..5892e79fa429 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -83,9 +83,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) /* * We write the release address as LE regardless of the native - * endianess of the kernel. Therefore, any boot-loaders that + * endianness of the kernel. Therefore, any boot-loaders that * read this address need to convert this address to the - * boot-loader's endianess before jumping. This is mandated by + * boot-loader's endianness before jumping. This is mandated by * the boot protocol. */ writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 5c786b915cd3..52d6f24f65dc 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1001,8 +1001,8 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) raw_spin_lock_irqsave(&irq->irq_lock, flags); /* - * An access targetting Group0 SGIs can only generate - * those, while an access targetting Group1 SGIs can + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can * generate interrupts of either group. */ if (!irq->group || allow_group1) { -- cgit v1.2.3 From 9a747c91e8d69d7283c850031d2462f4d27bf25b Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Tue, 1 Sep 2020 17:11:54 +0800 Subject: arm64/numa: Fix a typo in comment of arm64_numa_init Fix a typo in comment of arm64_numa_init. 'encomapssing' should be 'encompassing'. Signed-off-by: Yanfei Xu Link: https://lore.kernel.org/r/20200901091154.10112-1-yanfei.xu@windriver.com Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 73f8b49d485c..d402da26cdca 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -448,7 +448,7 @@ static int __init dummy_numa_init(void) * arm64_numa_init() - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The - * last fallback is dummy single node config encomapssing whole memory. + * last fallback is dummy single node config encompassing whole memory. */ void __init arm64_numa_init(void) { -- cgit v1.2.3 From 1ab64cf81489e871bcb94fb2dea35c7fac561fff Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 18 Aug 2020 14:36:24 +0800 Subject: ACPI/IORT: Drop the unused @ops of iort_add_device_replay() Since commit d2e1a003af56 ("ACPI/IORT: Don't call iommu_ops->add_device directly"), we use the IOMMU core API to replace a direct invoke of the specified callback. The parameter @ops has therefore became unused. Let's drop it. Signed-off-by: Zenghui Yu Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20200818063625.980-2-yuzenghui@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ec782e4a0fe4..a0ece0e201b2 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -811,8 +811,7 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) return (fwspec && fwspec->ops) ? fwspec->ops : NULL; } -static inline int iort_add_device_replay(const struct iommu_ops *ops, - struct device *dev) +static inline int iort_add_device_replay(struct device *dev) { int err = 0; @@ -1072,7 +1071,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, */ if (!err) { ops = iort_fwspec_iommu_ops(dev); - err = iort_add_device_replay(ops, dev); + err = iort_add_device_replay(dev); } /* Ignore all other errors apart from EPROBE_DEFER */ @@ -1089,8 +1088,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, #else static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) { return NULL; } -static inline int iort_add_device_replay(const struct iommu_ops *ops, - struct device *dev) +static inline int iort_add_device_replay(struct device *dev) { return 0; } int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } -- cgit v1.2.3 From c2bea7a1a1c0c4c5c970696ff556a73f55f998eb Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 18 Aug 2020 14:36:25 +0800 Subject: ACPI/IORT: Remove the unused inline functions Since commit 8212688600ed ("ACPI/IORT: Fix build error when IOMMU_SUPPORT is disabled"), iort_fwspec_iommu_ops() and iort_add_device_replay() are not needed anymore when CONFIG_IOMMU_API is not selected. Let's remove them. Signed-off-by: Zenghui Yu Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20200818063625.980-3-yuzenghui@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index a0ece0e201b2..e670785a6201 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1086,10 +1086,6 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, } #else -static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) -{ return NULL; } -static inline int iort_add_device_replay(struct device *dev) -{ return 0; } int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } const struct iommu_ops *iort_iommu_configure_id(struct device *dev, -- cgit v1.2.3 From 120dc60d0bdbadcad7460222f74c9ed15cdeb73e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 25 Aug 2020 15:54:40 +0200 Subject: arm64: get rid of TEXT_OFFSET TEXT_OFFSET serves no purpose, and for this reason, it was redefined as 0x0 in the v5.8 timeframe. Since this does not appear to have caused any issues that require us to revisit that decision, let's get rid of the macro entirely, along with any references to it. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200825135440.11288-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 6 ------ arch/arm64/include/asm/boot.h | 3 +-- arch/arm64/include/asm/kernel-pgtable.h | 2 +- arch/arm64/include/asm/memory.h | 2 +- arch/arm64/kernel/Makefile | 2 -- arch/arm64/kernel/head.S | 16 ++++++---------- arch/arm64/kernel/image.h | 1 - arch/arm64/kernel/vmlinux.lds.S | 4 ++-- drivers/firmware/efi/libstub/Makefile | 1 - drivers/firmware/efi/libstub/arm64-stub.c | 6 +++--- 10 files changed, 14 insertions(+), 29 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 130569f90c54..0fd4c1be4f64 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -11,7 +11,6 @@ # Copyright (C) 1995-2001 by Russell King LDFLAGS_vmlinux :=--no-undefined -X -CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -132,9 +131,6 @@ endif # Default value head-y := arch/arm64/kernel/head.o -# The byte offset of the kernel image in RAM from the start of RAM. -TEXT_OFFSET := 0x0 - ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 else @@ -145,8 +141,6 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -export TEXT_OFFSET - core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index c7f67da13cd9..3e7943fd17a4 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -13,8 +13,7 @@ #define MAX_FDT_SIZE SZ_2M /* - * arm64 requires the kernel image to placed - * TEXT_OFFSET bytes beyond a 2 MB aligned base + * arm64 requires the kernel image to placed at a 2 MB aligned base address */ #define MIN_KIMG_ALIGN SZ_2M diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 329fb15f6bac..19ca76ea60d9 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -86,7 +86,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index afa722504bfd..20a9b322d342 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -169,7 +169,7 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +/* the virtual base of the kernel image */ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a561cbb91d4d..4197ef2fb22e 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) -AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 037421c66b14..d8d9caf02834 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,14 +36,10 @@ #include "efi-header.S" -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) +#define __PHYS_OFFSET KERNEL_START -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 +#if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB #endif /* @@ -55,7 +51,7 @@ * x0 = physical address to the FDT blob. * * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). + * __pa(PAGE_OFFSET). * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. The allocations are described @@ -77,7 +73,7 @@ _head: b primary_entry // branch to kernel start, magic .long 0 // reserved #endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad 0 // Image load offset from start of RAM, little-endian le64sym _kernel_size_le // Effective size of kernel image, little-endian le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved @@ -382,7 +378,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * Map the kernel image (starting with PHYS_OFFSET). */ adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) + mov_q x5, KIMAGE_VADDR // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD adrp x6, _end // runtime __pa(_end) @@ -474,7 +470,7 @@ SYM_FUNC_END(__primary_switched) .pushsection ".rodata", "a" SYM_DATA_START(kimage_vaddr) - .quad _text - TEXT_OFFSET + .quad _text SYM_DATA_END(kimage_vaddr) EXPORT_SYMBOL(kimage_vaddr) .popsection diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7d38c660372..7bc3ba897901 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -62,7 +62,6 @@ */ #define HEAD_SYMBOLS \ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ - DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7cba7623fcec..82801d98a2b7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -105,7 +105,7 @@ SECTIONS *(.eh_frame) } - . = KIMAGE_VADDR + TEXT_OFFSET; + . = KIMAGE_VADDR; .head.text : { _text = .; @@ -274,4 +274,4 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 296b18fbd7a2..5a80cf6bd606 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -64,7 +64,6 @@ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_X86) += x86-stub.o CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # # For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e5bfac79e5ac..fc178398f1ac 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -77,7 +77,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); - *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align(); + *reserve_size = kernel_memsize; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { /* @@ -91,7 +91,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align())) { + if (IS_ALIGNED((u64)_text, min_kimg_align())) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. @@ -111,7 +111,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align(); + *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); return EFI_SUCCESS; -- cgit v1.2.3 From 44fdf4ed2693eb05dbfa83beaa6fe5fbd0c2f6d0 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 4 Sep 2020 17:57:38 +0800 Subject: arm64: perf: Remove unnecessary event_idx check event_idx is obtained from armv8pmu_get_event_idx(), and this idx must be between ARMV8_IDX_CYCLE_COUNTER and cpu_pmu->num_events. So it's unnecessary to do this check. Let's remove it. Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1599213458-28394-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 86e2328b0c2e..04d0ceb72a67 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -310,8 +310,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* @@ -368,12 +366,6 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV8_IDX_CYCLE_COUNTER && - idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); -} - static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); @@ -443,15 +435,11 @@ static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_read_counter(struct perf_event *event) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u64 value = 0; - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else value = armv8pmu_read_hw_counter(event); @@ -480,16 +468,12 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, static void armv8pmu_write_counter(struct perf_event *event, u64 value) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; value = armv8pmu_bias_long_counter(event, value); - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) write_sysreg(value, pmccntr_el0); else armv8pmu_write_hw_counter(event, value); -- cgit v1.2.3 From c048ddf86cdd066db283ce838a0049d2bbefb9e5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 4 Sep 2020 14:00:59 +0530 Subject: arm64/mm/ptdump: Add address markers for BPF regions Kernel virtual region [BPF_JIT_REGION_START..BPF_JIT_REGION_END] is missing from address_markers[], hence relevant page table entries are not displayed with /sys/kernel/debug/kernel_page_tables. This adds those missing markers. While here, also rename arch/arm64/mm/dump.c which sounds bit ambiguous, as arch/arm64/mm/ptdump.c instead. Signed-off-by: Anshuman Khandual Reviewed-by: Steven Price Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: Steven Price Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599208259-11191-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/Makefile | 2 +- arch/arm64/mm/dump.c | 387 ------------------------------------------------ arch/arm64/mm/ptdump.c | 389 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 388 deletions(-) delete mode 100644 arch/arm64/mm/dump.c create mode 100644 arch/arm64/mm/ptdump.c diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index d91030f0ffee..2a1d275cd4d7 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,7 +4,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c deleted file mode 100644 index 0b8da1cc1c07..000000000000 --- a/arch/arm64/mm/dump.c +++ /dev/null @@ -1,387 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2014, The Linux Foundation. All rights reserved. - * Debug helper to dump the current kernel pagetables of the system - * so that we can see what the various memory ranges are set to. - * - * Derived from x86 and arm implementation: - * (C) Copyright 2008 Intel Corporation - * - * Author: Arjan van de Ven - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -enum address_markers_idx { - PAGE_OFFSET_NR = 0, - PAGE_END_NR, -#ifdef CONFIG_KASAN - KASAN_START_NR, -#endif -}; - -static struct addr_marker address_markers[] = { - { PAGE_OFFSET, "Linear Mapping start" }, - { 0 /* PAGE_END */, "Linear Mapping end" }, -#ifdef CONFIG_KASAN - { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, - { KASAN_SHADOW_END, "Kasan shadow end" }, -#endif - { MODULES_VADDR, "Modules start" }, - { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() area" }, - { VMALLOC_END, "vmalloc() end" }, - { FIXADDR_START, "Fixmap start" }, - { FIXADDR_TOP, "Fixmap end" }, - { PCI_IO_START, "PCI I/O start" }, - { PCI_IO_END, "PCI I/O end" }, -#ifdef CONFIG_SPARSEMEM_VMEMMAP - { VMEMMAP_START, "vmemmap start" }, - { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, -#endif - { -1, NULL }, -}; - -#define pt_dump_seq_printf(m, fmt, args...) \ -({ \ - if (m) \ - seq_printf(m, fmt, ##args); \ -}) - -#define pt_dump_seq_puts(m, fmt) \ -({ \ - if (m) \ - seq_printf(m, fmt); \ -}) - -/* - * The page dumper groups page table entries of the same type into a single - * description. It uses pg_state to track the range information while - * iterating over the pte entries. When the continuity is broken it then - * dumps out a description of the range. - */ -struct pg_state { - struct ptdump_state ptdump; - struct seq_file *seq; - const struct addr_marker *marker; - unsigned long start_address; - int level; - u64 current_prot; - bool check_wx; - unsigned long wx_pages; - unsigned long uxn_pages; -}; - -struct prot_bits { - u64 mask; - u64 val; - const char *set; - const char *clear; -}; - -static const struct prot_bits pte_bits[] = { - { - .mask = PTE_VALID, - .val = PTE_VALID, - .set = " ", - .clear = "F", - }, { - .mask = PTE_USER, - .val = PTE_USER, - .set = "USR", - .clear = " ", - }, { - .mask = PTE_RDONLY, - .val = PTE_RDONLY, - .set = "ro", - .clear = "RW", - }, { - .mask = PTE_PXN, - .val = PTE_PXN, - .set = "NX", - .clear = "x ", - }, { - .mask = PTE_SHARED, - .val = PTE_SHARED, - .set = "SHD", - .clear = " ", - }, { - .mask = PTE_AF, - .val = PTE_AF, - .set = "AF", - .clear = " ", - }, { - .mask = PTE_NG, - .val = PTE_NG, - .set = "NG", - .clear = " ", - }, { - .mask = PTE_CONT, - .val = PTE_CONT, - .set = "CON", - .clear = " ", - }, { - .mask = PTE_TABLE_BIT, - .val = PTE_TABLE_BIT, - .set = " ", - .clear = "BLK", - }, { - .mask = PTE_UXN, - .val = PTE_UXN, - .set = "UXN", - .clear = " ", - }, { - .mask = PTE_GP, - .val = PTE_GP, - .set = "GP", - .clear = " ", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), - .set = "DEVICE/nGnRnE", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), - .set = "DEVICE/nGnRE", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_DEVICE_GRE), - .set = "DEVICE/GRE", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_NORMAL_NC), - .set = "MEM/NORMAL-NC", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_NORMAL), - .set = "MEM/NORMAL", - } -}; - -struct pg_level { - const struct prot_bits *bits; - const char *name; - size_t num; - u64 mask; -}; - -static struct pg_level pg_level[] = { - { /* pgd */ - .name = "PGD", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* p4d */ - .name = "P4D", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* pud */ - .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* pmd */ - .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* pte */ - .name = "PTE", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, -}; - -static void dump_prot(struct pg_state *st, const struct prot_bits *bits, - size_t num) -{ - unsigned i; - - for (i = 0; i < num; i++, bits++) { - const char *s; - - if ((st->current_prot & bits->mask) == bits->val) - s = bits->set; - else - s = bits->clear; - - if (s) - pt_dump_seq_printf(st->seq, " %s", s); - } -} - -static void note_prot_uxn(struct pg_state *st, unsigned long addr) -{ - if (!st->check_wx) - return; - - if ((st->current_prot & PTE_UXN) == PTE_UXN) - return; - - WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n", - (void *)st->start_address, (void *)st->start_address); - - st->uxn_pages += (addr - st->start_address) / PAGE_SIZE; -} - -static void note_prot_wx(struct pg_state *st, unsigned long addr) -{ - if (!st->check_wx) - return; - if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY) - return; - if ((st->current_prot & PTE_PXN) == PTE_PXN) - return; - - WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n", - (void *)st->start_address, (void *)st->start_address); - - st->wx_pages += (addr - st->start_address) / PAGE_SIZE; -} - -static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - u64 val) -{ - struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); - static const char units[] = "KMGTPE"; - u64 prot = 0; - - if (level >= 0) - prot = val & pg_level[level].mask; - - if (st->level == -1) { - st->level = level; - st->current_prot = prot; - st->start_address = addr; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } else if (prot != st->current_prot || level != st->level || - addr >= st->marker[1].start_address) { - const char *unit = units; - unsigned long delta; - - if (st->current_prot) { - note_prot_uxn(st, addr); - note_prot_wx(st, addr); - } - - pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", - st->start_address, addr); - - delta = (addr - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, - pg_level[st->level].name); - if (st->current_prot && pg_level[st->level].bits) - dump_prot(st, pg_level[st->level].bits, - pg_level[st->level].num); - pt_dump_seq_puts(st->seq, "\n"); - - if (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - - st->start_address = addr; - st->current_prot = prot; - st->level = level; - } - - if (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - -} - -void ptdump_walk(struct seq_file *s, struct ptdump_info *info) -{ - unsigned long end = ~0UL; - struct pg_state st; - - if (info->base_addr < TASK_SIZE_64) - end = TASK_SIZE_64; - - st = (struct pg_state){ - .seq = s, - .marker = info->markers, - .ptdump = { - .note_page = note_page, - .range = (struct ptdump_range[]){ - {info->base_addr, end}, - {0, 0} - } - } - }; - - ptdump_walk_pgd(&st.ptdump, info->mm, NULL); -} - -static void ptdump_initialize(void) -{ - unsigned i, j; - - for (i = 0; i < ARRAY_SIZE(pg_level); i++) - if (pg_level[i].bits) - for (j = 0; j < pg_level[i].num; j++) - pg_level[i].mask |= pg_level[i].bits[j].mask; -} - -static struct ptdump_info kernel_ptdump_info = { - .mm = &init_mm, - .markers = address_markers, - .base_addr = PAGE_OFFSET, -}; - -void ptdump_check_wx(void) -{ - struct pg_state st = { - .seq = NULL, - .marker = (struct addr_marker[]) { - { 0, NULL}, - { -1, NULL}, - }, - .level = -1, - .check_wx = true, - .ptdump = { - .note_page = note_page, - .range = (struct ptdump_range[]) { - {PAGE_OFFSET, ~0UL}, - {0, 0} - } - } - }; - - ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); - - if (st.wx_pages || st.uxn_pages) - pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", - st.wx_pages, st.uxn_pages); - else - pr_info("Checked W+X mappings: passed, no W+X pages found\n"); -} - -static int ptdump_init(void) -{ - address_markers[PAGE_END_NR].start_address = PAGE_END; -#ifdef CONFIG_KASAN - address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; -#endif - ptdump_initialize(); - ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); - return 0; -} -device_initcall(ptdump_init); diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c new file mode 100644 index 000000000000..265284dc942d --- /dev/null +++ b/arch/arm64/mm/ptdump.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 and arm implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +enum address_markers_idx { + PAGE_OFFSET_NR = 0, + PAGE_END_NR, +#ifdef CONFIG_KASAN + KASAN_START_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + { PAGE_OFFSET, "Linear Mapping start" }, + { 0 /* PAGE_END */, "Linear Mapping end" }, +#ifdef CONFIG_KASAN + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, + { KASAN_SHADOW_END, "Kasan shadow end" }, +#endif + { BPF_JIT_REGION_START, "BPF start" }, + { BPF_JIT_REGION_END, "BPF end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { PCI_IO_START, "PCI I/O start" }, + { PCI_IO_END, "PCI I/O end" }, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + { VMEMMAP_START, "vmemmap start" }, + { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, +#endif + { -1, NULL }, +}; + +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_printf(m, fmt); \ +}) + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ +struct pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + int level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; + unsigned long uxn_pages; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { + .mask = PTE_USER, + .val = PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = PTE_RDONLY, + .val = PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = PTE_PXN, + .val = PTE_PXN, + .set = "NX", + .clear = "x ", + }, { + .mask = PTE_SHARED, + .val = PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = PTE_AF, + .val = PTE_AF, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_NG, + .val = PTE_NG, + .set = "NG", + .clear = " ", + }, { + .mask = PTE_CONT, + .val = PTE_CONT, + .set = "CON", + .clear = " ", + }, { + .mask = PTE_TABLE_BIT, + .val = PTE_TABLE_BIT, + .set = " ", + .clear = "BLK", + }, { + .mask = PTE_UXN, + .val = PTE_UXN, + .set = "UXN", + .clear = " ", + }, { + .mask = PTE_GP, + .val = PTE_GP, + .set = "GP", + .clear = " ", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), + .set = "DEVICE/nGnRnE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), + .set = "DEVICE/nGnRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_GRE), + .set = "DEVICE/GRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_NC), + .set = "MEM/NORMAL-NC", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL), + .set = "MEM/NORMAL", + } +}; + +struct pg_level { + const struct prot_bits *bits; + const char *name; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { /* pgd */ + .name = "PGD", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* p4d */ + .name = "P4D", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pud */ + .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pmd */ + .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pte */ + .name = "PTE", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, + size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + pt_dump_seq_printf(st->seq, " %s", s); + } +} + +static void note_prot_uxn(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if ((st->current_prot & PTE_UXN) == PTE_UXN) + return; + + WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->uxn_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY) + return; + if ((st->current_prot & PTE_PXN) == PTE_PXN) + return; + + WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + u64 val) +{ + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + static const char units[] = "KMGTPE"; + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; + + if (st->level == -1) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + note_prot_uxn(st, addr); + note_prot_wx(st, addr); + } + + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_prot && pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + pt_dump_seq_puts(st->seq, "\n"); + + if (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + +} + +void ptdump_walk(struct seq_file *s, struct ptdump_info *info) +{ + unsigned long end = ~0UL; + struct pg_state st; + + if (info->base_addr < TASK_SIZE_64) + end = TASK_SIZE_64; + + st = (struct pg_state){ + .seq = s, + .marker = info->markers, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]){ + {info->base_addr, end}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, info->mm, NULL); +} + +static void ptdump_initialize(void) +{ + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; +} + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = PAGE_OFFSET, +}; + +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, + .level = -1, + .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {PAGE_OFFSET, ~0UL}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + + if (st.wx_pages || st.uxn_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", + st.wx_pages, st.uxn_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + +static int ptdump_init(void) +{ + address_markers[PAGE_END_NR].start_address = PAGE_END; +#ifdef CONFIG_KASAN + address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; +#endif + ptdump_initialize(); + ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); + return 0; +} +device_initcall(ptdump_init); -- cgit v1.2.3 From 3102bc0e6ac752cc5df896acb557d779af4d82a1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Sat, 29 Aug 2020 14:00:16 +0100 Subject: arm64: topology: Stop using MPIDR for topology information In the absence of ACPI or DT topology data, we fallback to haphazardly decoding *something* out of MPIDR. Sadly, the contents of that register are mostly unusable due to the implementation leniancy and things like Aff0 having to be capped to 15 (despite being encoded on 8 bits). Consider a simple system with a single package of 32 cores, all under the same LLC. We ought to be shoving them in the same core_sibling mask, but MPIDR is going to look like: | CPU | 0 | ... | 15 | 16 | ... | 31 | |------+---+-----+----+----+-----+----+ | Aff0 | 0 | ... | 15 | 0 | ... | 15 | | Aff1 | 0 | ... | 0 | 1 | ... | 1 | | Aff2 | 0 | ... | 0 | 0 | ... | 0 | Which will eventually yield core_sibling(0-15) == 0-15 core_sibling(16-31) == 16-31 NUMA woes ========= If we try to play games with this and set up NUMA boundaries within those groups of 16 cores via e.g. QEMU: # Node0: 0-9; Node1: 10-19 $ qemu-system-aarch64 \ -smp 20 -numa node,cpus=0-9,nodeid=0 -numa node,cpus=10-19,nodeid=1 The scheduler's MC domain (all CPUs with same LLC) is going to be built via arch_topology.c::cpu_coregroup_mask() In there we try to figure out a sensible mask out of the topology information we have. In short, here we'll pick the smallest of NUMA or core sibling mask. node_mask(CPU9) == 0-9 core_sibling(CPU9) == 0-15 MC mask for CPU9 will thus be 0-9, not a problem. node_mask(CPU10) == 10-19 core_sibling(CPU10) == 0-15 MC mask for CPU10 will thus be 10-19, not a problem. node_mask(CPU16) == 10-19 core_sibling(CPU16) == 16-19 MC mask for CPU16 will thus be 16-19... Uh oh. CPUs 16-19 are in two different unique MC spans, and the scheduler has no idea what to make of that. That triggers the WARN_ON() added by commit ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") Fixing MPIDR-derived topology ============================= We could try to come up with some cleverer scheme to figure out which of the available masks to pick, but really if one of those masks resulted from MPIDR then it should be discarded because it's bound to be bogus. I was hoping to give MPIDR a chance for SMT, to figure out which threads are in the same core using Aff1-3 as core ID, but Sudeep and Robin pointed out to me that there are systems out there where *all* cores have non-zero values in their higher affinity fields (e.g. RK3288 has "5" in all of its cores' MPIDR.Aff1), which would expose a bogus core ID to userspace. Stop using MPIDR for topology information. When no other source of topology information is available, mark each CPU as its own core and its NUMA node as its LLC domain. Signed-off-by: Valentin Schneider Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200829130016.26106-1-valentin.schneider@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0801a0f3c156..ff1dd1dbfe64 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -36,21 +36,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, -- cgit v1.2.3 From b65399f6111b03df870d8daa75b8d140c0de93f4 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 10:23:02 +0530 Subject: arm64/mm: Change THP helpers to comply with generic MM semantics pmd_present() and pmd_trans_huge() are expected to behave in the following manner during various phases of a given PMD. It is derived from a previous detailed discussion on this topic [1] and present THP documentation [2]. pmd_present(pmd): - Returns true if pmd refers to system RAM with a valid pmd_page(pmd) - Returns false if pmd refers to a migration or swap entry pmd_trans_huge(pmd): - Returns true if pmd refers to system RAM and is a trans huge mapping ------------------------------------------------------------------------- | PMD states | pmd_present | pmd_trans_huge | ------------------------------------------------------------------------- | Mapped | Yes | Yes | ------------------------------------------------------------------------- | Splitting | Yes | Yes | ------------------------------------------------------------------------- | Migration/Swap | No | No | ------------------------------------------------------------------------- The problem: PMD is first invalidated with pmdp_invalidate() before it's splitting. This invalidation clears PMD_SECT_VALID as below. PMD Split -> pmdp_invalidate() -> pmd_mkinvalid -> Clears PMD_SECT_VALID Once PMD_SECT_VALID gets cleared, it results in pmd_present() return false on the PMD entry. It will need another bit apart from PMD_SECT_VALID to re- affirm pmd_present() as true during the THP split process. To comply with above mentioned semantics, pmd_trans_huge() should also check pmd_present() first before testing presence of an actual transparent huge mapping. The solution: Ideally PMD_TYPE_SECT should have been used here instead. But it shares the bit position with PMD_SECT_VALID which is used for THP invalidation. Hence it will not be there for pmd_present() check after pmdp_invalidate(). A new software defined PMD_PRESENT_INVALID (bit 59) can be set on the PMD entry during invalidation which can help pmd_present() return true and in recognizing the fact that it still points to memory. This bit is transient. During the split process it will be overridden by a page table page representing normal pages in place of erstwhile huge page. Other pmdp_invalidate() callers always write a fresh PMD value on the entry overriding this transient PMD_PRESENT_INVALID bit, which makes it safe. [1]: https://lkml.org/lkml/2018/10/17/231 [2]: https://www.kernel.org/doc/Documentation/vm/transhuge.txt Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599627183-14453-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 7 +++++++ arch/arm64/include/asm/pgtable.h | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 4d867c6446c4..2df4b75fce3c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -19,6 +19,13 @@ #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +/* + * This bit indicates that the entry is present i.e. pmd_page() + * still points to a valid huge page in memory even if the pmd + * has been invalidated. + */ +#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..d8258ae8fce0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -145,6 +145,18 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) &= ~pgprot_val(prot); + return pmd; +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) |= pgprot_val(prot); + return pmd; +} + static inline pte_t pte_wrprotect(pte_t pte) { pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -363,15 +375,24 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) + +static inline int pmd_present(pmd_t pmd) +{ + return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); +} + /* * THP definitions. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -381,7 +402,14 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); + pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + + return pmd; +} #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -- cgit v1.2.3 From 53fa117bb33c9ebc4c0746b3830e273f5e9b97b7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 10:23:03 +0530 Subject: arm64/mm: Enable THP migration In certain page migration situations, a THP page can be migrated without being split into it's constituent subpages. This saves time required to split a THP and put it back together when required. But it also saves an wider address range translation covered by a single TLB entry, reducing future page fault costs. A previous patch changed platform THP helpers per generic memory semantics, clearing the path for THP migration support. This adds two more THP helpers required to create PMD migration swap entries. Now enable THP migration via ARCH_ENABLE_THP_MIGRATION. Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599627183-14453-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ arch/arm64/include/asm/pgtable.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..e21b94061780 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1876,6 +1876,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on HUGETLB_PAGE && MIGRATION +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends