aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Will Deacon <will@kernel.org> 2026-06-14 12:17:33 +0100
committer Will Deacon <will@kernel.org> 2026-06-14 12:17:33 +0100
commit df3daf49a8a05751b34ec40c46ac6d9c47e9ec22 (patch)
tree f0161987e703ba6408a554397b40e184b5ea9419
parent 917578e25d7585e8f55a48643ad3c2711290cdbc (diff)
parent f102131c842d1e1d95bc7b818ab93944195da7e9 (diff)
Merge branch 'for-next/mm' into for-next/core
* for-next/mm: (24 commits)
  Revert "arm64: mm: Unmap kernel data/bss entirely from the linear map"
  Revert "arm64: mm: Defer remap of linear alias of data/bss"
  arm64/mm: Rename ptdesc_t
  arm64: mm: Defer remap of linear alias of data/bss
  KVM: arm64: Omit tag sync on stage-2 mappings of the zero page
  arm64: Avoid double evaluation of __ptep_get()
  kasan: Move generic KASAN page tables out of BSS too
  arm64: Rename page table BSS section to .bss..pgtbl
  arm64: mm: Unmap kernel data/bss entirely from the linear map
  arm64: mm: Map the kernel data/bss read-only in the linear map
  mm: Make empty_zero_page[] const
  sh: Drop cache flush of the zero page at boot
  powerpc/code-patching: Avoid r/w mapping of the zero page
  arm64: mm: Don't abuse memblock NOMAP to check for overlaps
  arm64: Move fixmap and kasan page tables to end of kernel image
  arm64: mm: Permit contiguous attribute for preliminary mappings
  arm64: kfence: Avoid NOMAP tricks when mapping the early pool
  arm64: mm: Permit contiguous descriptors to be manipulated
  arm64: mm: Preserve non-contiguous descriptors when mapping DRAM
  arm64: mm: Preserve existing table mappings when mapping DRAM
  ...
-rw-r--r-- arch/arm64/include/asm/io.h 2
-rw-r--r-- arch/arm64/include/asm/linkage.h 2
-rw-r--r-- arch/arm64/include/asm/pgtable-types.h 14
-rw-r--r-- arch/arm64/include/asm/pgtable.h 4
-rw-r--r-- arch/arm64/include/asm/ptdump.h 8
-rw-r--r-- arch/arm64/include/asm/tlbflush.h 4
-rw-r--r-- arch/arm64/kernel/efi.c 4
-rw-r--r-- arch/arm64/kernel/pi/map_kernel.c 2
-rw-r--r-- arch/arm64/kernel/pi/map_range.c 4
-rw-r--r-- arch/arm64/kernel/pi/pi.h 2
-rw-r--r-- arch/arm64/kernel/vmlinux.lds.S 8
-rw-r--r-- arch/arm64/kvm/mmu.c 5
-rw-r--r-- arch/arm64/mm/fixmap.c 6
-rw-r--r-- arch/arm64/mm/kasan_init.c 2
-rw-r--r-- arch/arm64/mm/mmap.c 4
-rw-r--r-- arch/arm64/mm/mmu.c 145
-rw-r--r-- arch/arm64/mm/pageattr.c 2
-rw-r--r-- arch/arm64/mm/ptdump.c 2
-rw-r--r-- arch/powerpc/lib/code-patching.c 52
-rw-r--r-- arch/sh/mm/init.c 3
-rw-r--r-- include/linux/linkage.h 4
-rw-r--r-- include/linux/pgtable.h 2
-rw-r--r-- mm/kasan/init.c 10
-rw-r--r-- mm/mm_init.c 2
24 files changed, 132 insertions, 161 deletions
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 8cbd1e96fd50..21c8e400107c 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -270,7 +270,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size,
pgprot_t user_prot)
{
pgprot_t prot;
- ptdesc_t user_prot_val = pgprot_val(user_prot);
+ ptval_t user_prot_val = pgprot_val(user_prot);
if (WARN_ON_ONCE(!(user_prot_val & PTE_USER)))
return NULL;
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 73eabc82a6bb..d1f7a16729d2 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -45,4 +45,6 @@
#define _THIS_IP_ ({ unsigned long __ip; asm volatile("adr %0, ." : "=r" (__ip)); __ip; })
+#define __bss_pgtbl __section(".bss..pgtbl") __aligned(PAGE_SIZE)
+
#endif
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 265e8301d7ba..2f2f5527930f 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -17,13 +17,13 @@
* Generic page table descriptor format from which
* all level specific descriptors can be derived.
*/
-typedef u64 ptdesc_t;
+typedef u64 ptval_t;
-typedef ptdesc_t pteval_t;
-typedef ptdesc_t pmdval_t;
-typedef ptdesc_t pudval_t;
-typedef ptdesc_t p4dval_t;
-typedef ptdesc_t pgdval_t;
+typedef ptval_t pteval_t;
+typedef ptval_t pmdval_t;
+typedef ptval_t pudval_t;
+typedef ptval_t p4dval_t;
+typedef ptval_t pgdval_t;
/*
* These are used to make use of C type-checking..
@@ -54,7 +54,7 @@ typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )
-typedef struct { ptdesc_t pgprot; } pgprot_t;
+typedef struct { ptval_t pgprot; } pgprot_t;
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4dfa42b7d053..c9e4e00a9af2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1007,7 +1007,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
{
- BUG_ON(!pgtable_l4_enabled());
+ VM_WARN_ON_ONCE(!pgtable_l4_enabled());
return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
}
@@ -1130,7 +1130,7 @@ static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
{
- BUG_ON(!pgtable_l5_enabled());
+ VM_WARN_ON_ONCE(!pgtable_l5_enabled());
return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
}
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index baff24004459..5b374a6ab34a 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -26,8 +26,8 @@ struct ptdump_info {
};
struct ptdump_prot_bits {
- ptdesc_t mask;
- ptdesc_t val;
+ ptval_t mask;
+ ptval_t val;
const char *set;
const char *clear;
};
@@ -36,7 +36,7 @@ struct ptdump_pg_level {
const struct ptdump_prot_bits *bits;
char name[4];
int num;
- ptdesc_t mask;
+ ptval_t mask;
};
/*
@@ -53,7 +53,7 @@ struct ptdump_pg_state {
const struct mm_struct *mm;
unsigned long start_address;
int level;
- ptdesc_t current_prot;
+ ptval_t current_prot;
bool check_wx;
unsigned long wx_pages;
unsigned long uxn_pages;
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c0bf5b398041..d52ac8c17190 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -725,9 +725,9 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
sme_dvmsync_add_pending(batch, mm);
}
-static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
+static inline bool __pte_flags_need_flush(ptval_t oldval, ptval_t newval)
{
- ptdesc_t diff = oldval ^ newval;
+ ptval_t diff = oldval ^ newval;
/* invalid to valid transition requires no flush */
if (!(oldval & PTE_VALID))
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index a81cb4aa4738..30cd7f804398 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -31,7 +31,7 @@ static bool region_is_misaligned(const efi_memory_desc_t *md)
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
-static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md)
+static __init ptval_t create_mapping_protection(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
@@ -85,7 +85,7 @@ static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md)
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
- ptdesc_t prot_val = create_mapping_protection(md);
+ ptval_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index a852264958c3..fb44cbdd2f29 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -165,7 +165,7 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttb
static void __init remap_idmap_for_lpa2(void)
{
/* clear the bits that change meaning once LPA2 is turned on */
- ptdesc_t mask = PTE_SHARED;
+ ptval_t mask = PTE_SHARED;
/*
* We have to clear bits [9:8] in all block or page descriptors in the
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index de52cd85c691..761b14893f74 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -31,7 +31,7 @@ void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
u64 va_offset)
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
- ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ ptval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
u64 lmask = (PAGE_SIZE << lshift) - 1;
@@ -88,7 +88,7 @@ void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
}
}
-asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
+asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptval_t clrmask)
{
phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */
pgprot_t text_prot = PAGE_KERNEL_ROX;
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index aec3172d4003..5dfd8484d200 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -35,4 +35,4 @@ void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt);
-asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
+asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptval_t clrmask);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e1ac876200a3..d3ed59abab38 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -349,9 +349,15 @@ SECTIONS
_edata = .;
/* start of zero-init region */
- BSS_SECTION(SBSS_ALIGN, 0, 0)
+ BSS_SECTION(SBSS_ALIGN, 0, PAGE_SIZE)
__pi___bss_start = __bss_start;
+ /* page table BSS starts here - preceding data/BSS is omitted from the linear map */
+ .pgtbl : ALIGN(PAGE_SIZE) {
+ *(.bss..pgtbl)
+ }
+ ASSERT(ADDR(.pgtbl) == __bss_stop, ".pgtbl must follow BSS")
+
. = ALIGN(PAGE_SIZE);
__pi_init_pg_dir = .;
. += INIT_DIR_SIZE;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d089c107d9b7..445d6cf035c9 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1479,6 +1479,11 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
if (!kvm_has_mte(kvm))
return;
+ if (is_zero_pfn(pfn)) {
+ WARN_ON_ONCE(nr_pages != 1);
+ return;
+ }
+
if (folio_test_hugetlb(folio)) {
/* Hugetlb has MTE flags set on head page only */
if (folio_try_hugetlb_mte_tagging(folio)) {
diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c
index c5c5425791da..f66a0016dd02 100644
--- a/arch/arm64/mm/fixmap.c
+++ b/arch/arm64/mm/fixmap.c
@@ -31,9 +31,9 @@ static_assert(NR_BM_PMD_TABLES == 1);
#define BM_PTE_TABLE_IDX(addr) __BM_TABLE_IDX(addr, PMD_SHIFT)
-static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss;
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __bss_pgtbl;
+static pmd_t bm_pmd[PTRS_PER_PMD] __bss_pgtbl __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __bss_pgtbl __maybe_unused;
static inline pte_t *fixmap_pte(unsigned long addr)
{
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index abeb81bf6ebd..3fcad956fdf7 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -214,7 +214,7 @@ asmlinkage void __init kasan_early_init(void)
* shadow pud_t[]/p4d_t[], which could end up getting corrupted
* when the linear region is mapped.
*/
- static pte_t tbl[PTRS_PER_PTE] __page_aligned_bss;
+ static pte_t tbl[PTRS_PER_PTE] __bss_pgtbl;
pgd_t *pgdp = pgd_offset_k(KASAN_SHADOW_START);
set_pgd(pgdp, __pgd(__pa_symbol(tbl) | PGD_TYPE_TABLE));
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 92b2f5097a96..32e0771d6477 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -34,7 +34,7 @@ static pgprot_t protection_map[16] __ro_after_init = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
};
-static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO;
+static ptval_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO;
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
@@ -87,7 +87,7 @@ arch_initcall(adjust_protection_map);
pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
{
- ptdesc_t prot;
+ ptval_t prot;
/* Short circuit GCS to avoid bloating the table. */
if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index dd85e093ffdb..354bedf0e638 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -134,10 +134,6 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new)
if (pte_pfn(__pte(old)) != pte_pfn(__pte(new)))
return false;
- /* live contiguous mappings may not be manipulated at all */
- if ((old | new) & PTE_CONT)
- return false;
-
/* Transitioning from Non-Global to Global is unsafe */
if (old & ~new & PTE_NG)
return false;
@@ -187,6 +183,17 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
} while (ptep++, addr += PAGE_SIZE, addr != end);
}
+static bool pte_range_has_valid_noncont(pte_t *ptep)
+{
+ for (int i = 0; i < CONT_PTES; i++) {
+ pte_t pte = __ptep_get(&ptep[i]);
+
+ if (pte_valid(pte) && !pte_cont(pte))
+ return true;
+ }
+ return false;
+}
+
static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
@@ -224,7 +231,8 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
/* use a contiguous mapping if the range is suitably aligned */
if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
- (flags & NO_CONT_MAPPINGS) == 0)
+ (flags & NO_CONT_MAPPINGS) == 0 &&
+ !pte_range_has_valid_noncont(ptep))
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
init_pte(ptep, addr, next, phys, __prot);
@@ -256,8 +264,9 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
/* try section mapping first */
if (((addr | next | phys) & ~PMD_MASK) == 0 &&
- (flags & NO_BLOCK_MAPPINGS) == 0) {
- pmd_set_huge(pmdp, phys, prot);
+ (flags & NO_BLOCK_MAPPINGS) == 0 &&
+ !pmd_table(old_pmd)) {
+ WARN_ON(!pmd_set_huge(pmdp, phys, prot));
/*
* After the PMD entry has been populated once, we
@@ -273,8 +282,8 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (ret)
return ret;
- BUG_ON(pmd_val(old_pmd) != 0 &&
- pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
+ VM_WARN_ON_ONCE(pmd_val(old_pmd) != 0 &&
+ pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
}
phys += next - addr;
} while (pmdp++, addr = next, addr != end);
@@ -282,6 +291,17 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
return 0;
}
+static bool pmd_range_has_valid_noncont(pmd_t *pmdp)
+{
+ for (int i = 0; i < CONT_PMDS; i++) {
+ pte_t pte = pmd_pte(READ_ONCE(pmdp[i]));
+
+ if (pte_valid(pte) && !pte_cont(pte))
+ return true;
+ }
+ return false;
+}
+
static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
@@ -323,7 +343,8 @@ static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
/* use a contiguous mapping if the range is suitably aligned */
if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
- (flags & NO_CONT_MAPPINGS) == 0)
+ (flags & NO_CONT_MAPPINGS) == 0 &&
+ !pmd_range_has_valid_noncont(pmdp))
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
@@ -379,8 +400,9 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
*/
if (pud_sect_supported() &&
((addr | next | phys) & ~PUD_MASK) == 0 &&
- (flags & NO_BLOCK_MAPPINGS) == 0) {
- pud_set_huge(pudp, phys, prot);
+ (flags & NO_BLOCK_MAPPINGS) == 0 &&
+ !pud_table(old_pud)) {
+ WARN_ON(!pud_set_huge(pudp, phys, prot));
/*
* After the PUD entry has been populated once, we
@@ -394,8 +416,8 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
if (ret)
goto out;
- BUG_ON(pud_val(old_pud) != 0 &&
- pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
+ VM_WARN_ON_ONCE(pud_val(old_pud) != 0 &&
+ pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
}
phys += next - addr;
} while (pudp++, addr = next, addr != end);
@@ -445,8 +467,8 @@ static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
if (ret)
goto out;
- BUG_ON(p4d_val(old_p4d) != 0 &&
- p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp)));
+ VM_WARN_ON_ONCE(p4d_val(old_p4d) != 0 &&
+ p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp)));
phys += next - addr;
} while (p4dp++, addr = next, addr != end);
@@ -1000,8 +1022,7 @@ void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
- NO_CONT_MAPPINGS);
+ early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, 0);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
@@ -1028,18 +1049,17 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
return;
}
- early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
- NO_CONT_MAPPINGS);
+ early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, 0);
/* flush the TLBs after updating live kernel mappings */
flush_tlb_kernel_range(virt, virt + size);
}
-static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
- phys_addr_t end, pgprot_t prot, int flags)
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end,
+ pgprot_t prot, int flags)
{
- early_create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
- prot, early_pgtable_alloc, flags);
+ early_create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
+ end - start, prot, early_pgtable_alloc, flags);
}
void __init mark_linear_text_alias_ro(void)
@@ -1067,36 +1087,24 @@ static int __init parse_kfence_early_init(char *arg)
}
early_param("kfence.sample_interval", parse_kfence_early_init);
-static phys_addr_t __init arm64_kfence_alloc_pool(void)
+static void __init arm64_kfence_map_pool(void)
{
phys_addr_t kfence_pool;
if (!kfence_early_init)
- return 0;
+ return;
kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
if (!kfence_pool) {
pr_err("failed to allocate kfence pool\n");
kfence_early_init = false;
- return 0;
- }
-
- /* Temporarily mark as NOMAP. */
- memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
-
- return kfence_pool;
-}
-
-static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)
-{
- if (!kfence_pool)
return;
+ }
/* KFENCE pool needs page-level mapping. */
- __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
+ __map_memblock(kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
pgprot_tagged(PAGE_KERNEL),
- NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
- memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS | NO_EXEC_MAPPINGS);
__kfence_pool = phys_to_virt(kfence_pool);
}
@@ -1128,18 +1136,18 @@ bool arch_kfence_init_pool(void)
}
#else /* CONFIG_KFENCE */
-static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
-static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { }
+static inline void arm64_kfence_map_pool(void) { }
#endif /* CONFIG_KFENCE */
-static void __init map_mem(pgd_t *pgdp)
+static void __init map_mem(void)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
phys_addr_t kernel_start = __pa_symbol(_text);
- phys_addr_t kernel_end = __pa_symbol(__init_begin);
+ phys_addr_t init_begin = __pa_symbol(__init_begin);
+ phys_addr_t init_end = __pa_symbol(__init_end);
+ phys_addr_t kernel_end = __pa_symbol(__bss_stop);
phys_addr_t start, end;
- phys_addr_t early_kfence_pool;
int flags = NO_EXEC_MAPPINGS;
u64 i;
@@ -1156,7 +1164,7 @@ static void __init map_mem(pgd_t *pgdp)
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
- early_kfence_pool = arm64_kfence_alloc_pool();
+ arm64_kfence_map_pool();
linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map();
@@ -1164,40 +1172,37 @@ static void __init map_mem(pgd_t *pgdp)
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
- * Take care not to create a writable alias for the
- * read-only text and rodata sections of the kernel image.
- * So temporarily mark them as NOMAP to skip mappings in
- * the following for-loop
+ * Map the linear alias of the [_text, __init_begin) interval first
+ * so that its write permissions can be removed later without the need
+ * to split any block mappings created by the loop below.
+ *
+ * Write permissions are needed for alternatives patching, and will be
+ * removed later by mark_linear_text_alias_ro() above. This makes the
+ * contents of the region accessible to subsystems such as hibernate,
+ * but protects it from inadvertent modification or execution.
*/
- memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+ __map_memblock(kernel_start, init_begin, pgprot_tagged(PAGE_KERNEL),
+ flags);
+
+ /* Map the kernel data/bss so it can be remapped later */
+ __map_memblock(init_end, kernel_end, pgprot_tagged(PAGE_KERNEL),
+ flags);
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
- if (start >= end)
- break;
/*
* The linear map must allow allocation tags reading/writing
* if MTE is present. Otherwise, it has the same attributes as
* PAGE_KERNEL.
*/
- __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL),
+ __map_memblock(start, end, pgprot_tagged(PAGE_KERNEL),
flags);
}
- /*
- * Map the linear alias of the [_text, __init_begin) interval
- * as non-executable now, and remove the write permission in
- * mark_linear_text_alias_ro() below (which will be called after
- * alternative patching has completed). This makes the contents
- * of the region accessible to subsystems such as hibernate,
- * but protects it from inadvertent modification or execution.
- * Note that contiguous mappings cannot be remapped in this way,
- * so we should avoid them here.
- */
- __map_memblock(pgdp, kernel_start, kernel_end,
- PAGE_KERNEL, NO_CONT_MAPPINGS);
- memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
- arm64_kfence_map_pool(early_kfence_pool, pgdp);
+ /* Map the kernel data/bss read-only in the linear map */
+ __map_memblock(init_end, kernel_end, PAGE_KERNEL_RO, flags);
+ flush_tlb_kernel_range((unsigned long)lm_alias(__init_end),
+ (unsigned long)lm_alias(__bss_stop));
}
void mark_rodata_ro(void)
@@ -1419,7 +1424,7 @@ static void __init create_idmap(void)
void __init paging_init(void)
{
- map_mem(swapper_pg_dir);
+ map_mem();
memblock_allow_resize();
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index ce035e1b4eaf..bbe98ac9ad8c 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -21,7 +21,7 @@ struct page_change_data {
pgprot_t clear_mask;
};
-static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk)
+static ptval_t set_pageattr_masks(ptval_t val, struct mm_walk *walk)
{
struct page_change_data *masks = walk->private;
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index ab9899ca1e5f..1c20144700d7 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -194,7 +194,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
struct ptdump_pg_level *pg_level = st->pg_level;
static const char units[] = "KMGTPE";
- ptdesc_t prot = 0;
+ ptval_t prot = 0;
/* check if the current level has been folded dynamically */
if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) ||
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index f84e0337cc02..44ff9f684bef 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -60,9 +60,6 @@ struct patch_context {
static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
-static int map_patch_area(void *addr, unsigned long text_poke_addr);
-static void unmap_patch_area(unsigned long addr);
-
static bool mm_patch_enabled(void)
{
return IS_ENABLED(CONFIG_SMP) && radix_enabled();
@@ -117,11 +114,11 @@ static int text_area_cpu_up(unsigned int cpu)
// Map/unmap the area to ensure all page tables are pre-allocated
addr = (unsigned long)area->addr;
- err = map_patch_area(empty_zero_page, addr);
+ err = map_kernel_page(addr, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);
if (err)
return err;
- unmap_patch_area(addr);
+ unmap_kernel_page(addr);
this_cpu_write(cpu_patching_context.area, area);
this_cpu_write(cpu_patching_context.addr, addr);
@@ -233,51 +230,6 @@ static unsigned long get_patch_pfn(void *addr)
return __pa_symbol(addr) >> PAGE_SHIFT;
}
-/*
- * This can be called for kernel text or a module.
- */
-static int map_patch_area(void *addr, unsigned long text_poke_addr)
-{
- unsigned long pfn = get_patch_pfn(addr);
-
- return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
-}
-
-static void unmap_patch_area(unsigned long addr)
-{
- pte_t *ptep;
- pmd_t *pmdp;
- pud_t *pudp;
- p4d_t *p4dp;
- pgd_t *pgdp;
-
- pgdp = pgd_offset_k(addr);
- if (WARN_ON(pgd_none(*pgdp)))
- return;
-
- p4dp = p4d_offset(pgdp, addr);
- if (WARN_ON(p4d_none(*p4dp)))
- return;
-
- pudp = pud_offset(p4dp, addr);
- if (WARN_ON(pud_none(*pudp)))
- return;
-
- pmdp = pmd_offset(pudp, addr);
- if (WARN_ON(pmd_none(*pmdp)))
- return;
-
- ptep = pte_offset_kernel(pmdp, addr);
- if (WARN_ON(pte_none(*ptep)))
- return;
-
- /*
- * In hash, pte_clear flushes the tlb, in radix, we have to
- */
- pte_clear(&init_mm, addr, ptep);
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-}
-
static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword)
{
int err;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 4e40d5e96be9..110308bdef01 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -331,9 +331,6 @@ void __init mem_init(void)
/* Set this up early, so we can take care of the zero page */
cpu_cache_init();
- /* clear the zero-page */
- __flush_wback_region(empty_zero_page, PAGE_SIZE);
-
vsyscall_init();
pr_info("virtual kernel memory layout:\n"
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index b11660b706c5..53fe1f48fd28 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -39,6 +39,10 @@
#define __page_aligned_data __section(".data..page_aligned") __aligned(PAGE_SIZE)
#define __page_aligned_bss __section(".bss..page_aligned") __aligned(PAGE_SIZE)
+#ifndef __bss_pgtbl
+#define __bss_pgtbl __page_aligned_bss
+#endif
+
/*
* For assembly routines.
*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index cdd68ed3ae1a..67aa23814010 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1993,7 +1993,7 @@ static inline unsigned long zero_pfn(unsigned long addr)
return zero_page_pfn;
}
-extern uint8_t empty_zero_page[PAGE_SIZE];
+extern const uint8_t empty_zero_page[PAGE_SIZE];
extern struct page *__zero_page;
static inline struct page *_zero_page(unsigned long addr)
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index 9c880f607c6a..66a883887987 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -26,10 +26,10 @@
* - Latter it reused it as zero shadow to cover large ranges of memory
* that allowed to access, but not handled by kasan (vmalloc/vmemmap ...).
*/
-unsigned char kasan_early_shadow_page[PAGE_SIZE] __page_aligned_bss;
+unsigned char kasan_early_shadow_page[PAGE_SIZE] __bss_pgtbl;
#if CONFIG_PGTABLE_LEVELS > 4
-p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
+p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D] __bss_pgtbl;
static inline bool kasan_p4d_table(pgd_t pgd)
{
return pgd_page(pgd) == virt_to_page(lm_alias(kasan_early_shadow_p4d));
@@ -41,7 +41,7 @@ static inline bool kasan_p4d_table(pgd_t pgd)
}
#endif
#if CONFIG_PGTABLE_LEVELS > 3
-pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD] __page_aligned_bss;
+pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD] __bss_pgtbl;
static inline bool kasan_pud_table(p4d_t p4d)
{
return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud));
@@ -53,7 +53,7 @@ static inline bool kasan_pud_table(p4d_t p4d)
}