diff options
| author | Will Deacon <will@kernel.org> | 2026-06-14 12:17:33 +0100 |
|---|---|---|
| committer | Will Deacon <will@kernel.org> | 2026-06-14 12:17:33 +0100 |
| commit | df3daf49a8a05751b34ec40c46ac6d9c47e9ec22 (patch) | |
| tree | f0161987e703ba6408a554397b40e184b5ea9419 | |
| parent | 917578e25d7585e8f55a48643ad3c2711290cdbc (diff) | |
| parent | f102131c842d1e1d95bc7b818ab93944195da7e9 (diff) | |
Merge branch 'for-next/mm' into for-next/core
* for-next/mm: (24 commits)
Revert "arm64: mm: Unmap kernel data/bss entirely from the linear map"
Revert "arm64: mm: Defer remap of linear alias of data/bss"
arm64/mm: Rename ptdesc_t
arm64: mm: Defer remap of linear alias of data/bss
KVM: arm64: Omit tag sync on stage-2 mappings of the zero page
arm64: Avoid double evaluation of __ptep_get()
kasan: Move generic KASAN page tables out of BSS too
arm64: Rename page table BSS section to .bss..pgtbl
arm64: mm: Unmap kernel data/bss entirely from the linear map
arm64: mm: Map the kernel data/bss read-only in the linear map
mm: Make empty_zero_page[] const
sh: Drop cache flush of the zero page at boot
powerpc/code-patching: Avoid r/w mapping of the zero page
arm64: mm: Don't abuse memblock NOMAP to check for overlaps
arm64: Move fixmap and kasan page tables to end of kernel image
arm64: mm: Permit contiguous attribute for preliminary mappings
arm64: kfence: Avoid NOMAP tricks when mapping the early pool
arm64: mm: Permit contiguous descriptors to be manipulated
arm64: mm: Preserve non-contiguous descriptors when mapping DRAM
arm64: mm: Preserve existing table mappings when mapping DRAM
...
| -rw-r--r-- | arch/arm64/include/asm/io.h | 2 | ||||
| -rw-r--r-- | arch/arm64/include/asm/linkage.h | 2 | ||||
| -rw-r--r-- | arch/arm64/include/asm/pgtable-types.h | 14 | ||||
| -rw-r--r-- | arch/arm64/include/asm/pgtable.h | 4 | ||||
| -rw-r--r-- | arch/arm64/include/asm/ptdump.h | 8 | ||||
| -rw-r--r-- | arch/arm64/include/asm/tlbflush.h | 4 | ||||
| -rw-r--r-- | arch/arm64/kernel/efi.c | 4 | ||||
| -rw-r--r-- | arch/arm64/kernel/pi/map_kernel.c | 2 | ||||
| -rw-r--r-- | arch/arm64/kernel/pi/map_range.c | 4 | ||||
| -rw-r--r-- | arch/arm64/kernel/pi/pi.h | 2 | ||||
| -rw-r--r-- | arch/arm64/kernel/vmlinux.lds.S | 8 | ||||
| -rw-r--r-- | arch/arm64/kvm/mmu.c | 5 | ||||
| -rw-r--r-- | arch/arm64/mm/fixmap.c | 6 | ||||
| -rw-r--r-- | arch/arm64/mm/kasan_init.c | 2 | ||||
| -rw-r--r-- | arch/arm64/mm/mmap.c | 4 | ||||
| -rw-r--r-- | arch/arm64/mm/mmu.c | 145 | ||||
| -rw-r--r-- | arch/arm64/mm/pageattr.c | 2 | ||||
| -rw-r--r-- | arch/arm64/mm/ptdump.c | 2 | ||||
| -rw-r--r-- | arch/powerpc/lib/code-patching.c | 52 | ||||
| -rw-r--r-- | arch/sh/mm/init.c | 3 | ||||
| -rw-r--r-- | include/linux/linkage.h | 4 | ||||
| -rw-r--r-- | include/linux/pgtable.h | 2 | ||||
| -rw-r--r-- | mm/kasan/init.c | 10 | ||||
| -rw-r--r-- | mm/mm_init.c | 2 |
24 files changed, 132 insertions, 161 deletions
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 8cbd1e96fd50..21c8e400107c 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -270,7 +270,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t phys, size_t size, pgprot_t user_prot) { pgprot_t prot; - ptdesc_t user_prot_val = pgprot_val(user_prot); + ptval_t user_prot_val = pgprot_val(user_prot); if (WARN_ON_ONCE(!(user_prot_val & PTE_USER))) return NULL; diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 73eabc82a6bb..d1f7a16729d2 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -45,4 +45,6 @@ #define _THIS_IP_ ({ unsigned long __ip; asm volatile("adr %0, ." : "=r" (__ip)); __ip; }) +#define __bss_pgtbl __section(".bss..pgtbl") __aligned(PAGE_SIZE) + #endif diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 265e8301d7ba..2f2f5527930f 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -17,13 +17,13 @@ * Generic page table descriptor format from which * all level specific descriptors can be derived. */ -typedef u64 ptdesc_t; +typedef u64 ptval_t; -typedef ptdesc_t pteval_t; -typedef ptdesc_t pmdval_t; -typedef ptdesc_t pudval_t; -typedef ptdesc_t p4dval_t; -typedef ptdesc_t pgdval_t; +typedef ptval_t pteval_t; +typedef ptval_t pmdval_t; +typedef ptval_t pudval_t; +typedef ptval_t p4dval_t; +typedef ptval_t pgdval_t; /* * These are used to make use of C type-checking.. @@ -54,7 +54,7 @@ typedef struct { pgdval_t pgd; } pgd_t; #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) } ) -typedef struct { ptdesc_t pgprot; } pgprot_t; +typedef struct { ptval_t pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4dfa42b7d053..c9e4e00a9af2 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1007,7 +1007,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr) { - BUG_ON(!pgtable_l4_enabled()); + VM_WARN_ON_ONCE(!pgtable_l4_enabled()); return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t); } @@ -1130,7 +1130,7 @@ static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr) static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr) { - BUG_ON(!pgtable_l5_enabled()); + VM_WARN_ON_ONCE(!pgtable_l5_enabled()); return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t); } diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index baff24004459..5b374a6ab34a 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -26,8 +26,8 @@ struct ptdump_info { }; struct ptdump_prot_bits { - ptdesc_t mask; - ptdesc_t val; + ptval_t mask; + ptval_t val; const char *set; const char *clear; }; @@ -36,7 +36,7 @@ struct ptdump_pg_level { const struct ptdump_prot_bits *bits; char name[4]; int num; - ptdesc_t mask; + ptval_t mask; }; /* @@ -53,7 +53,7 @@ struct ptdump_pg_state { const struct mm_struct *mm; unsigned long start_address; int level; - ptdesc_t current_prot; + ptval_t current_prot; bool check_wx; unsigned long wx_pages; unsigned long uxn_pages; diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c0bf5b398041..d52ac8c17190 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -725,9 +725,9 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b sme_dvmsync_add_pending(batch, mm); } -static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval) +static inline bool __pte_flags_need_flush(ptval_t oldval, ptval_t newval) { - ptdesc_t diff = oldval ^ newval; + ptval_t diff = oldval ^ newval; /* invalid to valid transition requires no flush */ if (!(oldval & PTE_VALID)) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index a81cb4aa4738..30cd7f804398 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -31,7 +31,7 @@ static bool region_is_misaligned(const efi_memory_desc_t *md) * executable, everything else can be mapped with the XN bits * set. Also take the new (optional) RO/XP bits into account. */ -static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md) +static __init ptval_t create_mapping_protection(efi_memory_desc_t *md) { u64 attr = md->attribute; u32 type = md->type; @@ -85,7 +85,7 @@ static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md) int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { - ptdesc_t prot_val = create_mapping_protection(md); + ptval_t prot_val = create_mapping_protection(md); bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_RUNTIME_SERVICES_DATA); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index a852264958c3..fb44cbdd2f29 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -165,7 +165,7 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttb static void __init remap_idmap_for_lpa2(void) { /* clear the bits that change meaning once LPA2 is turned on */ - ptdesc_t mask = PTE_SHARED; + ptval_t mask = PTE_SHARED; /* * We have to clear bits [9:8] in all block or page descriptors in the diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c index de52cd85c691..761b14893f74 100644 --- a/arch/arm64/kernel/pi/map_range.c +++ b/arch/arm64/kernel/pi/map_range.c @@ -31,7 +31,7 @@ void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, u64 va_offset) { u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX; - ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; + ptval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; int lshift = (3 - level) * PTDESC_TABLE_SHIFT; u64 lmask = (PAGE_SIZE << lshift) - 1; @@ -88,7 +88,7 @@ void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, } } -asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask) +asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptval_t clrmask) { phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */ pgprot_t text_prot = PAGE_KERNEL_ROX; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index aec3172d4003..5dfd8484d200 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -35,4 +35,4 @@ void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt); -asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask); +asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptval_t clrmask); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e1ac876200a3..d3ed59abab38 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -349,9 +349,15 @@ SECTIONS _edata = .; /* start of zero-init region */ - BSS_SECTION(SBSS_ALIGN, 0, 0) + BSS_SECTION(SBSS_ALIGN, 0, PAGE_SIZE) __pi___bss_start = __bss_start; + /* page table BSS starts here - preceding data/BSS is omitted from the linear map */ + .pgtbl : ALIGN(PAGE_SIZE) { + *(.bss..pgtbl) + } + ASSERT(ADDR(.pgtbl) == __bss_stop, ".pgtbl must follow BSS") + . = ALIGN(PAGE_SIZE); __pi_init_pg_dir = .; . += INIT_DIR_SIZE; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d089c107d9b7..445d6cf035c9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1479,6 +1479,11 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, if (!kvm_has_mte(kvm)) return; + if (is_zero_pfn(pfn)) { + WARN_ON_ONCE(nr_pages != 1); + return; + } + if (folio_test_hugetlb(folio)) { /* Hugetlb has MTE flags set on head page only */ if (folio_try_hugetlb_mte_tagging(folio)) { diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index c5c5425791da..f66a0016dd02 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -31,9 +31,9 @@ static_assert(NR_BM_PMD_TABLES == 1); #define BM_PTE_TABLE_IDX(addr) __BM_TABLE_IDX(addr, PMD_SHIFT) -static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __bss_pgtbl; +static pmd_t bm_pmd[PTRS_PER_PMD] __bss_pgtbl __maybe_unused; +static pud_t bm_pud[PTRS_PER_PUD] __bss_pgtbl __maybe_unused; static inline pte_t *fixmap_pte(unsigned long addr) { diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index abeb81bf6ebd..3fcad956fdf7 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -214,7 +214,7 @@ asmlinkage void __init kasan_early_init(void) * shadow pud_t[]/p4d_t[], which could end up getting corrupted * when the linear region is mapped. */ - static pte_t tbl[PTRS_PER_PTE] __page_aligned_bss; + static pte_t tbl[PTRS_PER_PTE] __bss_pgtbl; pgd_t *pgdp = pgd_offset_k(KASAN_SHADOW_START); set_pgd(pgdp, __pgd(__pa_symbol(tbl) | PGD_TYPE_TABLE)); diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 92b2f5097a96..32e0771d6477 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -34,7 +34,7 @@ static pgprot_t protection_map[16] __ro_after_init = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC }; -static ptdesc_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO; +static ptval_t gcs_page_prot __ro_after_init = _PAGE_GCS_RO; /* * You really shouldn't be using read() or write() on /dev/mem. This might go @@ -87,7 +87,7 @@ arch_initcall(adjust_protection_map); pgprot_t vm_get_page_prot(vm_flags_t vm_flags) { - ptdesc_t prot; + ptval_t prot; /* Short circuit GCS to avoid bloating the table. */ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index dd85e093ffdb..354bedf0e638 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -134,10 +134,6 @@ bool pgattr_change_is_safe(pteval_t old, pteval_t new) if (pte_pfn(__pte(old)) != pte_pfn(__pte(new))) return false; - /* live contiguous mappings may not be manipulated at all */ - if ((old | new) & PTE_CONT) - return false; - /* Transitioning from Non-Global to Global is unsafe */ if (old & ~new & PTE_NG) return false; @@ -187,6 +183,17 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, } while (ptep++, addr += PAGE_SIZE, addr != end); } +static bool pte_range_has_valid_noncont(pte_t *ptep) +{ + for (int i = 0; i < CONT_PTES; i++) { + pte_t pte = __ptep_get(&ptep[i]); + + if (pte_valid(pte) && !pte_cont(pte)) + return true; + } + return false; +} + static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, @@ -224,7 +231,8 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, /* use a contiguous mapping if the range is suitably aligned */ if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && - (flags & NO_CONT_MAPPINGS) == 0) + (flags & NO_CONT_MAPPINGS) == 0 && + !pte_range_has_valid_noncont(ptep)) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); init_pte(ptep, addr, next, phys, __prot); @@ -256,8 +264,9 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~PMD_MASK) == 0 && - (flags & NO_BLOCK_MAPPINGS) == 0) { - pmd_set_huge(pmdp, phys, prot); + (flags & NO_BLOCK_MAPPINGS) == 0 && + !pmd_table(old_pmd)) { + WARN_ON(!pmd_set_huge(pmdp, phys, prot)); /* * After the PMD entry has been populated once, we @@ -273,8 +282,8 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, if (ret) return ret; - BUG_ON(pmd_val(old_pmd) != 0 && - pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); + VM_WARN_ON_ONCE(pmd_val(old_pmd) != 0 && + pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); } phys += next - addr; } while (pmdp++, addr = next, addr != end); @@ -282,6 +291,17 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, return 0; } +static bool pmd_range_has_valid_noncont(pmd_t *pmdp) +{ + for (int i = 0; i < CONT_PMDS; i++) { + pte_t pte = pmd_pte(READ_ONCE(pmdp[i])); + + if (pte_valid(pte) && !pte_cont(pte)) + return true; + } + return false; +} + static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, @@ -323,7 +343,8 @@ static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, /* use a contiguous mapping if the range is suitably aligned */ if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && - (flags & NO_CONT_MAPPINGS) == 0) + (flags & NO_CONT_MAPPINGS) == 0 && + !pmd_range_has_valid_noncont(pmdp)) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); @@ -379,8 +400,9 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, */ if (pud_sect_supported() && ((addr | next | phys) & ~PUD_MASK) == 0 && - (flags & NO_BLOCK_MAPPINGS) == 0) { - pud_set_huge(pudp, phys, prot); + (flags & NO_BLOCK_MAPPINGS) == 0 && + !pud_table(old_pud)) { + WARN_ON(!pud_set_huge(pudp, phys, prot)); /* * After the PUD entry has been populated once, we @@ -394,8 +416,8 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, if (ret) goto out; - BUG_ON(pud_val(old_pud) != 0 && - pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); + VM_WARN_ON_ONCE(pud_val(old_pud) != 0 && + pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); } phys += next - addr; } while (pudp++, addr = next, addr != end); @@ -445,8 +467,8 @@ static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, if (ret) goto out; - BUG_ON(p4d_val(old_p4d) != 0 && - p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); + VM_WARN_ON_ONCE(p4d_val(old_p4d) != 0 && + p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); phys += next - addr; } while (p4dp++, addr = next, addr != end); @@ -1000,8 +1022,7 @@ void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, 0); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -1028,18 +1049,17 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt, return; } - early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + early_create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, 0); /* flush the TLBs after updating live kernel mappings */ flush_tlb_kernel_range(virt, virt + size); } -static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, - phys_addr_t end, pgprot_t prot, int flags) +static void __init __map_memblock(phys_addr_t start, phys_addr_t end, + pgprot_t prot, int flags) { - early_create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, - prot, early_pgtable_alloc, flags); + early_create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), + end - start, prot, early_pgtable_alloc, flags); } void __init mark_linear_text_alias_ro(void) @@ -1067,36 +1087,24 @@ static int __init parse_kfence_early_init(char *arg) } early_param("kfence.sample_interval", parse_kfence_early_init); -static phys_addr_t __init arm64_kfence_alloc_pool(void) +static void __init arm64_kfence_map_pool(void) { phys_addr_t kfence_pool; if (!kfence_early_init) - return 0; + return; kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); if (!kfence_pool) { pr_err("failed to allocate kfence pool\n"); kfence_early_init = false; - return 0; - } - - /* Temporarily mark as NOMAP. */ - memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); - - return kfence_pool; -} - -static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) -{ - if (!kfence_pool) return; + } /* KFENCE pool needs page-level mapping. */ - __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE, + __map_memblock(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, pgprot_tagged(PAGE_KERNEL), - NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); - memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS | NO_EXEC_MAPPINGS); __kfence_pool = phys_to_virt(kfence_pool); } @@ -1128,18 +1136,18 @@ bool arch_kfence_init_pool(void) } #else /* CONFIG_KFENCE */ -static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } -static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } +static inline void arm64_kfence_map_pool(void) { } #endif /* CONFIG_KFENCE */ -static void __init map_mem(pgd_t *pgdp) +static void __init map_mem(void) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_text); - phys_addr_t kernel_end = __pa_symbol(__init_begin); + phys_addr_t init_begin = __pa_symbol(__init_begin); + phys_addr_t init_end = __pa_symbol(__init_end); + phys_addr_t kernel_end = __pa_symbol(__bss_stop); phys_addr_t start, end; - phys_addr_t early_kfence_pool; int flags = NO_EXEC_MAPPINGS; u64 i; @@ -1156,7 +1164,7 @@ static void __init map_mem(pgd_t *pgdp) BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) && pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1); - early_kfence_pool = arm64_kfence_alloc_pool(); + arm64_kfence_map_pool(); linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map(); @@ -1164,40 +1172,37 @@ static void __init map_mem(pgd_t *pgdp) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* - * Take care not to create a writable alias for the - * read-only text and rodata sections of the kernel image. - * So temporarily mark them as NOMAP to skip mappings in - * the following for-loop + * Map the linear alias of the [_text, __init_begin) interval first + * so that its write permissions can be removed later without the need + * to split any block mappings created by the loop below. + * + * Write permissions are needed for alternatives patching, and will be + * removed later by mark_linear_text_alias_ro() above. This makes the + * contents of the region accessible to subsystems such as hibernate, + * but protects it from inadvertent modification or execution. */ - memblock_mark_nomap(kernel_start, kernel_end - kernel_start); + __map_memblock(kernel_start, init_begin, pgprot_tagged(PAGE_KERNEL), + flags); + + /* Map the kernel data/bss so it can be remapped later */ + __map_memblock(init_end, kernel_end, pgprot_tagged(PAGE_KERNEL), + flags); /* map all the memory banks */ for_each_mem_range(i, &start, &end) { - if (start >= end) - break; /* * The linear map must allow allocation tags reading/writing * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ - __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), + __map_memblock(start, end, pgprot_tagged(PAGE_KERNEL), flags); } - /* - * Map the linear alias of the [_text, __init_begin) interval - * as non-executable now, and remove the write permission in - * mark_linear_text_alias_ro() below (which will be called after - * alternative patching has completed). This makes the contents - * of the region accessible to subsystems such as hibernate, - * but protects it from inadvertent modification or execution. - * Note that contiguous mappings cannot be remapped in this way, - * so we should avoid them here. - */ - __map_memblock(pgdp, kernel_start, kernel_end, - PAGE_KERNEL, NO_CONT_MAPPINGS); - memblock_clear_nomap(kernel_start, kernel_end - kernel_start); - arm64_kfence_map_pool(early_kfence_pool, pgdp); + /* Map the kernel data/bss read-only in the linear map */ + __map_memblock(init_end, kernel_end, PAGE_KERNEL_RO, flags); + flush_tlb_kernel_range((unsigned long)lm_alias(__init_end), + (unsigned long)lm_alias(__bss_stop)); } void mark_rodata_ro(void) @@ -1419,7 +1424,7 @@ static void __init create_idmap(void) void __init paging_init(void) { - map_mem(swapper_pg_dir); + map_mem(); memblock_allow_resize(); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index ce035e1b4eaf..bbe98ac9ad8c 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -21,7 +21,7 @@ struct page_change_data { pgprot_t clear_mask; }; -static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk) +static ptval_t set_pageattr_masks(ptval_t val, struct mm_walk *walk) { struct page_change_data *masks = walk->private; diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index ab9899ca1e5f..1c20144700d7 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -194,7 +194,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump); struct ptdump_pg_level *pg_level = st->pg_level; static const char units[] = "KMGTPE"; - ptdesc_t prot = 0; + ptval_t prot = 0; /* check if the current level has been folded dynamically */ if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) || diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index f84e0337cc02..44ff9f684bef 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -60,9 +60,6 @@ struct patch_context { static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); -static int map_patch_area(void *addr, unsigned long text_poke_addr); -static void unmap_patch_area(unsigned long addr); - static bool mm_patch_enabled(void) { return IS_ENABLED(CONFIG_SMP) && radix_enabled(); @@ -117,11 +114,11 @@ static int text_area_cpu_up(unsigned int cpu) // Map/unmap the area to ensure all page tables are pre-allocated addr = (unsigned long)area->addr; - err = map_patch_area(empty_zero_page, addr); + err = map_kernel_page(addr, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); if (err) return err; - unmap_patch_area(addr); + unmap_kernel_page(addr); this_cpu_write(cpu_patching_context.area, area); this_cpu_write(cpu_patching_context.addr, addr); @@ -233,51 +230,6 @@ static unsigned long get_patch_pfn(void *addr) return __pa_symbol(addr) >> PAGE_SHIFT; } -/* - * This can be called for kernel text or a module. - */ -static int map_patch_area(void *addr, unsigned long text_poke_addr) -{ - unsigned long pfn = get_patch_pfn(addr); - - return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); -} - -static void unmap_patch_area(unsigned long addr) -{ - pte_t *ptep; - pmd_t *pmdp; - pud_t *pudp; - p4d_t *p4dp; - pgd_t *pgdp; - - pgdp = pgd_offset_k(addr); - if (WARN_ON(pgd_none(*pgdp))) - return; - - p4dp = p4d_offset(pgdp, addr); - if (WARN_ON(p4d_none(*p4dp))) - return; - - pudp = pud_offset(p4dp, addr); - if (WARN_ON(pud_none(*pudp))) - return; - - pmdp = pmd_offset(pudp, addr); - if (WARN_ON(pmd_none(*pmdp))) - return; - - ptep = pte_offset_kernel(pmdp, addr); - if (WARN_ON(pte_none(*ptep))) - return; - - /* - * In hash, pte_clear flushes the tlb, in radix, we have to - */ - pte_clear(&init_mm, addr, ptep); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); -} - static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword) { int err; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 4e40d5e96be9..110308bdef01 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -331,9 +331,6 @@ void __init mem_init(void) /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); - /* clear the zero-page */ - __flush_wback_region(empty_zero_page, PAGE_SIZE); - vsyscall_init(); pr_info("virtual kernel memory layout:\n" diff --git a/include/linux/linkage.h b/include/linux/linkage.h index b11660b706c5..53fe1f48fd28 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -39,6 +39,10 @@ #define __page_aligned_data __section(".data..page_aligned") __aligned(PAGE_SIZE) #define __page_aligned_bss __section(".bss..page_aligned") __aligned(PAGE_SIZE) +#ifndef __bss_pgtbl +#define __bss_pgtbl __page_aligned_bss +#endif + /* * For assembly routines. * diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cdd68ed3ae1a..67aa23814010 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1993,7 +1993,7 @@ static inline unsigned long zero_pfn(unsigned long addr) return zero_page_pfn; } -extern uint8_t empty_zero_page[PAGE_SIZE]; +extern const uint8_t empty_zero_page[PAGE_SIZE]; extern struct page *__zero_page; static inline struct page *_zero_page(unsigned long addr) diff --git a/mm/kasan/init.c b/mm/kasan/init.c index 9c880f607c6a..66a883887987 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -26,10 +26,10 @@ * - Latter it reused it as zero shadow to cover large ranges of memory * that allowed to access, but not handled by kasan (vmalloc/vmemmap ...). */ -unsigned char kasan_early_shadow_page[PAGE_SIZE] __page_aligned_bss; +unsigned char kasan_early_shadow_page[PAGE_SIZE] __bss_pgtbl; #if CONFIG_PGTABLE_LEVELS > 4 -p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss; +p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D] __bss_pgtbl; static inline bool kasan_p4d_table(pgd_t pgd) { return pgd_page(pgd) == virt_to_page(lm_alias(kasan_early_shadow_p4d)); @@ -41,7 +41,7 @@ static inline bool kasan_p4d_table(pgd_t pgd) } #endif #if CONFIG_PGTABLE_LEVELS > 3 -pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD] __page_aligned_bss; +pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD] __bss_pgtbl; static inline bool kasan_pud_table(p4d_t p4d) { return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud)); @@ -53,7 +53,7 @@ static inline bool kasan_pud_table(p4d_t p4d) } |
