// SPDX-License-Identifier: GPL-2.0
/*
* mm/mprotect.c
*
* (C) Copyright 1994 Linus Torvalds
* (C) Copyright 2002 Christoph Hellwig
*
* Address space accounting code <alan@lxorguk.ukuu.org.uk>
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
*/
#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/pkeys.h>
#include <linux/ksm.h>
#include <linux/uaccess.h>
#include <linux/mm_inline.h>
#include <linux/pgtable.h>
#include <linux/userfaultfd_k.h>
#include <uapi/linux/mman.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include "internal.h"
/*
 * Preconditions that must hold before a PTE in @vma may be upgraded to
 * writable without going through a write fault. Returns false as soon as one
 * of them fails, true otherwise.
 */
static bool maybe_change_pte_writable(struct vm_area_struct *vma, pte_t pte)
{
	/* A VMA without VM_WRITE is unexpected here: warn once and refuse. */
	if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE)))
		return false;

	/*
	 * The entry has to stay write-protected when any of these hold:
	 *  - it is not even readable (protnone);
	 *  - softdirty tracking still needs the write fault;
	 *  - uffd-wp tracking still needs the write fault.
	 */
	return !(pte_protnone(pte) ||
		 pte_needs_soft_dirty_wp(vma, pte) ||
		 userfaultfd_pte_wp(vma, pte));
}
/*
 * MAP_PRIVATE flavour of the "may this PTE be made writable?" check.
 *
 * Beyond the common preconditions, only exclusive anonymous pages qualify:
 * the write-fault handler would likewise map those writable without any
 * additional checks while holding the PT lock.
 */
static bool can_change_private_pte_writable(struct vm_area_struct *vma,
					    unsigned long addr, pte_t pte)
{
	struct page *page;

	if (!maybe_change_pte_writable(vma, pte))
		return false;

	/* No normal page behind the PTE: nothing we can special-case on. */
	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return false;

	/* Must be anonymous before the exclusive marker may be consulted. */
	if (!PageAnon(page))
		return false;

	return PageAnonExclusive(page);
}
/*
 * MAP_SHARED flavour of the "may this PTE be made writable?" check.
 *
 * A "clean" PTE might mean the FS still needs a real write fault for
 * writenotify (see vma_wants_writenotify()), so it is left alone. For a
 * "dirty" PTE the assumption is that the FS was already notified, so the PTE
 * can simply be marked writable just like the write-fault handler would do.
 */
static bool can_change_shared_pte_writable(struct vm_area_struct *vma,
					   pte_t pte)
{
	if (maybe_change_pte_writable(vma, pte)) {
		/* A dirty PTE pointing at the zero pfn is not expected here. */
		VM_WARN_ON_ONCE(is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte));

		return pte_dirty(pte);
	}

	return false;
}
/*
 * Decide whether the PTE @pte mapped at @addr in @vma may be made writable.
 * The decision is delegated to the shared or the private helper depending on
 * whether the VMA has VM_SHARED set.
 */
bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
			     pte_t pte)
{
	if (vma->vm_flags & VM_SHARED)
		return can_change_shared_pte_writable(vma, pte);

	return can_change_private_pte_writable(vma, addr, pte);
}
/*
 * Return how many PTEs, starting at @ptep, can be handled as one batch.
 * Only PTEs backed by a large folio are passed to folio_pte_batch_flags();
 * in every other case the batch is the single PTE itself.
 */
static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
				    pte_t pte, int max_nr_ptes, fpb_t flags)
{
	if (folio && folio_test_large(folio))
		return folio_pte_batch_flags(folio, NULL, ptep, &pte,
					     max_nr_ptes, flags);

	/* No underlying folio, or a small one: cannot batch. */
	return 1;
}
/*
 * Commit the new protection to nr_ptes consecutive ptes of a batch, beginning
 * idx entries into it, and queue a TLB flush for that range when one is
 * needed.
 */
static void prot_commit_flush_ptes(struct vm_area_struct *vma, unsigned long addr,
		pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes,
		int idx, bool set_write, struct mmu_gather *tlb)
{
	/*
	 * The arguments describe the start of the batch; move all of them
	 * forward by idx entries. Note that for idx > 0 the nr_ptes passed
	 * here is <= batch size - idx.
	 */
	oldpte = pte_advance_pfn(oldpte, idx);
	ptent = pte_advance_pfn(ptent, idx);
	ptep += idx;
	addr += idx * PAGE_SIZE;

	/* Make the new value writable when the caller asked for it. */
	if (set_write)
		ptent = pte_mkwrite(ptent, vma);

	modify_prot_commit_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes);

	if (!pte_needs_flush(oldpte, ptent))
		return;

	tlb_flush_pte_range(tlb, addr, nr_ptes * PAGE_SIZE);
}
/*
* Get max length of consecutive ptes pointing to PageAnonExclusive() pages or
* !PageAnonExclusive() pages, starting from start_idx. Caller must enforce
* that the ptes point to consecutive pages of the same anon large folio.
*/
static int page_anon_exclusive_sub_batch(int start_idx, int max_len,
struct page *first_page, bool expected_anon_exclusive)
{
int idx;
for (idx = start_idx + 1; idx < start_idx + max_len; ++idx) {
if (expected_anon_exclusive != PageAnonExclusive(first_page + idx))
break;
}
return idx