// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */
#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/kvm_types.h>
#include <linux/miscdevice.h>
#include <linux/node.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/vmalloc.h>
#include <asm/msr.h>
#include <asm/sgx.h>
#include <asm/archrandom.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"
/*
 * Table of EPC sections, sized for SGX_MAX_EPC_SECTIONS entries. It is
 * indexed with the 'section' field of a struct sgx_epc_page to find the
 * section (and through it the NUMA node) a page belongs to.
 */
struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
/* Presumably the number of populated entries in sgx_epc_sections - TODO confirm. */
static int sgx_nr_epc_sections;
/*
 * Task handle and wait queue; going by their names they belong to the ksgxd
 * kernel thread - confirm against the code that creates and wakes the thread.
 */
static struct task_struct *ksgxd_tsk;
static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
/*
 * XArray describing the EPC address space.
 * NOTE(review): key and value types are not visible in this part of the
 * file - verify against the xa_store()/xa_load() callers.
 */
static DEFINE_XARRAY(sgx_epc_address_space);
/*
 * These variables are part of the state of the reclaimer, and must be accessed
 * with sgx_reclaimer_lock acquired.
 *
 * NOTE(review): sgx_nr_free_pages is an atomic_long_t, so it presumably does
 * not rely on sgx_reclaimer_lock - confirm and, if so, move it out from under
 * this comment.
 */
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;
/*
 * Array with one struct sgx_numa_node for each possible NUMA node. An entry
 * is reached through the 'node' pointer of an EPC section and carries its own
 * lock together with per-node page lists such as sgx_poison_page_list.
 */
static struct sgx_numa_node *sgx_numa_nodes;
/*
 * List of EPC pages that are, judging by the name, still waiting to be
 * sanitized - TODO confirm against the code that fills and drains it.
 */
static LIST_HEAD(sgx_dirty_page_list);
/*
* Reset post-kexec EPC pages to the uninitialized state. The pages are removed
* from the input list, and made available for the page allocator. SECS pages
* prepending their children in the input list are left intact.
*
* Return 0 when sanitization was successful or kthread was stopped, and the
* number of unsanitized pages otherwise.
*/
static unsigned long __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
unsigned long left_dirty = 0;
struct sgx_epc_page *page;
LIST_HEAD(dirty);
int ret;
/* dirty_page_list is thread-local, no need for a lock: */
while (!list_empty(dirty_page_list)) {
if (kthread_should_stop())
return 0;
page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
/*
* Checking page->poison without holding the node->lock
* is racy, but losing the race (i.e. poison is set just
* after the check) just means __eremove() will be uselessly
* called for a page that sgx_free_epc_page() will put onto
* the node->sgx_poison_page_list later.
*/
if (page->poison) {
struct sgx_epc_section *section = &sgx_epc_sections[page->section];
struct sgx_numa_node *node = section->node;
spin_lock(&node->lock);
list_move(&page->list, &node->sgx_poison_page_list);
spin_unlock(&node->lock);
continue;
}
ret = __eremove(sgx_get_epc_virt_addr(page));
if (!ret) {
/*
* page is now sanitized. Make it available via the SGX
* page allocator:
*/
list_del(&page->list);
sgx_free_epc_page(page);
} else {
/* The page is not yet clean - move to the dirty list. */
list_move_tail(&page->list, &dirty);
left_dirty++;
}
cond_resched();
}
list_splice(&dirty, dirty_page_list);
return left_dirty;
}
/*
 * Test and clear the "young" state of the enclave page backing @epc_page in
 * the address spaces on the enclave's mm_list.
 *
 * The list is walked under the enclave's SRCU read lock. An mm whose user
 * count has already dropped to zero is skipped; every other mm is pinned,
 * checked with its mmap lock held for reading, and released again with
 * mmput_async(). The walk stops at the first mm that reports the page as
 * young.
 *
 * Return: false if some mm reported the page as young, true otherwise
 * (including when no mm could be checked).
 */
static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
{
	struct sgx_encl_page *encl_page = epc_page->owner;
	struct sgx_encl *encl = encl_page->encl;
	struct sgx_encl_mm *encl_mm;
	bool is_old = true;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&encl->srcu);

	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		bool young;

		/* Skip address spaces that are already being torn down. */
		if (!mmget_not_zero(encl_mm->mm))
			continue;

		mmap_read_lock(encl_mm->mm);
		young = sgx_encl_test_and_clear_young(encl_mm->mm, encl_page);
		mmap_read_unlock(encl_mm->mm);

		mmput_async(encl_mm->mm);

		if (young) {
			is_old = false;
			break;
		}
	}

	srcu_read_unlock(&encl->srcu, srcu_idx);

	return is_old;
}
static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
{
struct sgx_encl_page *page = epc_page->owner;
unsigned long addr = page->desc & PAGE_MASK;
struct sgx_encl *encl = page->encl;
int