aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 19:29:45 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 19:29:45 -0800
commite2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch)
treef7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /drivers
parent7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff)
parentc45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff)
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - More userfaultfs work from Peter Xu - Several convert-to-folios series from Sidhartha Kumar and Huang Ying - Some filemap cleanups from Vishal Moola - David Hildenbrand added the ability to selftest anon memory COW handling - Some cpuset simplifications from Liu Shixin - Addition of vmalloc tracing support by Uladzislau Rezki - Some pagecache folioifications and simplifications from Matthew Wilcox - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use it - Miguel Ojeda contributed some cleanups for our use of the __no_sanitize_thread__ gcc keyword. This series should have been in the non-MM tree, my bad - Naoya Horiguchi improved the interaction between memory poisoning and memory section removal for huge pages - DAMON cleanups and tuneups from SeongJae Park - Tony Luck fixed the handling of COW faults against poisoned pages - Peter Xu utilized the PTE marker code for handling swapin errors - Hugh Dickins reworked compound page mapcount handling, simplifying it and making it more efficient - Removal of the autonuma savedwrite infrastructure from Nadav Amit and David Hildenbrand - zram support for multiple compression streams from Sergey Senozhatsky - David Hildenbrand reworked the GUP code's R/O long-term pinning so that drivers no longer need to use the FOLL_FORCE workaround which didn't work very well anyway - Mel Gorman altered the page allocator so that local IRQs can remnain enabled during per-cpu page allocations - Vishal Moola removed the try_to_release_page() wrapper - Stefan Roesch added some per-BDI sysfs tunables which are used to prevent network block devices from dirtying excessive amounts of pagecache - David Hildenbrand did some cleanup and repair work on KSM COW breaking - Nhat Pham and Johannes Weiner have implemented writeback in zswap's zsmalloc backend - Brian Foster has fixed a longstanding corner-case oddity in file[map]_write_and_wait_range() - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang Chen - Shiyang Ruan has done some work on fsdax, to make its reflink mode work better under xfstests. Better, but still not perfect - Christoph Hellwig has removed the .writepage() method from several filesystems. They only need .writepages() - Yosry Ahmed wrote a series which fixes the memcg reclaim target beancounting - David Hildenbrand has fixed some of our MM selftests for 32-bit machines - Many singleton patches, as usual * tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits) mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio mm: mmu_gather: allow more than one batch of delayed rmaps mm: fix typo in struct pglist_data code comment kmsan: fix memcpy tests mm: add cond_resched() in swapin_walk_pmd_entry() mm: do not show fs mm pc for VM_LOCKONFAULT pages selftests/vm: ksm_functional_tests: fixes for 32bit selftests/vm: cow: fix compile warning on 32bit selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem mm,thp,rmap: fix races between updates of subpages_mapcount mm: memcg: fix swapcached stat accounting mm: add nodes= arg to memory.reclaim mm: disable top-tier fallback to reclaim on proactive reclaim selftests: cgroup: make sure reclaim target memcg is unprotected selftests: cgroup: refactor proactive reclaim code to reclaim_until() mm: memcg: fix stale protection of reclaim target memcg mm/mmap: properly unaccount memory on mas_preallocate() failure omfs: remove ->writepage jfs: remove ->writepage ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/numa/hmat.c7
-rw-r--r--drivers/base/memory.c38
-rw-r--r--drivers/block/zram/Kconfig9
-rw-r--r--drivers/block/zram/zcomp.c6
-rw-r--r--drivers/block/zram/zcomp.h2
-rw-r--r--drivers/block/zram/zram_drv.c619
-rw-r--r--drivers/block/zram/zram_drv.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c2
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c8
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.c2
-rw-r--r--drivers/infiniband/core/umem.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c9
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c9
-rw-r--r--drivers/media/common/videobuf2/frame_vector.c2
-rw-r--r--drivers/media/pci/ivtv/ivtv-udma.c2
-rw-r--r--drivers/media/pci/ivtv/ivtv-yuv.c5
-rw-r--r--drivers/media/v4l2-core/videobuf-dma-sg.c14
-rw-r--r--drivers/misc/habanalabs/common/memory.c3
19 files changed, 640 insertions, 131 deletions
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 6cceca64a6bc..605a0c7053be 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -780,11 +780,6 @@ static int hmat_callback(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block hmat_callback_nb = {
- .notifier_call = hmat_callback,
- .priority = 2,
-};
-
static __init void hmat_free_structures(void)
{
struct memory_target *target, *tnext;
@@ -867,7 +862,7 @@ static __init int hmat_init(void)
hmat_register_targets();
/* Keep the table and structures if the notifier may use them */
- if (!register_hotmemory_notifier(&hmat_callback_nb))
+ if (!hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
return 0;
out_put:
hmat_free_structures();
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9aa0da991cfb..fe98fb8d94e5 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -175,6 +175,15 @@ int memory_notify(unsigned long val, void *v)
return blocking_notifier_call_chain(&memory_chain, val, v);
}
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+static unsigned long memblk_nr_poison(struct memory_block *mem);
+#else
+static inline unsigned long memblk_nr_poison(struct memory_block *mem)
+{
+ return 0;
+}
+#endif
+
static int memory_block_online(struct memory_block *mem)
{
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -183,6 +192,9 @@ static int memory_block_online(struct memory_block *mem)
struct zone *zone;
int ret;
+ if (memblk_nr_poison(mem))
+ return -EHWPOISON;
+
zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
start_pfn, nr_pages);
@@ -864,6 +876,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
mem = find_memory_block_by_id(block_id);
if (WARN_ON_ONCE(!mem))
continue;
+ num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
unregister_memory_block_under_nodes(mem);
remove_memory_block(mem);
}
@@ -1164,3 +1177,28 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
}
return ret;
}
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+ const unsigned long block_id = pfn_to_block_id(pfn);
+ struct memory_block *mem = find_memory_block_by_id(block_id);
+
+ if (mem)
+ atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+ const unsigned long block_id = pfn_to_block_id(pfn);
+ struct memory_block *mem = find_memory_block_by_id(block_id);
+
+ if (mem)
+ atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+static unsigned long memblk_nr_poison(struct memory_block *mem)
+{
+ return atomic_long_read(&mem->nr_hwpoison);
+}
+#endif
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index d4100b0c083e..0386b7da02aa 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING
/sys/kernel/debug/zram/zramX/block_state.
See Documentation/admin-guide/blockdev/zram.rst for more information.
+
+config ZRAM_MULTI_COMP
+ bool "Enable multiple compression streams"
+ depends on ZRAM
+ help
+ This will enable multi-compression streams, so that ZRAM can
+ re-compress pages using a potentially slower but more effective
+ compression algorithm. Note, that IDLE page recompression
+ requires ZRAM_MEMORY_TRACKING.
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 0916de952e09..55af4efd7983 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -206,7 +206,7 @@ void zcomp_destroy(struct zcomp *comp)
* case of allocation error, or any other error potentially
* returned by zcomp_init().
*/
-struct zcomp *zcomp_create(const char *compress)
+struct zcomp *zcomp_create(const char *alg)
{
struct zcomp *comp;
int error;
@@ -216,14 +216,14 @@ struct zcomp *zcomp_create(const char *compress)
* is not loaded yet. We must do it here, otherwise we are about to
* call /sbin/modprobe under CPU hot-plug lock.
*/
- if (!zcomp_available_algorithm(compress))
+ if (!zcomp_available_algorithm(alg))
return ERR_PTR(-EINVAL);
comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
if (!comp)
return ERR_PTR(-ENOMEM);
- comp->name = compress;
+ comp->name = alg;
error = zcomp_init(comp);
if (error) {
kfree(comp);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 40f6420f4b2e..cdefdef93da8 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -27,7 +27,7 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node);
ssize_t zcomp_available_show(const char *comp, char *buf);
bool zcomp_available_algorithm(const char *comp);
-struct zcomp *zcomp_create(const char *comp);
+struct zcomp *zcomp_create(const char *alg);
void zcomp_destroy(struct zcomp *comp);
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 966aab902d19..e290d6d97047 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec)
}
#endif
+static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
+{
+ prio &= ZRAM_COMP_PRIORITY_MASK;
+ /*
+ * Clear previous priority value first, in case if we recompress
+ * further an already recompressed page
+ */
+ zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
+ ZRAM_COMP_PRIORITY_BIT1);
+ zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
+}
+
+static inline u32 zram_get_priority(struct zram *zram, u32 index)
+{
+ u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
+
+ return prio & ZRAM_COMP_PRIORITY_MASK;
+}
+
/*
* Check if request is within bounds and aligned on zram logical blocks.
*/
@@ -188,16 +207,13 @@ static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
static inline void update_used_max(struct zram *zram,
const unsigned long pages)
{
- unsigned long old_max, cur_max;
-
- old_max = atomic_long_read(&zram->stats.max_used_pages);
+ unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
do {
- cur_max = old_max;
- if (pages > cur_max)
- old_max = atomic_long_cmpxchg(
- &zram->stats.max_used_pages, cur_max, pages);
- } while (old_max != cur_max);
+ if (cur_max >= pages)
+ return;
+ } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
+ &cur_max, pages));
}
static inline void zram_fill_page(void *ptr, unsigned long len,
@@ -629,10 +645,10 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
#define PAGE_WB_SIG "page_index="
-#define PAGE_WRITEBACK 0
-#define HUGE_WRITEBACK (1<<0)
-#define IDLE_WRITEBACK (1<<1)
-
+#define PAGE_WRITEBACK 0
+#define HUGE_WRITEBACK (1<<0)
+#define IDLE_WRITEBACK (1<<1)
+#define INCOMPRESSIBLE_WRITEBACK (1<<2)
static ssize_t writeback_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
@@ -653,6 +669,8 @@ static ssize_t writeback_store(struct device *dev,
mode = HUGE_WRITEBACK;
else if (sysfs_streq(buf, "huge_idle"))
mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
+ else if (sysfs_streq(buf, "incompressible"))
+ mode = INCOMPRESSIBLE_WRITEBACK;
else {
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
return -EINVAL;
@@ -715,11 +733,15 @@ static ssize_t writeback_store(struct device *dev,
goto next;
if (mode & IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
+ !zram_test_flag(zram, index, ZRAM_IDLE))
goto next;
if (mode & HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+ if (mode & INCOMPRESSIBLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
goto next;
+
/*
* Clearing ZRAM_UNDER_WB is duty of caller.
* IOW, zram_free_page never clear it.
@@ -753,8 +775,12 @@ static ssize_t writeback_store(struct device *dev,
zram_clear_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
/*
- * Return last IO error unless every IO were
- * not suceeded.
+ * BIO errors are not fatal, we continue and simply
+ * attempt to writeback the remaining objects (pages).
+ * At the same time we need to signal user-space that
+ * some writes (at least one, but also could be all of
+ * them) were not successful and we do so by returning
+ * the most recent BIO error.
*/
ret = err;
continue;
@@ -920,13 +946,16 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
ts = ktime_to_timespec64(zram->table[index].ac_time);
copied = snprintf(kbuf + written, count,
- "%12zd %12lld.%06lu %c%c%c%c\n",
+ "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
index, (s64)ts.tv_sec,
ts.tv_nsec / NSEC_PER_USEC,
zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
- zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+ zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
+ zram_get_priority(zram, index) ? 'r' : '.',
+ zram_test_flag(zram, index,
+ ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
if (count <= copied) {
zram_slot_unlock(zram, index);
@@ -1000,46 +1029,143 @@ static ssize_t max_comp_streams_store(struct device *dev,
return len;
}
-static ssize_t comp_algorithm_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
- size_t sz;
- struct zram *zram = dev_to_zram(dev);
+ /* Do not free statically defined compression algorithms */
+ if (zram->comp_algs[prio] != default_compressor)
+ kfree(zram->comp_algs[prio]);
+
+ zram->comp_algs[prio] = alg;
+}
+
+static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
+{
+ ssize_t sz;
down_read(&zram->init_lock);
- sz = zcomp_available_show(zram->compressor, buf);
+ sz = zcomp_available_show(zram->comp_algs[prio], buf);
up_read(&zram->init_lock);
return sz;
}
-static ssize_t comp_algorithm_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
- struct zram *zram = dev_to_zram(dev);
- char compressor[ARRAY_SIZE(zram->compressor)];
+ char *compressor;
size_t sz;
- strscpy(compressor, buf, sizeof(compressor));
+ sz = strlen(buf);
+ if (sz >= CRYPTO_MAX_ALG_NAME)
+ return -E2BIG;
+
+ compressor = kstrdup(buf, GFP_KERNEL);
+ if (!compressor)
+ return -ENOMEM;
+
/* ignore trailing newline */
- sz = strlen(compressor);
if (sz > 0 && compressor[sz - 1] == '\n')
compressor[sz - 1] = 0x00;
- if (!zcomp_available_algorithm(compressor))
+ if (!zcomp_available_algorithm(compressor)) {
+ kfree(compressor);
return -EINVAL;
+ }
down_write(&zram->init_lock);
if (init_done(zram)) {
up_write(&zram->init_lock);
+ kfree(compressor);
pr_info("Can't change algorithm for initialized device\n");
return -EBUSY;
}
- strcpy(zram->compressor, compressor);
+ comp_algorithm_set(zram, prio, compressor);
up_write(&zram->init_lock);
- return len;
+ return 0;
+}
+
+static ssize_t comp_algorithm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
+}
+
+static ssize_t comp_algorithm_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ int ret;
+
+ ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
+ return ret ? ret : len;
+}
+
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static ssize_t recomp_algorithm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t sz = 0;
+ u32 prio;
+
+ for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+ if (!zram->comp_algs[prio])
+ continue;
+
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
+ sz += __comp_algorithm_show(zram, prio, buf + sz);
+ }
+
+ return sz;
+}
+
+static ssize_t recomp_algorithm_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ int prio = ZRAM_SECONDARY_COMP;
+ char *args, *param, *val;
+ char *alg = NULL;
+ int ret;
+
+ args = skip_spaces(buf);
+ while (*args) {
+ args = next_arg(args, &param, &val);
+
+ if (!*val)
+ return -EINVAL;
+
+ if (!strcmp(param, "algo")) {
+ alg = val;
+ continue;
+ }
+
+ if (!strcmp(param, "priority")) {
+ ret = kstrtoint(val, 10, &prio);
+ if (ret)
+ return ret;
+ continue;
+ }
+ }
+
+ if (!alg)
+ return -EINVAL;
+
+ if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
+ return -EINVAL;
+
+ ret = __comp_algorithm_store(zram, prio, alg);
+ return ret ? ret : len;
}
+#endif
static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
@@ -1210,6 +1336,11 @@ static void zram_free_page(struct zram *zram, size_t index)
atomic64_dec(&zram->stats.huge_pages);
}
+ if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+
+ zram_set_priority(zram, index, 0);
+
if (zram_test_flag(zram, index, ZRAM_WB)) {
zram_clear_flag(zram, index, ZRAM_WB);
free_block_bdev(zram, zram_get_element(zram, index));
@@ -1242,32 +1373,37 @@ out:
~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
-static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
- struct bio *bio, bool partial_io)
+/*
+ * Reads a page from the writeback devices. Corresponding ZRAM slot
+ * should be unlocked.
+ */
+static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
+ u32 index, struct bio *bio, bool partial_io)
+{
+ struct bio_vec bvec = {
+ .bv_page = page,
+ .bv_len = PAGE_SIZE,
+ .bv_offset = 0,
+ };
+
+ return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
+ partial_io);
+}
+
+/*
+ * Reads (decompresses if needed) a page from zspool (zsmalloc).
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_read_from_zspool(struct zram *zram, struct page *page,
+ u32 index)
{
struct zcomp_strm *zstrm;
unsigned long handle;
unsigned int size;
void *src, *dst;
+ u32 prio;
int ret;
- zram_slot_lock(zram, index);
- if (zram_test_flag(zram, index, ZRAM_WB)) {
- struct bio_vec bvec;
-
- zram_slot_unlock(zram, index);
- /* A null bio means rw_page was used, we must fallback to bio */
- if (!bio)
- return -EOPNOTSUPP;
-
- bvec.bv_page = page;
- bvec.bv_len = PAGE_SIZE;
- bvec.bv_offset = 0;
- return read_from_bdev(zram, &bvec,
- zram_get_element(zram, index),
- bio, partial_io);
- }
-
handle = zram_get_handle(zram, index);
if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
unsigned long value;
@@ -1277,14 +1413,15 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
mem = kmap_atomic(page);
zram_fill_page(mem, PAGE_SIZE, value);
kunmap_atomic(mem);
- zram_slot_unlock(zram, index);
return 0;
}
size = zram_get_obj_size(zram, index);
- if (size != PAGE_SIZE)
- zstrm = zcomp_stream_get(zram->comp);
+ if (size != PAGE_SIZE) {
+ prio = zram_get_priority(zram, index);
+ zstrm = zcomp_stream_get(zram->comps[prio]);
+ }
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
@@ -1296,20 +1433,43 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
dst = kmap_atomic(page);
ret = zcomp_decompress(zstrm, src, size, dst);
kunmap_atomic(dst);
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[prio]);
}
zs_unmap_object(zram->mem_pool, handle);
- zram_slot_unlock(zram, index);
+ return ret;
+}
+
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
+{
+ int ret;
+
+ zram_slot_lock(zram, index);
+ if (!zram_test_flag(zram, index, ZRAM_WB)) {
+ /* Slot should be locked through out the function call */
+ ret = zram_read_from_zspool(zram, page, index);
+ zram_slot_unlock(zram, index);
+ } else {
+ /* Slot should be unlocked before the function call */
+ zram_slot_unlock(zram, index);
+
+ /* A null bio means rw_page was used, we must fallback to bio */
+ if (!bio)
+ return -EOPNOTSUPP;
+
+ ret = zram_bvec_read_from_bdev(zram, page, index, bio,
+ partial_io);
+ }
/* Should NEVER happen. Return bio error if it does. */
- if (WARN_ON(ret))
+ if (WARN_ON(ret < 0))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset, struct bio *bio)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
@@ -1363,13 +1523,13 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
kunmap_atomic(mem);
compress_again:
- zstrm = zcomp_stream_get(zram->comp);
+ zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
pr_err("Compression failed! err=%d\n", ret);
zs_free(zram->mem_pool, handle);
return ret;
@@ -1390,19 +1550,19 @@ compress_again:
* if we have a 'non-null' handle here then we are coming
* from the slow path and handle has already been allocated.
*/
- if (IS_ERR((void *)handle))
+ if (IS_ERR_VALUE(handle))
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
__GFP_HIGHMEM |
__GFP_MOVABLE);
- if (IS_ERR((void *)handle)) {
- zcomp_stream_put(zram->comp);
+ if (IS_ERR_VALUE(handle)) {
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- if (IS_ERR((void *)handle))
+ if (IS_ERR_VALUE(handle))
return PTR_ERR((void *)handle);
if (comp_len != PAGE_SIZE)
@@ -1414,14 +1574,14 @@ compress_again:
* zstrm buffer back. It is necessary that the dereferencing
* of the zstrm variable below occurs correctly.
*/
- zstrm = zcomp_stream_get(zram->comp);
+ zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
}
alloced_pages = zs_get_total_pages(zram->mem_pool);
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
@@ -1435,7 +1595,7 @@ compress_again:
if (comp_len == PAGE_SIZE)
kunmap_atomic(src);
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
zs_unmap_object(zram->mem_pool, handle);
atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
@@ -1504,6 +1664,274 @@ out:
return ret;
}
+#ifdef CONFIG_ZRAM_MULTI_COMP
+/*
+ * This function will decompress (unless it's ZRAM_HUGE) the page and then
+ * attempt to compress it using provided compression algorithm priority
+ * (which is potentially more effective).
+ *
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_recompress(struct zram *zram, u32 index, struct page *page,
+ u32 threshold, u32 prio, u32 prio_max)
+{
+ struct zcomp_strm *zstrm = NULL;
+ unsigned long handle_old;
+ unsigned long handle_new;
+ unsigned int comp_len_old;
+ unsigned int comp_len_new;
+ unsigned int class_index_old;
+ unsigned int class_index_new;
+ u32 num_recomps = 0;
+ void *src, *dst;
+ int ret;
+
+ handle_old = zram_get_handle(zram, index);
+ if (!handle_old)
+ return -EINVAL;
+
+ comp_len_old = zram_get_obj_size(zram, index);
+ /*
+ * Do not recompress objects that are already "small enough".
+ */
+ if (comp_len_old < threshold)
+ return 0;
+
+ ret = zram_read_from_zspool(zram, page, index);
+ if (ret)
+ return ret;
+
+ class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
+ /*
+ * Iterate the secondary comp algorithms list (in order of priority)
+ * and try to recompress the page.
+ */
+ for (; prio < prio_max; prio++) {
+ if (!zram->comps[prio])
+ continue;
+
+ /*
+ * Skip if the object is already re-compressed with a higher
+ * priority algorithm (or same algorithm).
+ */
+ if (prio <= zram_get_priority(zram, index))
+ continue;
+
+ num_recomps++;
+ zstrm = zcomp_stream_get(zram->comps[prio]);
+ src = kmap_atomic(page);
+ ret = zcomp_compress(zstrm, src, &comp_len_new);
+ kunmap_atomic(src);
+
+ if (ret) {
+ zcomp_stream_put(zram->comps[prio]);
+ return ret;
+ }
+
+ class_index_new = zs_lookup_class_index(zram->mem_pool,
+ comp_len_new);
+
+ /* Continue until we make progress */
+ if (class_index_new >= class_index_old ||
+ (threshold && comp_len_new >= threshold)) {
+ zcomp_stream_put(zram->comps[prio]);
+ continue;
+ }
+
+ /* Recompression was successful so break out */
+ break;
+ }
+
+ /*
+ * We did not try to recompress, e.g. when we have only one
+ * secondary algorithm and the page is already recompressed
+ * using that algorithm
+ */
+ if (!zstrm)
+ return 0;
+
+ if (class_index_new >= class_index_old) {
+ /*
+ * Secondary algorithms failed to re-compress the page
+ * in a way that would save memory, mark the object as
+ * incompressible so that we will not try to compress
+ * it again.
+ *
+ * We need to make sure that all secondary algorithms have
+ * failed, so we test if the number of recompressions matches
+ * the number of active secondary algorithms.
+ */
+ if (num_recomps == zram->num_active_comps - 1)
+ zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+ return 0;
+ }
+
+ /* Successful recompression but above threshold */
+ if (threshold && comp_len_new >= threshold)
+ return 0;
+
+ /*
+ * No direct reclaim (slow path) for handle allocation and no
+ * re-compression attempt (unlike in __zram_bvec_write()) since
+ * we already have stored that object in zsmalloc. If we cannot
+ * alloc memory for recompressed object then we bail out and
+ * simply keep the old (existing) object in zsmalloc.
+ */
+ handle_new = zs_malloc(zram->mem_pool, comp_len_new,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (IS_ERR_VALUE(handle_new)) {
+ zcomp_stream_put(zram->comps[prio]);
+ return PTR_ERR((void *)handle_new);
+ }
+
+ dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
+ memcpy(dst, zstrm->buffer, comp_len_new);
+ zcomp_stream_put(zram->comps[prio]);
+
+ zs_unmap_object(zram->mem_pool, handle_new);
+
+ zram_free_page(zram, index);
+ zram_set_handle(zram, index, handle_new);
+ zram_set_obj_size(zram, index, comp_len_new);
+ zram_set_priority(zram, index, prio);
+
+ atomic64_add(comp_len_new, &zram->stats.compr_data_size);
+ atomic64_inc(&zram->stats.pages_stored);
+
+ return 0;
+}
+
+#define RECOMPRESS_IDLE (1 << 0)
+#define RECOMPRESS_HUGE (1 << 1)
+
+static ssize_t recompress_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
+ struct zram *zram = dev_to_zram(dev);
+ unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ char *args, *param, *val, *algo = NULL;
+ u32 mode = 0, threshold = 0;
+ unsigned long index;
+ struct page *page;
+ ssize_t ret;
+
+ args = skip_spaces(buf);
+ while (*args) {
+ args = next_arg(args, &param, &val);
+
+ if (!*val)
+ return -EINVAL;
+
+ if (!strcmp(param, "type")) {
+ if (!strcmp(val, "idle"))
+ mode = RECOMPRESS_IDLE;
+ if (!strcmp(val, "huge"))
+ mode = RECOMPRESS_HUGE;
+ if (!strcmp(val, "huge_idle"))
+ mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
+ continue;
+ }
+
+ if (!strcmp(param, "threshold")) {
+ /*
+ * We will re-compress only idle objects equal or
+ * greater in size than watermark.
+ */
+ ret = kstrtouint(val, 10, &threshold);
+ if (ret)
+ return ret;
+ continue;
+ }
+
+ if (!strcmp(param, "algo")) {
+ algo = val;
+ continue;
+ }
+ }
+
+ if (threshold >= PAGE_SIZE)
+ return -EINVAL;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ ret = -EINVAL;
+ goto release_init_lock;
+ }
+
+ if (algo) {
+ bool found = false;
+
+ for (; prio < ZRAM_MAX_COMPS; prio++) {
+ if (!zram->comp_algs[prio])
+ continue;
+
+ if (!strcmp(zram->comp_algs[prio], algo)) {
+ prio_max = min(prio + 1, ZRAM_MAX_COMPS);
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ ret = -EINVAL;
+ goto release_init_lock;
+ }
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto release_init_lock;
+ }
+
+ ret = len;
+ for (index = 0; index < nr_pages; index++) {
+ int err = 0;
+
+ zram_slot_lock(zram, index);
+