aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 14:21:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 14:21:25 -0700
commit8374cfe647a1f360be3228b949dd6d753c55c19c (patch)
treed7e5cf67dc5b3ea62901556a2baf2be090a1dc6a
parentc013d0af81f60cc7dbe357c4e2a925fb6738dbfe (diff)
parent9dd1cd3220eca534f2d47afad7ce85f4c40118d8 (diff)
Merge tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Refactor DM core's mempool allocation so that it clearer by not being split acorss files. - Improve DM core's BLK_STS_DM_REQUEUE and BLK_STS_AGAIN handling. - Optimize DM core's more common bio splitting by eliminating the use of bio cloning with bio_split+bio_chain. Shift that cloning cost to the relatively unlikely dm_io requeue case that only occurs during error handling. Introduces dm_io_rewind() that will clone a bio that reflects the subset of the original bio that must be requeued. - Remove DM core's dm_table_get_num_targets() wrapper and audit all dm_table_get_target() callers. - Fix potential for OOM with DM writecache target by setting a default MAX_WRITEBACK_JOBS (set to 256MiB or 1/16 of total system memory, whichever is smaller). - Fix DM writecache target's stats that are reported through DM-specific table info. - Fix use-after-free crash in dm_sm_register_threshold_callback(). - Refine DM core's Persistent Reservation handling in preparation for broader work Mike Christie is doing to add compatibility with Microsoft Windows Failover Cluster. - Fix various KASAN reported bugs in the DM raid target. - Fix DM raid target crash due to md_handle_request() bio splitting that recurses to block core without properly initializing the bio's bi_dev. - Fix some code comment typos and fix some Documentation formatting. * tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (29 commits) dm: fix dm-raid crash if md_handle_request() splits bio dm raid: fix address sanitizer warning in raid_resume dm raid: fix address sanitizer warning in raid_status dm: Start pr_preempt from the same starting path dm: Fix PR release handling for non All Registrants dm: Start pr_reserve from the same starting path dm: Allow dm_call_pr to be used for path searches dm: return early from dm_pr_call() if DM device is suspended dm thin: fix use-after-free crash in dm_sm_register_threshold_callback dm writecache: count number of blocks discarded, not number of discard bios dm writecache: count number of blocks written, not number of write bios dm writecache: count number of blocks read, not number of read bios dm writecache: return void from functions dm kcopyd: use __GFP_HIGHMEM when allocating pages dm writecache: set a default MAX_WRITEBACK_JOBS Documentation: dm writecache: Render status list as list Documentation: dm writecache: add blank line before optional parameters dm snapshot: fix typo in snapshot_map() comment dm raid: remove redundant "the" in parse_raid_params() comment dm cache: fix typo in 2 comment blocks ...
-rw-r--r--Documentation/admin-guide/device-mapper/writecache.rst18
-rw-r--r--drivers/md/Makefile2
-rw-r--r--drivers/md/dm-cache-metadata.h2
-rw-r--r--drivers/md/dm-cache-target.c2
-rw-r--r--drivers/md/dm-core.h23
-rw-r--r--drivers/md/dm-ima.c5
-rw-r--r--drivers/md/dm-io-rewind.c166
-rw-r--r--drivers/md/dm-ioctl.c6
-rw-r--r--drivers/md/dm-kcopyd.c2
-rw-r--r--drivers/md/dm-raid.c7
-rw-r--r--drivers/md/dm-rq.c1
-rw-r--r--drivers/md/dm-snap.c2
-rw-r--r--drivers/md/dm-table.c318
-rw-r--r--drivers/md/dm-thin-metadata.c7
-rw-r--r--drivers/md/dm-thin.c4
-rw-r--r--drivers/md/dm-verity-target.c7
-rw-r--r--drivers/md/dm-writecache.c43
-rw-r--r--drivers/md/dm-zone.c7
-rw-r--r--drivers/md/dm.c462
-rw-r--r--drivers/md/dm.h4
-rw-r--r--include/linux/device-mapper.h7
-rw-r--r--include/uapi/linux/dm-ioctl.h4
22 files changed, 694 insertions, 405 deletions
diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index 10429779a91a..60c16b7fd5ac 100644
--- a/Documentation/admin-guide/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -20,6 +20,7 @@ Constructor parameters:
size)
5. the number of optional parameters (the parameters with an argument
count as two)
+
start_sector n (default: 0)
offset from the start of cache device in 512-byte sectors
high_watermark n (default: 50)
@@ -74,20 +75,21 @@ Constructor parameters:
the origin volume in the last n milliseconds
Status:
+
1. error indicator - 0 if there was no error, otherwise error number
2. the number of blocks
3. the number of free blocks
4. the number of blocks under writeback
-5. the number of read requests
-6. the number of read requests that hit the cache
-7. the number of write requests
-8. the number of write requests that hit uncommitted block
-9. the number of write requests that hit committed block
-10. the number of write requests that bypass the cache
-11. the number of write requests that are allocated in the cache
+5. the number of read blocks
+6. the number of read blocks that hit the cache
+7. the number of write blocks
+8. the number of write blocks that hit uncommitted block
+9. the number of write blocks that hit committed block
+10. the number of write blocks that bypass the cache
+11. the number of write blocks that are allocated in the cache
12. the number of write requests that are blocked on the freelist
13. the number of flush requests
-14. the number of discard requests
+14. the number of discarded blocks
Messages:
flush
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 0454b0885b01..270f694850ec 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -5,7 +5,7 @@
dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
- dm-rq.o
+ dm-rq.o dm-io-rewind.o
dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-historical-service-time-y += dm-ps-historical-service-time.o
dm-io-affinity-y += dm-ps-io-affinity.o
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 179ed5bf81a3..0905f2c1615e 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -131,7 +131,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd);
* hints will be lost.
*
* The hints are indexed by the cblock, but many policies will not
- * neccessarily have a fast way of accessing efficiently via cblock. So
+ * necessarily have a fast way of accessing efficiently via cblock. So
* rather than querying the policy for each cblock, we let it walk its data
* structures and fill in the hints in whatever order it wishes.
*/
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 28c5de8eca4a..54a8d5c9a44e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2775,7 +2775,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
/*
* The discard block size in the on disk metadata is not
- * neccessarily the same as we're currently using. So we have to
+ * necessarily the same as we're currently using. So we have to
* be careful to only set the discarded attribute if we know it
* covers a complete block of the new size.
*/
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index c954ff91870e..6c6bd24774f2 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -22,6 +22,8 @@
#define DM_RESERVED_MAX_IOS 1024
+struct dm_io;
+
struct dm_kobject_holder {
struct kobject kobj;
struct completion completion;
@@ -91,6 +93,14 @@ struct mapped_device {
spinlock_t deferred_lock;
struct bio_list deferred;
+ /*
+ * requeue work context is needed for cloning one new bio
+ * to represent the dm_io to be requeued, since each
+ * dm_io may point to the original bio from FS.
+ */
+ struct work_struct requeue_work;
+ struct dm_io *requeue_list;
+
void *interface_ptr;
/*
@@ -216,6 +226,13 @@ struct dm_table {
#endif
};
+static inline struct dm_target *dm_table_get_target(struct dm_table *t,
+ unsigned int index)
+{
+ BUG_ON(index >= t->num_targets);
+ return t->targets + index;
+}
+
/*
* One of these is allocated per clone bio.
*/
@@ -230,6 +247,9 @@ struct dm_target_io {
sector_t old_sector;
struct bio clone;
};
+#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
+#define DM_IO_BIO_OFFSET \
+ (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
/*
* dm_target_io flags
@@ -272,7 +292,6 @@ struct dm_io {
atomic_t io_count;
struct mapped_device *md;
- struct bio *split_bio;
/* The three fields represent mapped part of original bio */
struct bio *orig_bio;
unsigned int sector_offset; /* offset to end of orig_bio */
@@ -300,6 +319,8 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
io->flags |= (1U << bit);
}
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs);
+
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index 1842d3a958ef..a1bd7cd52b1b 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -208,7 +208,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
if (!target_data_buf)
goto error;
- num_targets = dm_table_get_num_targets(table);
+ num_targets = table->num_targets;
if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio))
goto error;
@@ -237,9 +237,6 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
for (i = 0; i < num_targets; i++) {
struct dm_target *ti = dm_table_get_target(table, i);
- if (!ti)
- goto error;
-
last_target_measured = 0;
/*
diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c
new file mode 100644
index 000000000000..0db53ccb94ba
--- /dev/null
+++ b/drivers/md/dm-io-rewind.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022 Red Hat, Inc.
+ */
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+#include <linux/blk-integrity.h>
+
+#include "dm-core.h"
+
+static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv,
+ struct bvec_iter *iter,
+ unsigned int bytes)
+{
+ int idx;
+
+ iter->bi_size += bytes;
+ if (bytes <= iter->bi_bvec_done) {
+ iter->bi_bvec_done -= bytes;
+ return true;
+ }
+
+ bytes -= iter->bi_bvec_done;
+ idx = iter->bi_idx - 1;
+
+ while (idx >= 0 && bytes && bytes > bv[idx].bv_len) {
+ bytes -= bv[idx].bv_len;
+ idx--;
+ }
+
+ if (WARN_ONCE(idx < 0 && bytes,
+ "Attempted to rewind iter beyond bvec's boundaries\n")) {
+ iter->bi_size -= bytes;
+ iter->bi_bvec_done = 0;
+ iter->bi_idx = 0;
+ return false;
+ }
+
+ iter->bi_idx = idx;
+ iter->bi_bvec_done = bv[idx].bv_len - bytes;
+ return true;
+}
+
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+/**
+ * dm_bio_integrity_rewind - Rewind integrity vector
+ * @bio: bio whose integrity vector to update
+ * @bytes_done: number of data bytes to rewind
+ *
+ * Description: This function calculates how many integrity bytes the
+ * number of completed data bytes correspond to and rewind the
+ * integrity vector accordingly.
+ */
+static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done)
+{
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
+
+ bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9);
+ dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes);
+}
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+static inline void dm_bio_integrity_rewind(struct bio *bio,
+ unsigned int bytes_done)
+{
+ return;
+}
+
+#endif
+
+#if defined(CONFIG_BLK_INLINE_ENCRYPTION)
+
+/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */
+static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+ unsigned int dec)
+{
+ int i;
+
+ for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
+ u64 prev = dun[i];
+
+ dun[i] -= dec;
+ if (dun[i] > prev)
+ dec = 1;
+ else
+ dec = 0;
+ }
+}
+
+static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
+{
+ struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+
+ dm_bio_crypt_dun_decrement(bc->bc_dun,
+ bytes >> bc->bc_key->data_unit_size_bits);
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes)
+{
+ return;
+}
+
+#endif
+
+static inline void dm_bio_rewind_iter(const struct bio *bio,
+ struct bvec_iter *iter, unsigned int bytes)
+{
+ iter->bi_sector -= bytes >> 9;
+
+ /* No advance means no rewind */
+ if (bio_no_advance_iter(bio))
+ iter->bi_size += bytes;
+ else
+ dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
+}
+
+/**
+ * dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes.
+ * @bio: bio to rewind
+ * @bytes: how many bytes to rewind
+ *
+ * WARNING:
+ * Caller must ensure that @bio has a fixed end sector, to allow
+ * rewinding from end of bio and restoring its original position.
+ * Caller is also responsibile for restoring bio's size.
+ */
+static void dm_bio_rewind(struct bio *bio, unsigned bytes)
+{
+ if (bio_integrity(bio))
+ dm_bio_integrity_rewind(bio, bytes);
+
+ if (bio_has_crypt_ctx(bio))
+ dm_bio_crypt_rewind(bio, bytes);
+
+ dm_bio_rewind_iter(bio, &bio->bi_iter, bytes);
+}
+
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
+{
+ struct bio *orig = io->orig_bio;
+ struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
+ GFP_NOIO, bs);
+ /*
+ * dm_bio_rewind can restore to previous position since the
+ * end sector is fixed for original bio, but we still need
+ * to restore bio's size manually (using io->sectors).
+ */
+ dm_bio_rewind(new_orig, ((io->sector_offset << 9) -
+ orig->bi_iter.bi_size));
+ bio_trim(new_orig, 0, io->sectors);
+
+ bio_chain(new_orig, orig);
+ /*
+ * __bi_remaining was increased (by dm_split_and_process_bio),
+ * so must drop the one added in bio_chain.
+ */
+ atomic_dec(&orig->__bi_remaining);
+ io->orig_bio = new_orig;
+}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 87310fceb0d8..98976aaa9db9 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -832,7 +832,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) {
if (get_disk_ro(disk))
param->flags |= DM_READONLY_FLAG;
- param->target_count = dm_table_get_num_targets(table);
+ param->target_count = table->num_targets;
}
param->flags |= DM_ACTIVE_PRESENT_FLAG;
@@ -845,7 +845,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
if (table) {
if (!(dm_table_get_mode(table) & FMODE_WRITE))
param->flags |= DM_READONLY_FLAG;
- param->target_count = dm_table_get_num_targets(table);
+ param->target_count = table->num_targets;
}
dm_put_live_table(md, srcu_idx);
}
@@ -1248,7 +1248,7 @@ static void retrieve_status(struct dm_table *table,
type = STATUSTYPE_INFO;
/* Get all the target info */
- num_targets = dm_table_get_num_targets(table);
+ num_targets = table->num_targets;
for (i = 0; i < num_targets; i++) {
struct dm_target *ti = dm_table_get_target(table, i);
size_t l;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 9c8f3544e99d..4d3bbbea2e9a 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -219,7 +219,7 @@ static struct page_list *alloc_pl(gfp_t gfp)
if (!pl)
return NULL;
- pl->page = alloc_page(gfp);
+ pl->page = alloc_page(gfp | __GFP_HIGHMEM);
if (!pl->page) {
kfree(pl);
return NULL;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 66486b14ec33..1ec17c32867f 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1369,7 +1369,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
}
rs->md.bitmap_info.daemon_sleep = value;
} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) {
- /* Userspace passes new data_offset after having extended the the data image LV */
+ /* Userspace passes new data_offset after having extended the data image LV */
if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
rs->ti->error = "Only one data_offset argument pair allowed";
return -EINVAL;
@@ -3097,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
ti->num_flush_bios = 1;
+ ti->needs_bio_set_dev = true;
/* Restore any requested new layout for conversion decision */
rs_config_restore(rs, &rs_layout);
@@ -3509,7 +3510,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
{
struct raid_set *rs = ti->private;
struct mddev *mddev = &rs->md;
- struct r5conf *conf = mddev->private;
+ struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
unsigned long recovery;
unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -3819,7 +3820,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
- for (i = 0; i < mddev->raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
r = &rs->dev[i].rdev;
/* HM FIXME: enhance journal device recovery processing */
if (test_bit(Journal, &r->flags))
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a83b98a8d2a9..4f49bbcce4f1 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -43,7 +43,6 @@ unsigned dm_get_reserved_rq_based_ios(void)
return __dm_get_module_param(&reserved_rq_based_ios,
RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
-EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 0d336b5ec571..d1c2f84d27e3 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2026,7 +2026,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
/*
* Write to snapshot - higher level takes care of RW/RO
* flags so we should only get this if we are
- * writeable.
+ * writable.
*/
if (bio_data_dir(bio) == WRITE) {
pe = __lookup_pending_exception(s, chunk);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index df904b7e95ce..332f96b58252 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -6,6 +6,7 @@
*/
#include "dm-core.h"
+#include "dm-rq.h"
#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -174,8 +175,6 @@ static void dm_table_destroy_crypto_profile(struct dm_table *t);
void dm_table_destroy(struct dm_table *t)
{
- unsigned int i;
-
if (!t)
return;
@@ -184,13 +183,13 @@ void dm_table_destroy(struct dm_table *t)
kvfree(t->index[t->depth - 2]);
/* free the targets */
- for (i = 0; i < t->num_targets; i++) {
- struct dm_target *tgt = t->targets + i;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- if (tgt->type->dtr)
- tgt->type->dtr(tgt);
+ if (ti->type->dtr)
+ ti->type->dtr(ti);
- dm_put_target_type(tgt->type);
+ dm_put_target_type(ti->type);
}
kvfree(t->highs);
@@ -450,14 +449,14 @@ EXPORT_SYMBOL(dm_put_device);
/*
* Checks to see if the target joins onto the end of the table.
*/
-static int adjoin(struct dm_table *table, struct dm_target *ti)
+static int adjoin(struct dm_table *t, struct dm_target *ti)
{
struct dm_target *prev;
- if (!table->num_targets)
+ if (!t->num_targets)
return !ti->begin;
- prev = &table->targets[table->num_targets - 1];
+ prev = &t->targets[t->num_targets - 1];
return (ti->begin == (prev->begin + prev->len));
}
@@ -564,8 +563,8 @@ int dm_split_args(int *argc, char ***argvp, char *input)
* two or more targets, the size of each piece it gets split into must
* be compatible with the logical_block_size of the target processing it.
*/
-static int validate_hardware_logical_block_alignment(struct dm_table *table,
- struct queue_limits *limits)
+static int validate_hardware_logical_block_alignment(struct dm_table *t,
+ struct queue_limits *limits)
{
/*
* This function uses arithmetic modulo the logical_block_size
@@ -587,13 +586,13 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
struct dm_target *ti;
struct queue_limits ti_limits;
- unsigned i;
+ unsigned int i;
/*
* Check each entry in the table in turn.
*/
- for (i = 0; i < dm_table_get_num_targets(table); i++) {
- ti = dm_table_get_target(table, i);
+ for (i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
blk_set_stacking_limits(&ti_limits);
@@ -621,7 +620,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
if (remaining) {
DMWARN("%s: table line %u (start sect %llu len %llu) "
"not aligned to h/w logical block size %u",
- dm_device_name(table->md), i,
+ dm_device_name(t->md), i,
(unsigned long long) ti->begin,
(unsigned long long) ti->len,
limits->logical_block_size);
@@ -636,7 +635,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
{
int r = -EINVAL, argc;
char **argv;
- struct dm_target *tgt;
+ struct dm_target *ti;
if (t->singleton) {
DMERR("%s: target type %s must appear alone in table",
@@ -646,87 +645,87 @@ int dm_table_add_target(struct dm_table *t, const char *type,
BUG_ON(t->num_targets >= t->num_allocated);
- tgt = t->targets + t->num_targets;
- memset(tgt, 0, sizeof(*tgt));
+ ti = t->targets + t->num_targets;
+ memset(ti, 0, sizeof(*ti));
if (!len) {
DMERR("%s: zero-length target", dm_device_name(t->md));
return -EINVAL;
}
- tgt->type = dm_get_target_type(type);
- if (!tgt->type) {
+ ti->type = dm_get_target_type(type);
+ if (!ti->type) {
DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
return -EINVAL;
}
- if (dm_target_needs_singleton(tgt->type)) {
+ if (dm_target_needs_singleton(ti->type)) {
if (t->num_targets) {
- tgt->error = "singleton target type must appear alone in table";
+ ti->error = "singleton target type must appear alone in table";
goto bad;
}
t->singleton = true;
}
- if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
- tgt->error = "target type may not be included in a read-only table";
+ if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) {
+ ti->error = "target type may not be included in a read-only table";
goto bad;
}
if (t->immutable_target_type) {
- if (t->immutable_target_type != tgt->type) {
- tgt->error = "immutable target type cannot be mixed with other target types";
+ if (t->immutable_target_type != ti->type) {
+ ti->error = "immutable target type cannot be mixed with other target types";
goto bad;
}
- } else if (dm_target_is_immutable(tgt->type)) {
+ } else if (dm_target_is_immutable(ti->type)) {
if (t->num_targets) {
- tgt->error = "immutable target type cannot be mixed with other target types";
+ ti->error = "immutable target type cannot be mixed with other target types";
goto bad;
}
- t->immutable_target_type = tgt->type;
+ t->immutable_target_type = ti->type;
}
- if (dm_target_has_integrity(tgt->type))
+ if (dm_target_has_integrity(ti->type))
t->integrity_added = 1;
- tgt->table = t;
- tgt->begin = start;
- tgt->len = len;
- tgt->error = "Unknown error";
+ ti->table = t;
+ ti->begin = start;
+ ti->len = len;
+ ti->error = "Unknown error";
/*
* Does this target adjoin the previous one ?
*/
- if (!adjoin(t, tgt)) {
- tgt->error = "Gap in table";
+ if (!adjoin(t, ti)) {
+ ti->error = "Gap in table";
goto bad;
}
r = dm_split_args(&argc, &argv, params);
if (r) {
- tgt->error = "couldn't split parameters";
+ ti->error = "couldn't split parameters";
goto bad;
}
- r = tgt->type->ctr(tgt, argc, argv);
+ r = ti->type->ctr(ti, argc, argv);
kfree(argv);
if (r)
goto bad;
- t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
+ t->highs[t->num_targets++] = ti->begin + ti->len - 1;
- if (!tgt->num_discard_bios && tgt->discards_supported)
+ if (!ti->num_discard_bios && ti->discards_supported)
DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
dm_device_name(t->md), type);
- if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
+ if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
static_branch_enable(&swap_bios_enabled);
return 0;
bad:
- DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, tgt->error, ERR_PTR(r));
- dm_put_target_type(tgt->type);
+ DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
+ dm_put_target_type(ti->type);
return r;
}
@@ -825,14 +824,11 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de
}
static bool dm_table_supports_dax(struct dm_table *t,
- iterate_devices_callout_fn iterate_fn)
+ iterate_devices_callout_fn iterate_fn)
{
- struct dm_target *ti;
- unsigned i;
-
/* Ensure that all targets support DAX. */
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->direct_access)
return false;
@@ -860,9 +856,8 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
static int dm_table_determine_type(struct dm_table *t)
{
- unsigned i;
unsigned bio_based = 0, request_based = 0, hybrid = 0;
- struct dm_target *tgt;
+ struct dm_target *ti;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
@@ -876,11 +871,11 @@ static int dm_table_determine_type(struct dm_table *t)
goto verify_rq_based;
}
- for (i = 0; i < t->num_targets; i++) {
- tgt = t->targets + i;
- if (dm_target_hybrid(tgt))
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
+ if (dm_target_hybrid(ti))
hybrid = 1;
- else if (dm_target_request_based(tgt))
+ else if (dm_target_request_based(ti))
request_based = 1;
else
bio_based = 1;
@@ -942,18 +937,18 @@ verify_rq_based:
return 0;
}
- tgt = dm_table_get_immutable_target(t);
- if (!tgt) {
+ ti = dm_table_get_immutable_target(t);
+ if (!ti) {
DMERR("table load rejected: immutable target is required");
return -EINVAL;
- } else if (tgt->max_io_len) {
+ } else if (ti->max_io_len) {
DMERR("table load rejected: immutable target that splits IO is not supported");
return -EINVAL;
}
/* Non-request-stackable devices can't be used for request-based dm */
- if (!tgt->type->iterate_devices ||
- !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
+ if (!ti->type->iterate_devices ||
+ !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
@@ -983,11 +978,9 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
{
- struct dm_target *ti;
- unsigned i;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
- for (i = 0; i < dm_table_get_num_targets(t); i++) {
- ti = dm_table_get_target(t, i);
if (dm_target_is_wildcard(ti->type))
return ti;
}
@@ -1010,32 +1003,56 @@ static bool dm_table_supports_poll(struct dm_table *t);
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
- unsigned per_io_data_size = 0;
- unsigned min_pool_size = 0;
- struct dm_target *ti;
- unsigned i;
- bool poll_supported = false;
+ unsigned int per_io_data_size = 0, front_pad, io_front_pad;
+ unsigned int min_pool_size = 0, pool_size;
+ struct dm_md_mempools *pools;
if (unlikely(type == DM_TYPE_NONE)) {
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}
- if (__table_type_bio_based(type)) {
- for (i = 0; i < t->num_targets; i++) {
- ti = t->targets + i;
- per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
- min_pool_size = max(min_pool_size, ti->num_flush_bios);
- }
- poll_supported = dm_table_supports_poll(t);
+ pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
+ if (!pools)
+ return -ENOMEM;
+
+ if (type == DM_TYPE_REQUEST_BASED) {
+ pool_size = dm_get_reserved_rq_based_ios();
+ front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
+ goto init_bs;
}
- t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size,
- t->integrity_supported, poll_supported);
- if (!t-&