diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 14:21:25 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 14:21:25 -0700 |
| commit | 8374cfe647a1f360be3228b949dd6d753c55c19c (patch) | |
| tree | d7e5cf67dc5b3ea62901556a2baf2be090a1dc6a | |
| parent | c013d0af81f60cc7dbe357c4e2a925fb6738dbfe (diff) | |
| parent | 9dd1cd3220eca534f2d47afad7ce85f4c40118d8 (diff) | |
Merge tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Refactor DM core's mempool allocation so that it is clearer by not being
split across files.
- Improve DM core's BLK_STS_DM_REQUEUE and BLK_STS_AGAIN handling.
- Optimize DM core's more common bio splitting by eliminating the use
of bio cloning with bio_split+bio_chain. Shift that cloning cost to
the relatively unlikely dm_io requeue case that only occurs during
error handling. Introduces dm_io_rewind() that will clone a bio that
reflects the subset of the original bio that must be requeued.
- Remove DM core's dm_table_get_num_targets() wrapper and audit all
dm_table_get_target() callers.
- Fix potential for OOM with DM writecache target by setting a default
MAX_WRITEBACK_JOBS (set to 256MiB or 1/16 of total system memory,
whichever is smaller).
- Fix DM writecache target's stats that are reported through
DM-specific table info.
- Fix use-after-free crash in dm_sm_register_threshold_callback().
- Refine DM core's Persistent Reservation handling in preparation for
broader work Mike Christie is doing to add compatibility with
Microsoft Windows Failover Cluster.
- Fix various KASAN reported bugs in the DM raid target.
- Fix DM raid target crash due to md_handle_request() bio splitting
that recurses to block core without properly initializing the bio's
bi_dev.
- Fix some code comment typos and fix some Documentation formatting.
* tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (29 commits)
dm: fix dm-raid crash if md_handle_request() splits bio
dm raid: fix address sanitizer warning in raid_resume
dm raid: fix address sanitizer warning in raid_status
dm: Start pr_preempt from the same starting path
dm: Fix PR release handling for non All Registrants
dm: Start pr_reserve from the same starting path
dm: Allow dm_call_pr to be used for path searches
dm: return early from dm_pr_call() if DM device is suspended
dm thin: fix use-after-free crash in dm_sm_register_threshold_callback
dm writecache: count number of blocks discarded, not number of discard bios
dm writecache: count number of blocks written, not number of write bios
dm writecache: count number of blocks read, not number of read bios
dm writecache: return void from functions
dm kcopyd: use __GFP_HIGHMEM when allocating pages
dm writecache: set a default MAX_WRITEBACK_JOBS
Documentation: dm writecache: Render status list as list
Documentation: dm writecache: add blank line before optional parameters
dm snapshot: fix typo in snapshot_map() comment
dm raid: remove redundant "the" in parse_raid_params() comment
dm cache: fix typo in 2 comment blocks
...
| -rw-r--r-- | Documentation/admin-guide/device-mapper/writecache.rst | 18 | ||||
| -rw-r--r-- | drivers/md/Makefile | 2 | ||||
| -rw-r--r-- | drivers/md/dm-cache-metadata.h | 2 | ||||
| -rw-r--r-- | drivers/md/dm-cache-target.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-core.h | 23 | ||||
| -rw-r--r-- | drivers/md/dm-ima.c | 5 | ||||
| -rw-r--r-- | drivers/md/dm-io-rewind.c | 166 | ||||
| -rw-r--r-- | drivers/md/dm-ioctl.c | 6 | ||||
| -rw-r--r-- | drivers/md/dm-kcopyd.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-raid.c | 7 | ||||
| -rw-r--r-- | drivers/md/dm-rq.c | 1 | ||||
| -rw-r--r-- | drivers/md/dm-snap.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 318 | ||||
| -rw-r--r-- | drivers/md/dm-thin-metadata.c | 7 | ||||
| -rw-r--r-- | drivers/md/dm-thin.c | 4 | ||||
| -rw-r--r-- | drivers/md/dm-verity-target.c | 7 | ||||
| -rw-r--r-- | drivers/md/dm-writecache.c | 43 | ||||
| -rw-r--r-- | drivers/md/dm-zone.c | 7 | ||||
| -rw-r--r-- | drivers/md/dm.c | 462 | ||||
| -rw-r--r-- | drivers/md/dm.h | 4 | ||||
| -rw-r--r-- | include/linux/device-mapper.h | 7 | ||||
| -rw-r--r-- | include/uapi/linux/dm-ioctl.h | 4 |
22 files changed, 694 insertions, 405 deletions
diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst index 10429779a91a..60c16b7fd5ac 100644 --- a/Documentation/admin-guide/device-mapper/writecache.rst +++ b/Documentation/admin-guide/device-mapper/writecache.rst @@ -20,6 +20,7 @@ Constructor parameters: size) 5. the number of optional parameters (the parameters with an argument count as two) + start_sector n (default: 0) offset from the start of cache device in 512-byte sectors high_watermark n (default: 50) @@ -74,20 +75,21 @@ Constructor parameters: the origin volume in the last n milliseconds Status: + 1. error indicator - 0 if there was no error, otherwise error number 2. the number of blocks 3. the number of free blocks 4. the number of blocks under writeback -5. the number of read requests -6. the number of read requests that hit the cache -7. the number of write requests -8. the number of write requests that hit uncommitted block -9. the number of write requests that hit committed block -10. the number of write requests that bypass the cache -11. the number of write requests that are allocated in the cache +5. the number of read blocks +6. the number of read blocks that hit the cache +7. the number of write blocks +8. the number of write blocks that hit uncommitted block +9. the number of write blocks that hit committed block +10. the number of write blocks that bypass the cache +11. the number of write blocks that are allocated in the cache 12. the number of write requests that are blocked on the freelist 13. the number of flush requests -14. the number of discard requests +14. 
the number of discarded blocks Messages: flush diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 0454b0885b01..270f694850ec 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -5,7 +5,7 @@ dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ - dm-rq.o + dm-rq.o dm-io-rewind.o dm-multipath-y += dm-path-selector.o dm-mpath.o dm-historical-service-time-y += dm-ps-historical-service-time.o dm-io-affinity-y += dm-ps-io-affinity.o diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 179ed5bf81a3..0905f2c1615e 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -131,7 +131,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd); * hints will be lost. * * The hints are indexed by the cblock, but many policies will not - * neccessarily have a fast way of accessing efficiently via cblock. So + * necessarily have a fast way of accessing efficiently via cblock. So * rather than querying the policy for each cblock, we let it walk its data * structures and fill in the hints in whatever order it wishes. */ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 28c5de8eca4a..54a8d5c9a44e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2775,7 +2775,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, /* * The discard block size in the on disk metadata is not - * neccessarily the same as we're currently using. So we have to + * necessarily the same as we're currently using. So we have to * be careful to only set the discarded attribute if we know it * covers a complete block of the new size. 
*/ diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index c954ff91870e..6c6bd24774f2 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -22,6 +22,8 @@ #define DM_RESERVED_MAX_IOS 1024 +struct dm_io; + struct dm_kobject_holder { struct kobject kobj; struct completion completion; @@ -91,6 +93,14 @@ struct mapped_device { spinlock_t deferred_lock; struct bio_list deferred; + /* + * requeue work context is needed for cloning one new bio + * to represent the dm_io to be requeued, since each + * dm_io may point to the original bio from FS. + */ + struct work_struct requeue_work; + struct dm_io *requeue_list; + void *interface_ptr; /* @@ -216,6 +226,13 @@ struct dm_table { #endif }; +static inline struct dm_target *dm_table_get_target(struct dm_table *t, + unsigned int index) +{ + BUG_ON(index >= t->num_targets); + return t->targets + index; +} + /* * One of these is allocated per clone bio. */ @@ -230,6 +247,9 @@ struct dm_target_io { sector_t old_sector; struct bio clone; }; +#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone)) +#define DM_IO_BIO_OFFSET \ + (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio)) /* * dm_target_io flags @@ -272,7 +292,6 @@ struct dm_io { atomic_t io_count; struct mapped_device *md; - struct bio *split_bio; /* The three fields represent mapped part of original bio */ struct bio *orig_bio; unsigned int sector_offset; /* offset to end of orig_bio */ @@ -300,6 +319,8 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit) io->flags |= (1U << bit); } +void dm_io_rewind(struct dm_io *io, struct bio_set *bs); + static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) { return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index 1842d3a958ef..a1bd7cd52b1b 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -208,7 +208,7 @@ void dm_ima_measure_on_table_load(struct 
dm_table *table, unsigned int status_fl if (!target_data_buf) goto error; - num_targets = dm_table_get_num_targets(table); + num_targets = table->num_targets; if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio)) goto error; @@ -237,9 +237,6 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); - if (!ti) - goto error; - last_target_measured = 0; /* diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c new file mode 100644 index 000000000000..0db53ccb94ba --- /dev/null +++ b/drivers/md/dm-io-rewind.c @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2022 Red Hat, Inc. + */ + +#include <linux/bio.h> +#include <linux/blk-crypto.h> +#include <linux/blk-integrity.h> + +#include "dm-core.h" + +static inline bool dm_bvec_iter_rewind(const struct bio_vec *bv, + struct bvec_iter *iter, + unsigned int bytes) +{ + int idx; + + iter->bi_size += bytes; + if (bytes <= iter->bi_bvec_done) { + iter->bi_bvec_done -= bytes; + return true; + } + + bytes -= iter->bi_bvec_done; + idx = iter->bi_idx - 1; + + while (idx >= 0 && bytes && bytes > bv[idx].bv_len) { + bytes -= bv[idx].bv_len; + idx--; + } + + if (WARN_ONCE(idx < 0 && bytes, + "Attempted to rewind iter beyond bvec's boundaries\n")) { + iter->bi_size -= bytes; + iter->bi_bvec_done = 0; + iter->bi_idx = 0; + return false; + } + + iter->bi_idx = idx; + iter->bi_bvec_done = bv[idx].bv_len - bytes; + return true; +} + +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +/** + * dm_bio_integrity_rewind - Rewind integrity vector + * @bio: bio whose integrity vector to update + * @bytes_done: number of data bytes to rewind + * + * Description: This function calculates how many integrity bytes the + * number of completed data bytes correspond to and rewind the + * integrity vector accordingly. 
+ */ +static void dm_bio_integrity_rewind(struct bio *bio, unsigned int bytes_done) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); + unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); + + bip->bip_iter.bi_sector -= bio_integrity_intervals(bi, bytes_done >> 9); + dm_bvec_iter_rewind(bip->bip_vec, &bip->bip_iter, bytes); +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +static inline void dm_bio_integrity_rewind(struct bio *bio, + unsigned int bytes_done) +{ + return; +} + +#endif + +#if defined(CONFIG_BLK_INLINE_ENCRYPTION) + +/* Decrements @dun by @dec, treating @dun as a multi-limb integer. */ +static void dm_bio_crypt_dun_decrement(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + unsigned int dec) +{ + int i; + + for (i = 0; dec && i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) { + u64 prev = dun[i]; + + dun[i] -= dec; + if (dun[i] > prev) + dec = 1; + else + dec = 0; + } +} + +static void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes) +{ + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + + dm_bio_crypt_dun_decrement(bc->bc_dun, + bytes >> bc->bc_key->data_unit_size_bits); +} + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline void dm_bio_crypt_rewind(struct bio *bio, unsigned int bytes) +{ + return; +} + +#endif + +static inline void dm_bio_rewind_iter(const struct bio *bio, + struct bvec_iter *iter, unsigned int bytes) +{ + iter->bi_sector -= bytes >> 9; + + /* No advance means no rewind */ + if (bio_no_advance_iter(bio)) + iter->bi_size += bytes; + else + dm_bvec_iter_rewind(bio->bi_io_vec, iter, bytes); +} + +/** + * dm_bio_rewind - update ->bi_iter of @bio by rewinding @bytes. + * @bio: bio to rewind + * @bytes: how many bytes to rewind + * + * WARNING: + * Caller must ensure that @bio has a fixed end sector, to allow + * rewinding from end of bio and restoring its original position. + * Caller is also responsibile for restoring bio's size. 
+ */ +static void dm_bio_rewind(struct bio *bio, unsigned bytes) +{ + if (bio_integrity(bio)) + dm_bio_integrity_rewind(bio, bytes); + + if (bio_has_crypt_ctx(bio)) + dm_bio_crypt_rewind(bio, bytes); + + dm_bio_rewind_iter(bio, &bio->bi_iter, bytes); +} + +void dm_io_rewind(struct dm_io *io, struct bio_set *bs) +{ + struct bio *orig = io->orig_bio; + struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig, + GFP_NOIO, bs); + /* + * dm_bio_rewind can restore to previous position since the + * end sector is fixed for original bio, but we still need + * to restore bio's size manually (using io->sectors). + */ + dm_bio_rewind(new_orig, ((io->sector_offset << 9) - + orig->bi_iter.bi_size)); + bio_trim(new_orig, 0, io->sectors); + + bio_chain(new_orig, orig); + /* + * __bi_remaining was increased (by dm_split_and_process_bio), + * so must drop the one added in bio_chain. + */ + atomic_dec(&orig->__bi_remaining); + io->orig_bio = new_orig; +} diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 87310fceb0d8..98976aaa9db9 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -832,7 +832,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) { if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; - param->target_count = dm_table_get_num_targets(table); + param->target_count = table->num_targets; } param->flags |= DM_ACTIVE_PRESENT_FLAG; @@ -845,7 +845,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (table) { if (!(dm_table_get_mode(table) & FMODE_WRITE)) param->flags |= DM_READONLY_FLAG; - param->target_count = dm_table_get_num_targets(table); + param->target_count = table->num_targets; } dm_put_live_table(md, srcu_idx); } @@ -1248,7 +1248,7 @@ static void retrieve_status(struct dm_table *table, type = STATUSTYPE_INFO; /* Get all the target info */ - num_targets = dm_table_get_num_targets(table); + num_targets = table->num_targets; 
for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); size_t l; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 9c8f3544e99d..4d3bbbea2e9a 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -219,7 +219,7 @@ static struct page_list *alloc_pl(gfp_t gfp) if (!pl) return NULL; - pl->page = alloc_page(gfp); + pl->page = alloc_page(gfp | __GFP_HIGHMEM); if (!pl->page) { kfree(pl); return NULL; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 66486b14ec33..1ec17c32867f 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1369,7 +1369,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, } rs->md.bitmap_info.daemon_sleep = value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) { - /* Userspace passes new data_offset after having extended the the data image LV */ + /* Userspace passes new data_offset after having extended the data image LV */ if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) { rs->ti->error = "Only one data_offset argument pair allowed"; return -EINVAL; @@ -3097,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; ti->num_flush_bios = 1; + ti->needs_bio_set_dev = true; /* Restore any requested new layout for conversion decision */ rs_config_restore(rs, &rs_layout); @@ -3509,7 +3510,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; - struct r5conf *conf = mddev->private; + struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL; int i, max_nr_stripes = conf ? 
conf->max_nr_stripes : 0; unsigned long recovery; unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */ @@ -3819,7 +3820,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); - for (i = 0; i < mddev->raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { r = &rs->dev[i].rdev; /* HM FIXME: enhance journal device recovery processing */ if (test_bit(Journal, &r->flags)) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index a83b98a8d2a9..4f49bbcce4f1 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -43,7 +43,6 @@ unsigned dm_get_reserved_rq_based_ios(void) return __dm_get_module_param(&reserved_rq_based_ios, RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS); } -EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); static unsigned dm_get_blk_mq_nr_hw_queues(void) { diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0d336b5ec571..d1c2f84d27e3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2026,7 +2026,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) /* * Write to snapshot - higher level takes care of RW/RO * flags so we should only get this if we are - * writeable. + * writable. 
*/ if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index df904b7e95ce..332f96b58252 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -6,6 +6,7 @@ */ #include "dm-core.h" +#include "dm-rq.h" #include <linux/module.h> #include <linux/vmalloc.h> @@ -174,8 +175,6 @@ static void dm_table_destroy_crypto_profile(struct dm_table *t); void dm_table_destroy(struct dm_table *t) { - unsigned int i; - if (!t) return; @@ -184,13 +183,13 @@ void dm_table_destroy(struct dm_table *t) kvfree(t->index[t->depth - 2]); /* free the targets */ - for (i = 0; i < t->num_targets; i++) { - struct dm_target *tgt = t->targets + i; + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); - if (tgt->type->dtr) - tgt->type->dtr(tgt); + if (ti->type->dtr) + ti->type->dtr(ti); - dm_put_target_type(tgt->type); + dm_put_target_type(ti->type); } kvfree(t->highs); @@ -450,14 +449,14 @@ EXPORT_SYMBOL(dm_put_device); /* * Checks to see if the target joins onto the end of the table. */ -static int adjoin(struct dm_table *table, struct dm_target *ti) +static int adjoin(struct dm_table *t, struct dm_target *ti) { struct dm_target *prev; - if (!table->num_targets) + if (!t->num_targets) return !ti->begin; - prev = &table->targets[table->num_targets - 1]; + prev = &t->targets[t->num_targets - 1]; return (ti->begin == (prev->begin + prev->len)); } @@ -564,8 +563,8 @@ int dm_split_args(int *argc, char ***argvp, char *input) * two or more targets, the size of each piece it gets split into must * be compatible with the logical_block_size of the target processing it. 
*/ -static int validate_hardware_logical_block_alignment(struct dm_table *table, - struct queue_limits *limits) +static int validate_hardware_logical_block_alignment(struct dm_table *t, + struct queue_limits *limits) { /* * This function uses arithmetic modulo the logical_block_size @@ -587,13 +586,13 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, struct dm_target *ti; struct queue_limits ti_limits; - unsigned i; + unsigned int i; /* * Check each entry in the table in turn. */ - for (i = 0; i < dm_table_get_num_targets(table); i++) { - ti = dm_table_get_target(table, i); + for (i = 0; i < t->num_targets; i++) { + ti = dm_table_get_target(t, i); blk_set_stacking_limits(&ti_limits); @@ -621,7 +620,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table, if (remaining) { DMWARN("%s: table line %u (start sect %llu len %llu) " "not aligned to h/w logical block size %u", - dm_device_name(table->md), i, + dm_device_name(t->md), i, (unsigned long long) ti->begin, (unsigned long long) ti->len, limits->logical_block_size); @@ -636,7 +635,7 @@ int dm_table_add_target(struct dm_table *t, const char *type, { int r = -EINVAL, argc; char **argv; - struct dm_target *tgt; + struct dm_target *ti; if (t->singleton) { DMERR("%s: target type %s must appear alone in table", @@ -646,87 +645,87 @@ int dm_table_add_target(struct dm_table *t, const char *type, BUG_ON(t->num_targets >= t->num_allocated); - tgt = t->targets + t->num_targets; - memset(tgt, 0, sizeof(*tgt)); + ti = t->targets + t->num_targets; + memset(ti, 0, sizeof(*ti)); if (!len) { DMERR("%s: zero-length target", dm_device_name(t->md)); return -EINVAL; } - tgt->type = dm_get_target_type(type); - if (!tgt->type) { + ti->type = dm_get_target_type(type); + if (!ti->type) { DMERR("%s: %s: unknown target type", dm_device_name(t->md), type); return -EINVAL; } - if (dm_target_needs_singleton(tgt->type)) { + if (dm_target_needs_singleton(ti->type)) { if (t->num_targets) { - 
tgt->error = "singleton target type must appear alone in table"; + ti->error = "singleton target type must appear alone in table"; goto bad; } t->singleton = true; } - if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) { - tgt->error = "target type may not be included in a read-only table"; + if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) { + ti->error = "target type may not be included in a read-only table"; goto bad; } if (t->immutable_target_type) { - if (t->immutable_target_type != tgt->type) { - tgt->error = "immutable target type cannot be mixed with other target types"; + if (t->immutable_target_type != ti->type) { + ti->error = "immutable target type cannot be mixed with other target types"; goto bad; } - } else if (dm_target_is_immutable(tgt->type)) { + } else if (dm_target_is_immutable(ti->type)) { if (t->num_targets) { - tgt->error = "immutable target type cannot be mixed with other target types"; + ti->error = "immutable target type cannot be mixed with other target types"; goto bad; } - t->immutable_target_type = tgt->type; + t->immutable_target_type = ti->type; } - if (dm_target_has_integrity(tgt->type)) + if (dm_target_has_integrity(ti->type)) t->integrity_added = 1; - tgt->table = t; - tgt->begin = start; - tgt->len = len; - tgt->error = "Unknown error"; + ti->table = t; + ti->begin = start; + ti->len = len; + ti->error = "Unknown error"; /* * Does this target adjoin the previous one ? 
*/ - if (!adjoin(t, tgt)) { - tgt->error = "Gap in table"; + if (!adjoin(t, ti)) { + ti->error = "Gap in table"; goto bad; } r = dm_split_args(&argc, &argv, params); if (r) { - tgt->error = "couldn't split parameters"; + ti->error = "couldn't split parameters"; goto bad; } - r = tgt->type->ctr(tgt, argc, argv); + r = ti->type->ctr(ti, argc, argv); kfree(argv); if (r) goto bad; - t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; + t->highs[t->num_targets++] = ti->begin + ti->len - 1; - if (!tgt->num_discard_bios && tgt->discards_supported) + if (!ti->num_discard_bios && ti->discards_supported) DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.", dm_device_name(t->md), type); - if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key)) + if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key)) static_branch_enable(&swap_bios_enabled); return 0; bad: - DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, tgt->error, ERR_PTR(r)); - dm_put_target_type(tgt->type); + DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r)); + dm_put_target_type(ti->type); return r; } @@ -825,14 +824,11 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de } static bool dm_table_supports_dax(struct dm_table *t, - iterate_devices_callout_fn iterate_fn) + iterate_devices_callout_fn iterate_fn) { - struct dm_target *ti; - unsigned i; - /* Ensure that all targets support DAX. 
*/ - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); if (!ti->type->direct_access) return false; @@ -860,9 +856,8 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, static int dm_table_determine_type(struct dm_table *t) { - unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - struct dm_target *tgt; + struct dm_target *ti; struct list_head *devices = dm_table_get_devices(t); enum dm_queue_mode live_md_type = dm_get_md_type(t->md); @@ -876,11 +871,11 @@ static int dm_table_determine_type(struct dm_table *t) goto verify_rq_based; } - for (i = 0; i < t->num_targets; i++) { - tgt = t->targets + i; - if (dm_target_hybrid(tgt)) + for (unsigned int i = 0; i < t->num_targets; i++) { + ti = dm_table_get_target(t, i); + if (dm_target_hybrid(ti)) hybrid = 1; - else if (dm_target_request_based(tgt)) + else if (dm_target_request_based(ti)) request_based = 1; else bio_based = 1; @@ -942,18 +937,18 @@ verify_rq_based: return 0; } - tgt = dm_table_get_immutable_target(t); - if (!tgt) { + ti = dm_table_get_immutable_target(t); + if (!ti) { DMERR("table load rejected: immutable target is required"); return -EINVAL; - } else if (tgt->max_io_len) { + } else if (ti->max_io_len) { DMERR("table load rejected: immutable target that splits IO is not supported"); return -EINVAL; } /* Non-request-stackable devices can't be used for request-based dm */ - if (!tgt->type->iterate_devices || - !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) { + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) { DMERR("table load rejected: including non-request-stackable devices"); return -EINVAL; } @@ -983,11 +978,9 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t) struct dm_target *dm_table_get_wildcard_target(struct dm_table *t) { - struct 
dm_target *ti; - unsigned i; + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); if (dm_target_is_wildcard(ti->type)) return ti; } @@ -1010,32 +1003,56 @@ static bool dm_table_supports_poll(struct dm_table *t); static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) { enum dm_queue_mode type = dm_table_get_type(t); - unsigned per_io_data_size = 0; - unsigned min_pool_size = 0; - struct dm_target *ti; - unsigned i; - bool poll_supported = false; + unsigned int per_io_data_size = 0, front_pad, io_front_pad; + unsigned int min_pool_size = 0, pool_size; + struct dm_md_mempools *pools; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - if (__table_type_bio_based(type)) { - for (i = 0; i < t->num_targets; i++) { - ti = t->targets + i; - per_io_data_size = max(per_io_data_size, ti->per_io_data_size); - min_pool_size = max(min_pool_size, ti->num_flush_bios); - } - poll_supported = dm_table_supports_poll(t); + pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); + if (!pools) + return -ENOMEM; + + if (type == DM_TYPE_REQUEST_BASED) { + pool_size = dm_get_reserved_rq_based_ios(); + front_pad = offsetof(struct dm_rq_clone_bio_info, clone); + goto init_bs; } - t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size, - t->integrity_supported, poll_supported); - if (!t-& |
