diff options
| author | Jens Axboe <axboe@fb.com> | 2017-02-17 14:06:45 -0700 |
|---|---|---|
| committer | Jens Axboe <axboe@fb.com> | 2017-02-17 14:06:45 -0700 |
| commit | 6010720da8aab51f33beee63b73cf88016e9b250 (patch) | |
| tree | a4c5a7f645998e86a1f49cb05f8e0c4e51448294 | |
| parent | 2fe1e8a7b2f4dcac3fcb07ff06b0ae7396201fd6 (diff) | |
| parent | 8a9ae523282f324989850fcf41312b42a2fb9296 (diff) | |
Merge branch 'for-4.11/block' into for-4.11/linus-merge
Signed-off-by: Jens Axboe <axboe@fb.com>
68 files changed, 7340 insertions, 2832 deletions
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex index c06233fe52ac..8f85b0e41046 100644 --- a/Documentation/cdrom/cdrom-standard.tex +++ b/Documentation/cdrom/cdrom-standard.tex @@ -249,7 +249,6 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr unsigned\ long);\cr \noalign{\medskip} &const\ int& capability;& capability flags \cr - &int& n_minors;& number of active minor devices \cr \};\cr } $$ @@ -258,13 +257,7 @@ it should add a function pointer to this $struct$. When a particular function is not implemented, however, this $struct$ should contain a NULL instead. The $capability$ flags specify the capabilities of the \cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive -is registered with the \UCD. The value $n_minors$ should be a positive -value indicating the number of minor devices that are supported by -the low-level device driver, normally~1. Although these two variables -are `informative' rather than `operational,' they are included in -$cdrom_device_ops$ because they describe the capability of the {\em -driver\/} rather than the {\em drive}. Nomenclature has always been -difficult in computer programming. +is registered with the \UCD. Note that most functions have fewer parameters than their $blkdev_fops$ counterparts. This is because very little of the diff --git a/MAINTAINERS b/MAINTAINERS index 527d13759ecc..864e1fd31f0c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8612,10 +8612,10 @@ S: Maintained F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) -M: Markus Pargmann <mpa@pengutronix.de> +M: Josef Bacik <jbacik@fb.com> S: Maintained +L: linux-block@vger.kernel.org L: nbd-general@lists.sourceforge.net -T: git git://git.pengutronix.de/git/mpa/linux-nbd.git F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c F: include/uapi/linux/nbd.h @@ -11089,6 +11089,17 @@ L: linux-mmc@vger.kernel.org S: Maintained F: drivers/mmc/host/sdhci-spear.c +SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER +M: Scott Bauer <scott.bauer@intel.com> +M: Jonathan Derrick <jonathan.derrick@intel.com> +M: Rafael Antognolli <rafael.antognolli@intel.com> +L: linux-block@vger.kernel.org +S: Supported +F: block/sed* +F: block/opal_proto.h +F: include/linux/sed* +F: include/uapi/linux/sed* + SECURITY SUBSYSTEM M: James Morris <james.l.morris@oracle.com> M: "Serge E. Hallyn" <serge@hallyn.com> diff --git a/block/Kconfig b/block/Kconfig index 8bf114a3858a..1aef809affae 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -147,6 +147,25 @@ config BLK_WBT_MQ Multiqueue currently doesn't have support for IO scheduling, enabling this option is recommended. +config BLK_DEBUG_FS + bool "Block layer debugging information in debugfs" + default y + depends on DEBUG_FS + ---help--- + Include block layer debugging information in debugfs. This information + is mostly useful for kernel developers, but it doesn't incur any cost + at runtime. + + Unless you are building a kernel for a tiny system, you should + say Y here. + +config BLK_SED_OPAL + bool "Logic for interfacing with Opal enabled SEDs" + ---help--- + Builds Logic for interfacing with Opal enabled controllers. + Enabling this option enables users to setup/unlock/lock + Locking ranges for SED devices using the Opal protocol. + menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 421bef9c4c48..0715ce93daef 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -63,6 +63,56 @@ config DEFAULT_IOSCHED default "cfq" if DEFAULT_CFQ default "noop" if DEFAULT_NOOP +config MQ_IOSCHED_DEADLINE + tristate "MQ deadline I/O scheduler" + default y + ---help--- + MQ version of the deadline IO scheduler. + +config MQ_IOSCHED_NONE + bool + default y + +choice + prompt "Default single-queue blk-mq I/O scheduler" + default DEFAULT_SQ_NONE + help + Select the I/O scheduler which will be used by default for blk-mq + managed block devices with a single queue. + + config DEFAULT_SQ_DEADLINE + bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y + + config DEFAULT_SQ_NONE + bool "None" + +endchoice + +config DEFAULT_SQ_IOSCHED + string + default "mq-deadline" if DEFAULT_SQ_DEADLINE + default "none" if DEFAULT_SQ_NONE + +choice + prompt "Default multi-queue blk-mq I/O scheduler" + default DEFAULT_MQ_NONE + help + Select the I/O scheduler which will be used by default for blk-mq + managed block devices with multiple queues. + + config DEFAULT_MQ_DEADLINE + bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y + + config DEFAULT_MQ_NONE + bool "None" + +endchoice + +config DEFAULT_MQ_IOSCHED + string + default "mq-deadline" if DEFAULT_MQ_DEADLINE + default "none" if DEFAULT_MQ_NONE + endmenu endif diff --git a/block/Makefile b/block/Makefile index a827f988c4e6..6ba1b1bc9529 100644 --- a/block/Makefile +++ b/block/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ - blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ + blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ @@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o @@ -25,3 +26,5 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o +obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o +obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o diff --git a/block/bio.c b/block/bio.c index 2b375020fc49..d3c26d1cb1da 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1403,7 +1403,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, bio_set_flag(bio, BIO_USER_MAPPED); /* - * subtle -- if __bio_map_user() ended up bouncing a bio, + * subtle -- if bio_map_user_iov() ended up bouncing a bio, * it would normally disappear when its bi_end_io is run. * however, we need it for the unmap, so grab an extra * reference to it @@ -1445,8 +1445,8 @@ static void __bio_unmap_user(struct bio *bio) * bio_unmap_user - unmap a bio * @bio: the bio being unmapped * - * Unmap a bio previously mapped by bio_map_user(). Must be called with - * a process context. + * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from + * process context. * * bio_unmap_user() may sleep. */ diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8ba0af780e88..fb59a3edc778 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q, if (blkcg_policy_enabled(q, pol)) return 0; - blk_queue_bypass_start(q); + if (q->mq_ops) + blk_mq_freeze_queue(q); + else + blk_queue_bypass_start(q); pd_prealloc: if (!pd_prealloc) { pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); @@ -1261,7 +1264,10 @@ pd_prealloc: spin_unlock_irq(q->queue_lock); out_bypass_end: - blk_queue_bypass_end(q); + if (q->mq_ops) + blk_mq_unfreeze_queue(q); + else + blk_queue_bypass_end(q); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); return ret; @@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q, if (!blkcg_policy_enabled(q, pol)) return; - blk_queue_bypass_start(q); + if (q->mq_ops) + blk_mq_freeze_queue(q); + else + blk_queue_bypass_start(q); + spin_lock_irq(q->queue_lock); __clear_bit(pol->plid, q->blkcg_pols); @@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q, } spin_unlock_irq(q->queue_lock); - blk_queue_bypass_end(q); + + if (q->mq_ops) + blk_mq_unfreeze_queue(q); + else + blk_queue_bypass_end(q); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 61ba08c58b64..b2df55a65250 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-sched.h" #include "blk-wbt.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); @@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->cmd = rq->__cmd; rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; + rq->internal_tag = -1; rq->start_time = jiffies; set_start_time_ns(rq); rq->part = NULL; @@ -525,12 +527,14 @@ void blk_set_queue_dying(struct request_queue *q) else { struct request_list *rl; + spin_lock_irq(q->queue_lock); blk_queue_for_each_rl(rl, q) { if (rl->rq_pool) { wake_up(&rl->wait[BLK_RW_SYNC]); wake_up(&rl->wait[BLK_RW_ASYNC]); } } + spin_unlock_irq(q->queue_lock); } } EXPORT_SYMBOL_GPL(blk_set_queue_dying); @@ -1033,29 +1037,13 @@ static bool blk_rq_should_init_elevator(struct bio *bio) * Flush requests do not use the elevator so skip initialization. * This allows a request to share the flush and elevator data. */ - if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) + if (op_is_flush(bio->bi_opf)) return false; return true; } /** - * rq_ioc - determine io_context for request allocation - * @bio: request being allocated is for this bio (can be %NULL) - * - * Determine io_context to use for request allocation for @bio. May return - * %NULL if %current->io_context doesn't exist. - */ -static struct io_context *rq_ioc(struct bio *bio) -{ -#ifdef CONFIG_BLK_CGROUP - if (bio && bio->bi_ioc) - return bio->bi_ioc; -#endif - return current->io_context; -} - -/** * __get_request - get a free request * @rl: request list to allocate from * @op: operation and flags @@ -1655,7 +1643,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) { + if (op_is_flush(bio->bi_opf)) { spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; goto get_rq; @@ -1894,7 +1882,7 @@ generic_make_request_checks(struct bio *bio) * drivers without flush support don't have to worry * about them. */ - if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && + if (op_is_flush(bio->bi_opf) && !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); if (!nr_sectors) { @@ -2143,7 +2131,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); - blk_mq_insert_request(rq, false, true, false); + blk_mq_sched_insert_request(rq, false, true, false, false); return 0; } @@ -2159,7 +2147,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) + if (op_is_flush(rq->cmd_flags)) where = ELEVATOR_INSERT_FLUSH; add_acct_request(q, rq, where); @@ -3270,7 +3258,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) /* * rq is already accounted, so use raw insert */ - if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) + if (op_is_flush(rq->cmd_flags)) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); diff --git a/block/blk-exec.c b/block/blk-exec.c index 3ecb00a6cf45..ed1f10165268 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -9,6 +9,7 @@ #include <linux/sched/sysctl.h> #include "blk.h" +#include "blk-mq-sched.h" /* * for max sense size @@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be reused after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(rq, at_head, true, false); + blk_mq_sched_insert_request(rq, at_head, true, false, false); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index 20b7c7a02f1c..4427896641ac 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -74,6 +74,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" +#include "blk-mq-sched.h" /* FLUSH/FUA sequences */ enum { @@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error) * the comment in flush_end_io(). */ spin_lock_irqsave(&fq->mq_flush_lock, flags); - if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) - blk_mq_run_hw_queue(hctx, true); + blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + + blk_mq_run_hw_queue(hctx, true); } /** @@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - if (q->mq_ops) { - blk_mq_insert_request(rq, false, true, false); - } else + if (q->mq_ops) + blk_mq_sched_insert_request(rq, false, true, false, false); + else list_add_tail(&rq->queuelist, &q->queue_head); return; } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 381cb50a673c..fe186a9eade9 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq) if (icq->flags & ICQ_EXITED) return; - if (et->ops.elevator_exit_icq_fn) - et->ops.elevator_exit_icq_fn(icq); + if (et->uses_mq && et->ops.mq.exit_icq) + et->ops.mq.exit_icq(icq); + else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn) + et->ops.sq.elevator_exit_icq_fn(icq); icq->flags |= ICQ_EXITED; } @@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - if (et->ops.elevator_init_icq_fn) - et->ops.elevator_init_icq_fn(icq); + if (et->uses_mq && et->ops.mq.init_icq) + et->ops.mq.init_icq(icq); + else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn) + et->ops.sq.elevator_init_icq_fn(icq); } else { kmem_cache_free(et->icq_cache, icq); icq = ioc_lookup_icq(ioc, q); diff --git a/block/blk-merge.c b/block/blk-merge.c index 182398cb1524..6aa43dec5af4 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_allow_rq_merge_fn) - if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) + if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) + if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) return 0; return attempt_merge(q, rq, next); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c new file mode 100644 index 000000000000..5cd2b435a9f5 --- /dev/null +++ b/block/blk-mq-debugfs.c @@ -0,0 +1,756 @@ +/* + * Copyright (C) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/debugfs.h> + +#include <linux/blk-mq.h> +#include "blk-mq.h" +#include "blk-mq-tag.h" + +struct blk_mq_debugfs_attr { + const char *name; + umode_t mode; + const struct file_operations *fops; +}; + +static struct dentry *block_debugfs_root; + +static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, ops); + if (!ret) { + m = file->private_data; + m->private = inode->i_private; + } + return ret; +} + +static int hctx_state_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "0x%lx\n", hctx->state); + return 0; +} + +static int hctx_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_state_show, inode->i_private); +} + +static const struct file_operations hctx_state_fops = { + .open = hctx_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_flags_show(struct seq_file *m, void |
