From 7d121d701d58a92f26decb10da1d04a88b74519d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 6 Jan 2026 06:26:57 -0800 Subject: blk-rq-qos: Remove unlikely() hints from QoS checks The unlikely() annotations on QUEUE_FLAG_QOS_ENABLED checks are counterproductive. Writeback throttling (WBT) might be enabled by default, mainly because CONFIG_BLK_WBT_MQ defaults to 'y'. Branch profiling on Meta servers, which have WBT enabled, confirms 100% misprediction rates on these checks. Remove the unlikely() annotations to let the CPU's branch predictor learn the actual behavior, potentially improving I/O path performance. Signed-off-by: Breno Leitao Signed-off-by: Jens Axboe --- block/blk-rq-qos.h | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index b538f2c0febc..a747a504fe42 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -112,29 +112,26 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos); static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) __rq_qos_cleanup(q->rq_qos, bio); } static inline void rq_qos_done(struct request_queue *q, struct request *rq) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos && !blk_rq_is_passthrough(rq)) + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && + q->rq_qos && !blk_rq_is_passthrough(rq)) __rq_qos_done(q->rq_qos, rq); } static inline void rq_qos_issue(struct request_queue *q, struct request *rq) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) __rq_qos_issue(q->rq_qos, rq); } static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) __rq_qos_requeue(q->rq_qos, rq); } @@ -162,8 +159,7 @@ static inline void rq_qos_done_bio(struct bio *bio) static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) { + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) { bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } @@ -172,16 +168,14 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) static inline void rq_qos_track(struct request_queue *q, struct request *rq, struct bio *bio) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) __rq_qos_track(q->rq_qos, rq, bio); } static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) { + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) { bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); } @@ -189,8 +183,7 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq, static inline void rq_qos_queue_depth_changed(struct request_queue *q) { - if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && - q->rq_qos) + if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos) __rq_qos_queue_depth_changed(q->rq_qos); } -- cgit v1.2.3 From 6acd4ac5f8f0ec9b946875553e52907700bcfc77 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 6 Jan 2026 13:08:37 -0700 Subject: block: don't merge bios with different app_tags nvme_set_app_tag() uses the app_tag value from the bio_integrity_payload of the struct request's first bio. This assumes all the request's bios have the same app_tag. However, it is possible for bios with different app_tag values to be merged into a single request. Add a check in blk_integrity_merge_{bio,rq}() to prevent the merging of bios/requests with different app_tag values if BIP_CHECK_APPTAG is set. Signed-off-by: Caleb Sander Mateos Fixes: 3d8b5a22d404 ("block: add support to pass user meta buffer") Signed-off-by: Jens Axboe --- block/blk-integrity.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 9b27963680dc..964eebbee14d 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -140,14 +140,21 @@ EXPORT_SYMBOL_GPL(blk_rq_integrity_map_user); bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, struct request *next) { + struct bio_integrity_payload *bip, *bip_next; + if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0) return true; if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0) return false; - if (bio_integrity(req->bio)->bip_flags != - bio_integrity(next->bio)->bip_flags) + bip = bio_integrity(req->bio); + bip_next = bio_integrity(next->bio); + if (bip->bip_flags != bip_next->bip_flags) + return false; + + if (bip->bip_flags & BIP_CHECK_APPTAG && + bip->app_tag != bip_next->app_tag) return false; if (req->nr_integrity_segments + next->nr_integrity_segments > @@ -163,15 +170,21 @@ bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, bool blk_integrity_merge_bio(struct request_queue *q, struct request *req, struct bio *bio) { + struct bio_integrity_payload *bip, *bip_bio = bio_integrity(bio); int nr_integrity_segs; - if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL) + if (blk_integrity_rq(req) == 0 && bip_bio == NULL) return true; - if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL) + if (blk_integrity_rq(req) == 0 || bip_bio == NULL) + return false; + + bip = bio_integrity(req->bio); + if (bip->bip_flags != bip_bio->bip_flags) return false; - if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags) + if (bip->bip_flags & BIP_CHECK_APPTAG && + bip->app_tag != bip_bio->app_tag) return false; nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); -- cgit v1.2.3 From 9670db22e7ab4aefe2b2619589a47fef9d3e0c7e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 6 Jan 2026 16:56:07 +0100 Subject: blk-mq: avoid stall during boot due to synchronize_rcu_expedited On the kernel 6.19-rc, I am experiencing 15-second boot stall in a virtual machine when probing a virtio-scsi disk: [ 1.011641] SCSI subsystem initialized [ 1.013972] virtio_scsi virtio6: 16/0/0 default/read/poll queues [ 1.015983] scsi host0: Virtio SCSI HBA [ 1.019578] ACPI: \_SB_.GSIA: Enabled at IRQ 16 [ 1.020225] ahci 0000:00:1f.2: AHCI vers 0001.0000, 32 command slots, 1.5 Gbps, SATA mode [ 1.020228] ahci 0000:00:1f.2: 6/6 ports implemented (port mask 0x3f) [ 1.020230] ahci 0000:00:1f.2: flags: 64bit ncq only [ 1.024688] scsi host1: ahci [ 1.025432] scsi host2: ahci [ 1.025966] scsi host3: ahci [ 1.026511] scsi host4: ahci [ 1.028371] scsi host5: ahci [ 1.028918] scsi host6: ahci [ 1.029266] ata1: SATA max UDMA/133 abar m4096@0xfea23000 port 0xfea23100 irq 16 lpm-pol 1 [ 1.029305] ata2: SATA max UDMA/133 abar m4096@0xfea23000 port 0xfea23180 irq 16 lpm-pol 1 [ 1.029316] ata3: SATA max UDMA/133 abar m4096@0xfea23000 port 0xfea23200 irq 16 lpm-pol 1 [ 1.029327] ata4: SATA max UDMA/133 abar m4096@0xfea23000 port 0xfea23280 irq 16 lpm-pol 1 [ 1.029341] ata5: SATA max UDMA/133 abar m4096@0xfea23000 port 0xfea23300 irq 16 lpm-pol 1 [ 1.029356] ata6: SATA max UDMA/133 abar m4096@0xfea23000 port 0xfea23380 irq 16 lpm-pol 1 [ 1.118111] scsi 0:0:0:0: Direct-Access QEMU QEMU HARDDISK 2.5+ PQ: 0 ANSI: 5 [ 1.348916] ata1: SATA link down (SStatus 0 SControl 300) [ 1.350713] ata2: SATA link down (SStatus 0 SControl 300) [ 1.351025] ata6: SATA link down (SStatus 0 SControl 300) [ 1.351160] ata5: SATA link down (SStatus 0 SControl 300) [ 1.351326] ata3: SATA link down (SStatus 0 SControl 300) [ 1.351536] ata4: SATA link down (SStatus 0 SControl 300) [ 1.449153] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input2 [ 16.483477] sd 0:0:0:0: Power-on or device reset occurred [ 16.483691] sd 0:0:0:0: [sda] 2097152 512-byte logical blocks: (1.07 GB/1.00 GiB) [ 16.483762] sd 0:0:0:0: [sda] Write Protect is off [ 16.483877] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA [ 16.569225] sd 0:0:0:0: [sda] Attached SCSI disk I bisected it and it is caused by the commit 89e1fb7ceffd which introduces calls to synchronize_rcu_expedited. This commit replaces synchronize_rcu_expedited and kfree with a call to kfree_rcu_mightsleep, avoiding the 15-second delay. Signed-off-by: Mikulas Patocka Fixes: 89e1fb7ceffd ("blk-mq: fix potential uaf for 'queue_hw_ctx'") Reviewed-by: Uladzislau Rezki (Sony) Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index eff4f72ce83b..a29d8ac9d3e3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4553,8 +4553,7 @@ static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, * Make sure reading the old queue_hw_ctx from other * context concurrently won't trigger uaf. */ - synchronize_rcu_expedited(); - kfree(hctxs); + kfree_rcu_mightsleep(hctxs); hctxs = new_hctxs; } -- cgit v1.2.3