// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010 Red Hat, Inc.
* Copyright (c) 2016-2025 Christoph Hellwig.
*/
#include <linux/bio-integrity.h>
#include <linux/blk-crypto.h>
#include <linux/fscrypt.h>
#include <linux/pagemap.h>
#include <linux/iomap.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fserror.h>
#include "internal.h"
#include "trace.h"
#include "../internal.h"
/*
* Private flags for iomap_dio, must not overlap with the public ones in
* iomap.h:
*/
/* Completion skips the post-write page cache invalidation when this is set. */
#define IOMAP_DIO_NO_INVALIDATE (1U << 26)
/*
 * NOTE(review): not referenced in this part of the file; the name suggests
 * completion is deferred to a work item -- confirm against the users.
 */
#define IOMAP_DIO_COMP_WORK (1U << 27)
/*
 * NOTE(review): not referenced in this part of the file; presumably marks a
 * write issued with write-through semantics -- confirm against the users.
 */
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
/* Completion calls generic_write_sync() after a successful transfer. */
#define IOMAP_DIO_NEED_SYNC (1U << 29)
/*
 * The request is a write: selects the write error type for reporting, exempts
 * the result from the short-read clamp, and enables post-write invalidation.
 */
#define IOMAP_DIO_WRITE (1U << 30)
/*
 * NOTE(review): not referenced in this part of the file; presumably records
 * that the data buffer is user memory -- confirm against the users.
 */
#define IOMAP_DIO_USER_BACKED (1U << 31)
/*
 * Per-request direct I/O state.  Freed by iomap_dio_complete().
 */
struct iomap_dio {
	/* the kiocb this request was issued for (position, file, flags) */
	struct kiocb *iocb;
	/* optional filesystem hooks (bio_set, submit_io, end_io); may be NULL */
	const struct iomap_dio_ops *dops;
	/* file size that reads are clamped against at completion */
	loff_t i_size;
	/* byte count handed to ->end_io() and returned on success */
	loff_t size;
	/* bumped once for every bio submitted on behalf of this request */
	atomic_t ref;
	/* IOMAP_DIO_* flags, public and private */
	unsigned int flags;
	/* I/O error for the request, 0 if none */
	int error;
	/* added to a positive result at completion */
	size_t done_before;
	/*
	 * NOTE(review): not referenced in this part of the file; presumably
	 * set when the submitter waits for the I/O -- confirm.
	 */
	bool wait_for_completion;

	union {
		/* submission-time and synchronous-completion state: */
		struct {
			struct iov_iter *iter;
			struct task_struct *waiter;
		} submit;

		/* aio completion state: */
		struct {
			struct work_struct work;
		} aio;
	};
};
/*
 * Allocate a bio targeting the block device of the current mapping.
 *
 * If the filesystem supplied its own bio_set through the dio ops, the bio is
 * taken from that set; otherwise the plain bio_alloc() path is used.  Both
 * paths allocate with GFP_KERNEL.
 */
static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
		struct iomap_dio *dio, unsigned short nr_vecs, blk_opf_t opf)
{
	const struct iomap_dio_ops *ops = dio->dops;

	/* No filesystem-private bio_set: fall back to the default allocator. */
	if (!ops || !ops->bio_set)
		return bio_alloc(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL);

	return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL,
			ops->bio_set);
}
/*
 * Submit one bio belonging to @dio.
 *
 * Takes a reference on the dio for the bio, arms polling for async HIPRI
 * requests, and then hands the bio either to the filesystem's ->submit_io()
 * hook or to blk_crypto_submit_bio().
 */
static void iomap_dio_submit_bio(const struct iomap_iter *iter,
		struct iomap_dio *dio, struct bio *bio, loff_t pos)
{
	const struct iomap_dio_ops *ops = dio->dops;
	struct kiocb *iocb = dio->iocb;

	/* Each in-flight bio holds its own reference on the dio. */
	atomic_inc(&dio->ref);

	/* Sync dio can't be polled reliably, so only poll async requests. */
	if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
		bio_set_polled(bio, iocb);
		/* publish the bio through the kiocb */
		WRITE_ONCE(iocb->private, bio);
	}

	/* A filesystem-provided submission hook takes precedence. */
	if (ops && ops->submit_io) {
		ops->submit_io(iter, bio, pos);
		return;
	}

	/* Anonymous-write mappings are not expected on the default path. */
	WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE);
	blk_crypto_submit_bio(bio);
}
/*
 * Pick the fserror type matching the direction of @dio: the write type when
 * IOMAP_DIO_WRITE is set, the read type otherwise.
 */
static inline enum fserror_type iomap_dio_err_type(const struct iomap_dio *dio)
{
	return (dio->flags & IOMAP_DIO_WRITE) ?
		FSERR_DIRECTIO_WRITE : FSERR_DIRECTIO_READ;
}
static inline bool should_report_dio_fserror(const struct iomap_dio *dio)
{
switch (dio->error) {
case 0:
case -EAGAIN:
case -ENOTBLK:
/* don't send fsnotify for success or magic retry codes */
return false;
default:
return true;
}
}
ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb;
loff_t offset = iocb->ki_pos;
ssize_t ret = dio->error;
if (dops && dops->end_io)
ret = dops->end_io(iocb, dio->size, ret, dio->flags);
if (should_report_dio_fserror(dio))
fserror_report_io(file_inode(iocb->ki_filp),
iomap_dio_err_type(dio), offset, dio->size,
dio->error, GFP_NOFS);
if (likely(!ret)) {
ret = dio->size;
/* check for short read */
if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
ret = dio->i_size - offset;
}
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*
* And this page cache invalidation has to be after ->end_io(), as some
* filesystems convert unwritten extents to real allocations in
* ->end_io() when necessary, otherwise a racing buffer read would cache
* zeros from unwritten extents.
*/
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) &&
!(dio->flags & IOMAP_DIO_NO_INVALIDATE))
kiocb_invalidate_post_direct_write(iocb, dio->size);
inode_dio_end(file_inode(iocb->ki_filp));
if (ret > 0) {
iocb->ki_pos += ret;
/*
* If this is a DSYNC write, make sure we push it to stable
* storage now that we've written data.
*/
if (dio->flags & IOMAP_DIO_NEED_SYNC)
ret = generic_write_sync(iocb, ret);
if (ret > 0)
ret += dio->done_before;
}
trace_iomap_dio_complete(iocb, dio->error, ret);
kfree(dio);
return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
static void