blob: e177da87582f97a1db635f00b7b7b53f3300ffa4 [file] [log] [blame] [edit]
// SPDX-License-Identifier: MIT or GPL-2.0-only
#include <config.h>

#include <errno.h>
#include <poll.h>
#include <sys/epoll.h>

#include "ublksrv_tgt.h"
static bool user_copy;
static bool block_device;
static bool backing_supports_discard(char *name)
{
int fd;
char buf[512];
int len;
len = snprintf(buf, 512, "/sys/block/%s/queue/discard_max_hw_bytes",
basename(name));
buf[len] = 0;
fd = open(buf, O_RDONLY);
if (fd > 0) {
char val[128];
int ret = pread(fd, val, 128, 0);
unsigned long long bytes = 0;
close(fd);
if (ret > 0)
bytes = strtol(val, NULL, 10);
if (bytes > 0)
return true;
}
return false;
}
static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
const char *jbuf)
{
struct ublksrv_tgt_info *tgt = &dev->tgt;
const struct ublksrv_ctrl_dev_info *info =
ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
int fd, ret;
long direct_io = 0;
struct ublk_params p;
char file[PATH_MAX];
ublk_assert(jbuf);
ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
if (ret < 0) {
ublk_err( "%s: backing file can't be retrieved from jbuf %d\n",
__func__, ret);
return ret;
}
ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
&direct_io);
if (ret) {
ublk_err( "%s: read target direct_io failed %d\n",
__func__, ret);
return ret;
}
ret = ublksrv_json_read_params(&p, jbuf);
if (ret) {
ublk_err( "%s: read ublk params failed %d\n",
__func__, ret);
return ret;
}
fd = open(file, O_RDWR);
if (fd < 0) {
ublk_err( "%s: backing file %s can't be opened\n",
__func__, file);
return fd;
}
if (direct_io)
fcntl(fd, F_SETFL, O_DIRECT);
ublksrv_tgt_set_io_data_size(tgt);
tgt->dev_size = p.basic.dev_sectors << 9;
tgt->tgt_ring_depth = info->queue_depth;
tgt->nr_fds = 1;
tgt->fds[1] = fd;
user_copy = info->flags & UBLK_F_USER_COPY;
if (user_copy)
tgt->tgt_ring_depth *= 2;
return 0;
}
static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
{
const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
const struct ublksrv_ctrl_dev_info *info =
ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);
ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);
ublk_assert(info->state == UBLK_S_DEV_QUIESCED);
return loop_setup_tgt(dev, type, true, jbuf);
}
static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
*argv[])
{
int buffered_io = 0;
const struct ublksrv_ctrl_dev_info *info =
ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
static const struct option lo_longopts[] = {
{ "file", 1, NULL, 'f' },
{ "buffered_io", no_argument, &buffered_io, 1},
{ NULL }
};
unsigned long long bytes;
struct stat st;
int fd, opt;
char *file = NULL;
int jbuf_size;
char *jbuf;
struct ublksrv_tgt_base_json tgt_json = {
.type = type,
};
struct ublk_params p = {
.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
.basic = {
.logical_bs_shift = 9,
.physical_bs_shift = 12,
.io_opt_shift = 12,
.io_min_shift = 9,
.max_sectors = info->max_io_buf_bytes >> 9,
},
.discard = {
.max_discard_sectors = UINT_MAX >> 9,
.max_discard_segments = 1,
},
};
bool can_discard = false;
strcpy(tgt_json.name, "loop");
if (type != UBLKSRV_TGT_TYPE_LOOP)
return -1;
while ((opt = getopt_long(argc, argv, "-:f:",
lo_longopts, NULL)) != -1) {
switch (opt) {
case 'f':
file = strdup(optarg);
break;
}
}
if (!file)
return -1;
fd = open(file, O_RDWR);
if (fd < 0) {
ublk_err( "%s: backing file %s can't be opened\n",
__func__, file);
return -2;
}
if (fstat(fd, &st) < 0)
return -2;
if (S_ISBLK(st.st_mode)) {
unsigned int bs, pbs;
if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
return -1;
if (ioctl(fd, BLKSSZGET, &bs) != 0)
return -1;
if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
return -1;
block_device = true;
p.basic.logical_bs_shift = ilog2(bs);
p.basic.physical_bs_shift = ilog2(pbs);
can_discard = backing_supports_discard(file);
} else if (S_ISREG(st.st_mode)) {
block_device = false;
bytes = st.st_size;
can_discard = true;
p.basic.logical_bs_shift = ilog2(st.st_blksize);
p.basic.physical_bs_shift = ilog2(st.st_blksize);
} else {
bytes = 0;
}
/*
* in case of buffered io, use common bs/pbs so that all FS
* image can be supported
*/
if (buffered_io || !ublk_param_is_valid(&p) ||
fcntl(fd, F_SETFL, O_DIRECT)) {
p.basic.logical_bs_shift = 9;
p.basic.physical_bs_shift = 12;
buffered_io = 1;
}
tgt_json.dev_size = bytes;
p.basic.dev_sectors = bytes >> 9;
if (st.st_blksize && can_discard)
p.discard.discard_granularity = st.st_blksize;
else
p.types &= ~UBLK_PARAM_TYPE_DISCARD;
jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io", !buffered_io);
ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);
close(fd);
return loop_setup_tgt(dev, type, false, jbuf);
}
static void loop_usage_for_add(void)
{
printf(" loop: -f backing_file [--buffered_io]\n");
printf(" default is direct IO to backing file\n");
}
static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
{
__u16 ublk_op = ublksrv_get_op(iod);
__u32 flags = ublksrv_get_flags(iod);
int mode = FALLOC_FL_KEEP_SIZE;
/* follow logic of linux kernel loop */
if (ublk_op == UBLK_IO_OP_DISCARD) {
mode |= FALLOC_FL_PUNCH_HOLE;
} else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
if (flags & UBLK_IO_F_NOUNMAP)
mode |= FALLOC_FL_ZERO_RANGE;
else
mode |= FALLOC_FL_PUNCH_HOLE;
} else {
mode |= FALLOC_FL_ZERO_RANGE;
}
return mode;
}
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
if (user_copy) {
struct io_uring_sqe *sqe, *sqe2;
__u64 pos = ublk_pos(q->q_id, tag, 0);
void *buf = ublksrv_queue_get_io_buf(q, tag);
ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
io_uring_prep_read(sqe, 1 /*fds[1]*/,
buf,
iod->nr_sectors << 9,
iod->start_sector << 9);
io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
sqe->user_data = build_user_data(tag, ublk_op, 1, 1);
io_uring_prep_write(sqe2, 0 /*fds[0]*/,
buf, iod->nr_sectors << 9, pos);
io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
} else {
struct io_uring_sqe *sqe;
void *buf = (void *)iod->addr;
ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
io_uring_prep_read(sqe, 1 /*fds[1]*/,
buf,
iod->nr_sectors << 9,
iod->start_sector << 9);
io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
}
}
static void loop_queue_tgt_write(const struct ublksrv_queue *q,
const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
if (user_copy) {
struct io_uring_sqe *sqe, *sqe2;
__u64 pos = ublk_pos(q->q_id, tag, 0);
void *buf = ublksrv_queue_get_io_buf(q, tag);
ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
io_uring_prep_read(sqe, 0 /*fds[0]*/,
buf, iod->nr_sectors << 9, pos);
io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
sqe->user_data = build_user_data(tag, ublk_op, 1, 1);
io_uring_prep_write(sqe2, 1 /*fds[1]*/,
buf, iod->nr_sectors << 9,
iod->start_sector << 9);
io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
} else {
struct io_uring_sqe *sqe;
void *buf = (void *)iod->addr;
ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
io_uring_prep_write(sqe, 1 /*fds[1]*/,
buf,
iod->nr_sectors << 9,
iod->start_sector << 9);
io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
}
}
static int loop_queue_tgt_io(const struct ublksrv_queue *q,
const struct ublk_io_data *data, int tag)
{
const struct ublksrv_io_desc *iod = data->iod;
struct io_uring_sqe *sqe;
unsigned ublk_op = ublksrv_get_op(iod);
switch (ublk_op) {
case UBLK_IO_OP_FLUSH:
ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
iod->nr_sectors << 9,
iod->start_sector << 9,
IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
break;
case UBLK_IO_OP_WRITE_ZEROES:
case UBLK_IO_OP_DISCARD:
ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
loop_fallocate_mode(iod),
iod->start_sector << 9,
iod->nr_sectors << 9);
io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
break;
case UBLK_IO_OP_READ:
loop_queue_tgt_read(q, iod, tag);
break;
case UBLK_IO_OP_WRITE:
loop_queue_tgt_write(q, iod, tag);
break;
default:
return -EINVAL;
}
ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
return 1;
}
static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
const struct ublk_io_data *data, int tag)
{
int ret;
struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
io->queued_tgt_io = 0;
again:
ret = loop_queue_tgt_io(q, data, tag);
if (ret > 0) {
if (io->queued_tgt_io)
ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
io->queued_tgt_io += 1;
co_await__suspend_always(tag);
io->queued_tgt_io -= 1;
if (io->tgt_io_cqe->res == -EAGAIN)
goto again;
ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
} else if (ret < 0) {
ublk_err( "fail to queue io %d, ret %d\n", tag, tag);
} else {
ublk_err( "no sqe %d\n", tag);
}
}
static int loop_handle_io_async(const struct ublksrv_queue *q,
const struct ublk_io_data *data)
{
struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
if (block_device && ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
__u64 r[2];
int res;
io_uring_submit(q->ring_ptr);
r[0] = data->iod->start_sector << 9;
r[1] = data->iod->nr_sectors << 9;
res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
ublksrv_complete_io(q, data->tag, res);
} else {
io->co = __loop_handle_io_async(q, data, data->tag);
}
return 0;
}
static void loop_tgt_io_done(const struct ublksrv_queue *q,
const struct ublk_io_data *data,
const struct io_uring_cqe *cqe)
{
int tag = user_data_to_tag(cqe->user_data);
struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
if (user_data_to_tgt_data(cqe->user_data))
return;
ublk_assert(tag == data->tag);
if (!io->queued_tgt_io)
ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
__func__, cqe->res, q->q_id,
user_data_to_tag(cqe->user_data),
user_data_to_op(cqe->user_data));
io->tgt_io_cqe = cqe;
io->co.resume();
}
static void loop_deinit_tgt(const struct ublksrv_dev *dev)
{
fsync(dev->tgt.fds[1]);
close(dev->tgt.fds[1]);
}
struct ublksrv_tgt_type loop_tgt_type = {
.handle_io_async = loop_handle_io_async,
.tgt_io_done = loop_tgt_io_done,
.usage_for_add = loop_usage_for_add,
.init_tgt = loop_init_tgt,
.deinit_tgt = loop_deinit_tgt,
.type = UBLKSRV_TGT_TYPE_LOOP,
.name = "loop",
.recovery_tgt = loop_recovery_tgt,
};
static void tgt_loop_init() __attribute__((constructor));
static void tgt_loop_init(void)
{
ublksrv_register_tgt_type(&loop_tgt_type);
}