| // SPDX-License-Identifier: MIT or GPL-2.0-only |
| |
| #include <config.h> |
| |
| #include <poll.h> |
| #include <sys/epoll.h> |
| #include "ublksrv_aio.h" |
| #include "ublksrv_tgt.h" |
| |
| static struct ublksrv_aio_ctx *aio_ctx = NULL; |
| static pthread_t io_thread; |
| |
| static bool loop_is_sync_io(struct ublksrv_queue *q, |
| const struct ublk_io *io, int tag) |
| { |
| const struct ublksrv_io_desc *iod = ublksrv_get_iod(q, tag); |
| unsigned ublk_op = ublksrv_get_op(iod); |
| |
| switch (ublk_op) { |
| case UBLK_IO_OP_FLUSH: |
| case UBLK_IO_OP_WRITE_ZEROES: |
| case UBLK_IO_OP_DISCARD: |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static int loop_sync_io_submitter(struct ublksrv_aio_ctx *ctx, |
| struct ublksrv_aio *req) |
| { |
| const struct ublksrv_io_desc *iod = &req->io; |
| unsigned ublk_op = ublksrv_get_op(iod); |
| void *buf = (void *)iod->addr; |
| unsigned len = iod->nr_sectors << 9; |
| unsigned long long offset = iod->start_sector << 9; |
| int mode = FALLOC_FL_KEEP_SIZE; |
| int ret; |
| |
| switch (ublk_op) { |
| case UBLK_IO_OP_FLUSH: |
| ret = fdatasync(req->fd); |
| break; |
| case UBLK_IO_OP_WRITE_ZEROES: |
| mode |= FALLOC_FL_ZERO_RANGE; |
| case UBLK_IO_OP_DISCARD: |
| mode |= FALLOC_FL_PUNCH_HOLE; |
| ret = fallocate(req->fd, mode, offset, len); |
| break; |
| case UBLK_IO_OP_READ: |
| case UBLK_IO_OP_WRITE: |
| default: |
| ublksrv_log(LOG_ERR, "%s: wrong op %d, fd %d, id %x\n", |
| __func__, ublk_op, req->fd, req->id); |
| return -EINVAL; |
| } |
| ublksrv_log(LOG_INFO, "%s: op %d, fd %d, id %x, off %llx len %u res %d %s\n", |
| __func__, ublk_op, req->fd, req->id, offset, len, ret, |
| strerror(errno)); |
| exit: |
| req->res = ret; |
| return 1; |
| } |
| |
| #define EPOLL_NR_EVENTS 1 |
| static void *loop_sync_io_handler_fn(void *data) |
| { |
| struct ublksrv_aio_ctx *ctx = (struct ublksrv_aio_ctx *)data; |
| |
| unsigned dev_id = ctx->dev->ctrl_dev->dev_info.dev_id; |
| struct epoll_event events[EPOLL_NR_EVENTS]; |
| int epoll_fd = epoll_create(EPOLL_NR_EVENTS); |
| struct epoll_event read_event; |
| int ret; |
| |
| if (epoll_fd < 0) { |
| syslog(LOG_ERR, "ublk dev %d create epoll fd failed\n", dev_id); |
| return NULL; |
| } |
| |
| ublksrv_log(LOG_INFO, "ublk dev %d aio context(sync io submitter) started tid %d\n", |
| dev_id, gettid()); |
| |
| read_event.events = EPOLLIN; |
| read_event.data.fd = ctx->efd; |
| ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, ctx->efd, &read_event); |
| |
| while (!ublksrv_aio_ctx_dead(ctx)) { |
| struct aio_list list; |
| |
| aio_list_init(&list); |
| |
| ublksrv_aio_submit_worker(ctx, loop_sync_io_submitter, &list); |
| |
| ublksrv_aio_complete_worker(ctx, &list); |
| |
| epoll_wait(epoll_fd, events, EPOLL_NR_EVENTS, -1); |
| } |
| |
| return NULL; |
| } |
| |
| static const char *loop_tgt_backfile(struct ublksrv_tgt_info *tgt) |
| { |
| return (const char *)tgt->tgt_data; |
| } |
| |
| static bool backing_supports_discard(char *name) |
| { |
| int fd; |
| char buf[512]; |
| int len; |
| |
| len = snprintf(buf, 512, "/sys/block/%s/queue/discard_max_hw_bytes", |
| basename(name)); |
| buf[len] = 0; |
| fd = open(buf, O_RDONLY); |
| if (fd > 0) { |
| char val[128]; |
| int ret = pread(fd, val, 128, 0); |
| unsigned long long bytes = 0; |
| |
| close(fd); |
| if (ret > 0) |
| bytes = strtol(val, NULL, 10); |
| |
| if (bytes > 0) |
| return true; |
| } |
| return false; |
| } |
| |
| static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char |
| *argv[]) |
| { |
| int buffered_io = 0; |
| struct ublksrv_tgt_info *tgt = &dev->tgt; |
| const struct ublksrv_ctrl_dev_info *info = &dev->ctrl_dev->dev_info; |
| static const struct option lo_longopts[] = { |
| { "file", 1, NULL, 'f' }, |
| { "buffered_io", no_argument, &buffered_io, 1}, |
| { NULL } |
| }; |
| unsigned long long bytes; |
| struct stat st; |
| int fd, opt; |
| char *file = NULL; |
| int jbuf_size, ret; |
| char *jbuf; |
| struct ublksrv_tgt_base_json tgt_json = { |
| .type = type, |
| }; |
| struct ublk_params p = { |
| .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD, |
| .basic = { |
| .logical_bs_shift = 9, |
| .physical_bs_shift = 12, |
| .io_opt_shift = 12, |
| .io_min_shift = 9, |
| .max_sectors = info->max_io_buf_bytes >> 9, |
| }, |
| |
| .discard = { |
| .max_discard_sectors = UINT_MAX >> 9, |
| .max_discard_segments = 1, |
| }, |
| }; |
| bool can_discard = false; |
| |
| strcpy(tgt_json.name, "loop"); |
| |
| if (type != UBLKSRV_TGT_TYPE_LOOP) |
| return -1; |
| |
| while ((opt = getopt_long(argc, argv, "-:f:", |
| lo_longopts, NULL)) != -1) { |
| switch (opt) { |
| case 'f': |
| file = strdup(optarg); |
| break; |
| } |
| } |
| |
| if (!file) |
| return -1; |
| |
| fd = open(file, O_RDWR); |
| if (fd < 0) { |
| syslog(LOG_ERR, "%s: backing file %s can't be opened\n", |
| __func__, file); |
| return -2; |
| } |
| |
| if (fstat(fd, &st) < 0) |
| return -2; |
| |
| if (S_ISBLK(st.st_mode)) { |
| unsigned int bs, pbs; |
| |
| if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) |
| return -1; |
| if (ioctl(fd, BLKSSZGET, &bs) != 0) |
| return -1; |
| if (ioctl(fd, BLKPBSZGET, &pbs) != 0) |
| return -1; |
| p.basic.logical_bs_shift = ilog2(bs); |
| p.basic.physical_bs_shift = ilog2(pbs); |
| can_discard = backing_supports_discard(file); |
| } else if (S_ISREG(st.st_mode)) { |
| bytes = st.st_size; |
| can_discard = true; |
| p.basic.logical_bs_shift = ilog2(st.st_blksize); |
| p.basic.physical_bs_shift = ilog2(st.st_blksize); |
| } else { |
| bytes = 0; |
| } |
| |
| /* |
| * in case of buffered io, use common bs/pbs so that all FS |
| * image can be supported |
| */ |
| if (buffered_io || fcntl(fd, F_SETFL, O_DIRECT)) { |
| p.basic.logical_bs_shift = 9; |
| p.basic.physical_bs_shift = 12; |
| buffered_io = 1; |
| } |
| |
| tgt->tgt_data = strdup(file); |
| tgt_json.dev_size = tgt->dev_size = bytes; |
| tgt->tgt_ring_depth = info->queue_depth; |
| tgt->nr_fds = 1; |
| tgt->fds[1] = fd; |
| p.basic.dev_sectors = bytes >> 9; |
| |
| if (st.st_blksize && can_discard) |
| p.discard.discard_granularity = st.st_blksize; |
| else |
| p.types &= ~UBLK_PARAM_TYPE_DISCARD; |
| |
| jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size); |
| ublksrv_json_write_dev_info(dev->ctrl_dev, jbuf, jbuf_size); |
| ublksrv_json_write_target_base_info(jbuf, jbuf_size, &tgt_json); |
| do { |
| ret = ublksrv_json_write_target_str_info(jbuf, jbuf_size, |
| "backing_file", file); |
| if (ret < 0) |
| jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size); |
| } while (ret < 0); |
| do { |
| ret = ublksrv_json_write_target_ulong_info(jbuf, jbuf_size, |
| "direct_io", !buffered_io); |
| if (ret < 0) |
| jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size); |
| } while (ret < 0); |
| |
| do { |
| ret = ublksrv_json_write_params(&p, jbuf, jbuf_size); |
| if (ret < 0) |
| jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size); |
| } while (ret < 0); |
| |
| aio_ctx = ublksrv_aio_ctx_init(dev, 0); |
| if (!aio_ctx) { |
| syslog(LOG_ERR, "dev %d call ublk_aio_ctx_init failed\n", |
| dev->ctrl_dev->dev_info.dev_id); |
| return -ENOMEM; |
| } |
| |
| if (pthread_create(&io_thread, NULL, loop_sync_io_handler_fn, |
| aio_ctx)) { |
| ublksrv_aio_ctx_deinit(aio_ctx); |
| aio_ctx = NULL; |
| } |
| |
| return 0; |
| } |
| |
| static void loop_usage_for_add(void) |
| { |
| printf(" loop: -f backing_file\n"); |
| } |
| |
| static int loop_queue_tgt_io(struct ublksrv_queue *q, struct ublk_io *io, |
| int tag) |
| { |
| const struct ublksrv_io_desc *iod = ublksrv_get_iod(q, tag); |
| unsigned io_op = ublksrv_convert_cmd_op(iod); |
| struct io_uring_sqe *sqe = io_uring_get_sqe(&q->ring); |
| |
| if (!sqe) |
| return 0; |
| |
| io_uring_prep_rw(io_op, sqe, 1, (void *)iod->addr, iod->nr_sectors << 9, |
| iod->start_sector << 9); |
| sqe->flags = IOSQE_FIXED_FILE; |
| /* bit63 marks us as tgt io */ |
| sqe->user_data = build_user_data(tag, io_op, 0, 1); |
| |
| q->tgt_io_inflight += 1; |
| |
| ublksrv_log(LOG_INFO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag, |
| iod->op_flags, iod->start_sector, iod->nr_sectors << 9); |
| ublksrv_log(LOG_INFO, "%s: queue io op %d(%llu %llx %llx)" |
| " (qid %d tag %u, cmd_op %u target: %d, user_data %llx) iof %x\n", |
| __func__, io_op, sqe->off, sqe->len, sqe->addr, |
| q->q_id, tag, io_op, 1, sqe->user_data, io->flags); |
| |
| return 1; |
| } |
| |
| static co_io_job __loop_handle_io_async(struct ublksrv_queue *q, |
| struct ublk_io *io, int tag) |
| { |
| struct io_uring_cqe *cqe; |
| int ret; |
| |
| io->queued_tgt_io = 0; |
| again: |
| ret = loop_queue_tgt_io(q, io, tag); |
| if (ret) { |
| if (io->queued_tgt_io) |
| ublksrv_log(LOG_INFO, "bad queued_tgt_io %d\n", |
| io->queued_tgt_io); |
| io->queued_tgt_io += 1; |
| |
| co_io_job_submit_and_wait(); |
| io->queued_tgt_io -= 1; |
| |
| cqe = io->tgt_io_cqe; |
| if (cqe->res == -EAGAIN) |
| goto again; |
| |
| ublksrv_complete_io(q, tag, cqe->res); |
| } else { |
| ublksrv_log(LOG_INFO, "no sqe %d\n", tag); |
| } |
| } |
| |
| static int loop_handle_io_async(struct ublksrv_queue *q, int tag) |
| { |
| struct ublk_io_tgt *io = (struct ublk_io_tgt *)&q->ios[tag]; |
| bool sync = loop_is_sync_io(q, (struct ublk_io *)io, tag); |
| struct ublksrv_aio *req; |
| |
| if (!sync) { |
| io->co = __loop_handle_io_async(q, (struct ublk_io *)io, tag); |
| return 0; |
| } |
| |
| req = ublksrv_aio_alloc_req(aio_ctx, 0); |
| req->io = *ublksrv_get_iod(q, tag); |
| req->fd = q->dev->tgt.fds[1]; |
| req->id = ublksrv_aio_pid_tag(q->q_id, tag); |
| ublksrv_aio_submit_req(aio_ctx, q, req); |
| return 0; |
| } |
| |
| static void loop_tgt_io_done(struct ublksrv_queue *q, struct io_uring_cqe *cqe) |
| { |
| int tag = user_data_to_tag(cqe->user_data); |
| struct ublk_io *io = &q->ios[tag]; |
| |
| if (!io->queued_tgt_io) |
| syslog(LOG_WARNING, "%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n", |
| __func__, cqe->res, q->q_id, |
| user_data_to_tag(cqe->user_data), |
| user_data_to_op(cqe->user_data)); |
| io->tgt_io_cqe = cqe; |
| ((struct ublk_io_tgt *)io)->co.resume(); |
| } |
| |
| static void loop_deinit_tgt(struct ublksrv_dev *dev) |
| { |
| ublksrv_aio_ctx_shutdown(aio_ctx); |
| pthread_join(io_thread, NULL); |
| ublksrv_aio_ctx_deinit(aio_ctx); |
| |
| fsync(dev->tgt.fds[1]); |
| close(dev->tgt.fds[1]); |
| |
| free(dev->tgt.tgt_data); |
| } |
| |
| static void loop_handle_event(struct ublksrv_queue *q) |
| { |
| ublksrv_aio_handle_event(aio_ctx, q); |
| } |
| |
| struct ublksrv_tgt_type loop_tgt_type = { |
| .handle_io_async = loop_handle_io_async, |
| .tgt_io_done = loop_tgt_io_done, |
| .handle_event = loop_handle_event, |
| .usage_for_add = loop_usage_for_add, |
| .init_tgt = loop_init_tgt, |
| .deinit_tgt = loop_deinit_tgt, |
| .type = UBLKSRV_TGT_TYPE_LOOP, |
| .ublksrv_flags = UBLKSRV_F_NEED_EVENTFD, |
| .name = "loop", |
| }; |
| |
| static void tgt_loop_init() __attribute__((constructor)); |
| |
| static void tgt_loop_init(void) |
| { |
| ublksrv_register_tgt_type(&loop_tgt_type); |
| } |