--- ctl_backend_block.c	(revision 267519)
+++ ctl_backend_block.c	(revision 267537)
1/*-
2 * Copyright (c) 2003 Silicon Graphics International Corp.
3 * Copyright (c) 2009-2011 Spectra Logic Corporation
4 * Copyright (c) 2012 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * Portions of this software were developed by Edward Tomasz Napierala
8 * under sponsorship from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions, and the following disclaimer,
15 * without modification.
16 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
17 * substantially similar to the "NO WARRANTY" disclaimer below
18 * ("Disclaimer") and any redistribution must be conditioned upon
19 * including a substantially similar Disclaimer requirement for further
20 * binary redistribution.
21 *
22 * NO WARRANTY
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGES.
34 *
35 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
36 */
37/*
38 * CAM Target Layer driver backend for block devices.
39 *
40 * Author: Ken Merry <ken@FreeBSD.org>
41 */
42#include <sys/cdefs.h>
- 43__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 267519 2014-06-15 20:14:11Z mav $");
+ 43__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 267537 2014-06-16 11:00:14Z mav $");
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/kernel.h>
48#include <sys/types.h>
49#include <sys/kthread.h>
50#include <sys/bio.h>
51#include <sys/fcntl.h>
52#include <sys/limits.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/condvar.h>
56#include <sys/malloc.h>
57#include <sys/conf.h>
58#include <sys/ioccom.h>
59#include <sys/queue.h>
60#include <sys/sbuf.h>
61#include <sys/endian.h>
62#include <sys/uio.h>
63#include <sys/buf.h>
64#include <sys/taskqueue.h>
65#include <sys/vnode.h>
66#include <sys/namei.h>
67#include <sys/mount.h>
68#include <sys/disk.h>
69#include <sys/fcntl.h>
70#include <sys/filedesc.h>
71#include <sys/proc.h>
72#include <sys/pcpu.h>
73#include <sys/module.h>
74#include <sys/sdt.h>
75#include <sys/devicestat.h>
76#include <sys/sysctl.h>
77
78#include <geom/geom.h>
79
80#include <cam/cam.h>
81#include <cam/scsi/scsi_all.h>
82#include <cam/scsi/scsi_da.h>
83#include <cam/ctl/ctl_io.h>
84#include <cam/ctl/ctl.h>
85#include <cam/ctl/ctl_backend.h>
86#include <cam/ctl/ctl_frontend_internal.h>
87#include <cam/ctl/ctl_ioctl.h>
88#include <cam/ctl/ctl_scsi_all.h>
89#include <cam/ctl/ctl_error.h>
90
91/*
92 * The idea here is that we'll allocate enough S/G space to hold a 1MB
93 * I/O. If we get an I/O larger than that, we'll split it.
94 */
- 95#define CTLBLK_MAX_IO_SIZE (1024 * 1024)
+ 95#define CTLBLK_HALF_IO_SIZE (512 * 1024)
+ 96#define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2)
96#define CTLBLK_MAX_SEG MAXPHYS
- 97#define CTLBLK_MAX_SEGS MAX(CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG, 1)
+ 98#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
+ 99#define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2)
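
The 1MB I/O budget is now built from two 512KB halves so that one beio can carry two equal-sized S/G lists for COMPARE-class commands (see the changes to ctl_free_beio() and ctl_be_block_move_done() below). A standalone sketch of the resulting segment arithmetic, assuming the stock MAXPHYS of 128KB (an assumption; MAXPHYS is kernel-configurable):

#include <stdio.h>

#define MAX(a, b)		((a) > (b) ? (a) : (b))
#define MAXPHYS			(128 * 1024)	/* assumed default */
#define CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define CTLBLK_MAX_SEG		MAXPHYS
#define CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)

int
main(void)
{
	/* With these values: 4 segments per half, 8 per full I/O. */
	printf("%d segments per half, %d per I/O\n",
	    CTLBLK_HALF_SEGS, CTLBLK_MAX_SEGS);
	return (0);
}
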
98
99#ifdef CTLBLK_DEBUG
100#define DPRINTF(fmt, args...) \
101 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
102#else
103#define DPRINTF(fmt, args...) do {} while(0)
104#endif
105
106#define PRIV(io) \
107 ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
+ 110#define ARGS(io) \
+ 111 ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
108
109SDT_PROVIDER_DEFINE(cbb);
110
111typedef enum {
112 CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01,
113 CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02,
114 CTL_BE_BLOCK_LUN_WAITING = 0x04,
115 CTL_BE_BLOCK_LUN_MULTI_THREAD = 0x08
116} ctl_be_block_lun_flags;
117
118typedef enum {
119 CTL_BE_BLOCK_NONE,
120 CTL_BE_BLOCK_DEV,
121 CTL_BE_BLOCK_FILE
122} ctl_be_block_type;
123
124struct ctl_be_block_devdata {
125 struct cdev *cdev;
126 struct cdevsw *csw;
127 int dev_ref;
128};
129
130struct ctl_be_block_filedata {
131 struct ucred *cred;
132};
133
134union ctl_be_block_bedata {
135 struct ctl_be_block_devdata dev;
136 struct ctl_be_block_filedata file;
137};
138
139struct ctl_be_block_io;
140struct ctl_be_block_lun;
141
142typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
143 struct ctl_be_block_io *beio);
144
145/*
146 * Backend LUN structure. There is a 1:1 mapping between a block device
147 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
148 */
149struct ctl_be_block_lun {
150 struct ctl_block_disk *disk;
151 char lunname[32];
152 char *dev_path;
153 ctl_be_block_type dev_type;
154 struct vnode *vn;
155 union ctl_be_block_bedata backend;
156 cbb_dispatch_t dispatch;
157 cbb_dispatch_t lun_flush;
158 cbb_dispatch_t unmap;
159 struct mtx lock;
160 uma_zone_t lun_zone;
161 uint64_t size_blocks;
162 uint64_t size_bytes;
163 uint32_t blocksize;
164 int blocksize_shift;
165 uint16_t pblockexp;
166 uint16_t pblockoff;
167 struct ctl_be_block_softc *softc;
168 struct devstat *disk_stats;
169 ctl_be_block_lun_flags flags;
170 STAILQ_ENTRY(ctl_be_block_lun) links;
171 struct ctl_be_lun ctl_be_lun;
172 struct taskqueue *io_taskqueue;
173 struct task io_task;
174 int num_threads;
175 STAILQ_HEAD(, ctl_io_hdr) input_queue;
176 STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
177 STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
178};
179
180/*
181 * Overall softc structure for the block backend module.
182 */
183struct ctl_be_block_softc {
184 struct mtx lock;
185 int num_disks;
186 STAILQ_HEAD(, ctl_block_disk) disk_list;
187 int num_luns;
188 STAILQ_HEAD(, ctl_be_block_lun) lun_list;
189};
190
191static struct ctl_be_block_softc backend_block_softc;
192
193/*
194 * Per-I/O information.
195 */
196struct ctl_be_block_io {
197 union ctl_io *io;
198 struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS];
199 struct iovec xiovecs[CTLBLK_MAX_SEGS];
200 int bio_cmd;
201 int bio_flags;
202 int num_segs;
203 int num_bios_sent;
204 int num_bios_done;
205 int send_complete;
206 int num_errors;
207 struct bintime ds_t0;
208 devstat_tag_type ds_tag_type;
209 devstat_trans_flags ds_trans_type;
210 uint64_t io_len;
211 uint64_t io_offset;
212 struct ctl_be_block_softc *softc;
213 struct ctl_be_block_lun *lun;
214 void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
215};
216
217static int cbb_num_threads = 14;
218TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
219SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
220 "CAM Target Layer Block Backend");
221SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW,
222 &cbb_num_threads, 0, "Number of threads per backing file");
223
224static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
225static void ctl_free_beio(struct ctl_be_block_io *beio);
226static void ctl_complete_beio(struct ctl_be_block_io *beio);
227static int ctl_be_block_move_done(union ctl_io *io);
228static void ctl_be_block_biodone(struct bio *bio);
229static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
230 struct ctl_be_block_io *beio);
231static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
232 struct ctl_be_block_io *beio);
233static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
234 struct ctl_be_block_io *beio);
235static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
236 struct ctl_be_block_io *beio);
237static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
238 struct ctl_be_block_io *beio);
239static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
240 union ctl_io *io);
241static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
242 union ctl_io *io);
243static void ctl_be_block_worker(void *context, int pending);
244static int ctl_be_block_submit(union ctl_io *io);
245static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
246 int flag, struct thread *td);
247static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
248 struct ctl_lun_req *req);
249static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
250 struct ctl_lun_req *req);
251static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
252static int ctl_be_block_open(struct ctl_be_block_softc *softc,
253 struct ctl_be_block_lun *be_lun,
254 struct ctl_lun_req *req);
255static int ctl_be_block_create(struct ctl_be_block_softc *softc,
256 struct ctl_lun_req *req);
257static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
258 struct ctl_lun_req *req);
259static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
260 struct ctl_lun_req *req);
261static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
262 struct ctl_lun_req *req);
263static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
264 struct ctl_lun_req *req);
265static void ctl_be_block_lun_shutdown(void *be_lun);
266static void ctl_be_block_lun_config_status(void *be_lun,
267 ctl_lun_config_status status);
268static int ctl_be_block_config_write(union ctl_io *io);
269static int ctl_be_block_config_read(union ctl_io *io);
270static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
271int ctl_be_block_init(void);
272
273static struct ctl_backend_driver ctl_be_block_driver =
274{
275 .name = "block",
276 .flags = CTL_BE_FLAG_HAS_CONFIG,
277 .init = ctl_be_block_init,
278 .data_submit = ctl_be_block_submit,
279 .data_move_done = ctl_be_block_move_done,
280 .config_read = ctl_be_block_config_read,
281 .config_write = ctl_be_block_config_write,
282 .ioctl = ctl_be_block_ioctl,
283 .lun_info = ctl_be_block_lun_info
284};
285
286MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
287CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
288
289static uma_zone_t beio_zone;
290
291static struct ctl_be_block_io *
292ctl_alloc_beio(struct ctl_be_block_softc *softc)
293{
294 struct ctl_be_block_io *beio;
295
296 beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
297 beio->softc = softc;
298 return (beio);
299}
300
301static void
302ctl_free_beio(struct ctl_be_block_io *beio)
303{
304 int duplicate_free;
305 int i;
306
307 duplicate_free = 0;
308
309 for (i = 0; i < beio->num_segs; i++) {
310 if (beio->sg_segs[i].addr == NULL)
311 duplicate_free++;
312
313 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
314 beio->sg_segs[i].addr = NULL;
+ 319
+ 320 /* For compare we had two equal S/G lists. */
+ 321 if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
+ 322 uma_zfree(beio->lun->lun_zone,
+ 323 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
+ 324 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
+ 325 }
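
With COMPARE support, ctl_free_beio() must release both halves of the S/G list: segment i holds data read from the backing store, and segment i + CTLBLK_HALF_SEGS holds the initiator-supplied copy, so each pair goes back to the LUN zone together.
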
315 }
316
317 if (duplicate_free > 0) {
318 printf("%s: %d duplicate frees out of %d segments\n", __func__,
319 duplicate_free, beio->num_segs);
320 }
321
322 uma_zfree(beio_zone, beio);
323}
324
325static void
326ctl_complete_beio(struct ctl_be_block_io *beio)
327{
328 union ctl_io *io;
329 int io_len;
330
331 io = beio->io;
332
333 if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)
334 io_len = beio->io_len;
335 else
336 io_len = 0;
337
338 devstat_end_transaction(beio->lun->disk_stats,
339 /*bytes*/ io_len,
340 beio->ds_tag_type,
341 beio->ds_trans_type,
342 /*now*/ NULL,
343 /*then*/&beio->ds_t0);
344
345 if (beio->beio_cont != NULL) {
346 beio->beio_cont(beio);
347 } else {
348 ctl_free_beio(beio);
- 349 ctl_done(io);
+ 360 ctl_data_submit_done(io);
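
Completion of the beio now flows through ctl_data_submit_done() rather than the generic ctl_done(), pairing with the .data_submit entry point this backend registers in ctl_be_block_driver.
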
350 }
351}
352
353static int
354ctl_be_block_move_done(union ctl_io *io)
355{
356 struct ctl_be_block_io *beio;
357 struct ctl_be_block_lun *be_lun;
+ 369 struct ctl_lba_len_flags *lbalen;
358#ifdef CTL_TIME_IO
359 struct bintime cur_bt;
360#endif
+ 373 int i;
361
362 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
363 be_lun = beio->lun;
364
365 DPRINTF("entered\n");
366
367#ifdef CTL_TIME_IO
368 getbintime(&cur_bt);
369 bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
370 bintime_add(&io->io_hdr.dma_bt, &cur_bt);
371 io->io_hdr.num_dmas++;
372#endif
+ 386 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
373
374 /*
375 * We set status at this point for read commands, and write
376 * commands with errors.
377 */
- 378 if ((beio->bio_cmd == BIO_READ)
- 379 && (io->io_hdr.port_status == 0)
- 380 && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0)
- 381 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE))
- 382 ctl_set_success(&io->scsiio);
+ 392 if ((io->io_hdr.port_status == 0) &&
+ 393 ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) &&
+ 394 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
+ 395 lbalen = ARGS(beio->io);
+ 396 if (lbalen->flags & CTL_LLF_READ) {
+ 397 ctl_set_success(&io->scsiio);
+ 398 } else if (lbalen->flags & CTL_LLF_COMPARE) {
+ 399 /* We have two data blocks ready for comparison. */
+ 400 for (i = 0; i < beio->num_segs; i++) {
+ 401 if (memcmp(beio->sg_segs[i].addr,
+ 402 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
+ 403 beio->sg_segs[i].len) != 0)
+ 404 break;
+ 405 }
+ 406 if (i < beio->num_segs)
+ 407 ctl_set_sense(&io->scsiio,
+ 408 /*current_error*/ 1,
+ 409 /*sense_key*/ SSD_KEY_MISCOMPARE,
+ 410 /*asc*/ 0x1D,
+ 411 /*ascq*/ 0x00,
+ 412 SSD_ELEM_NONE);
+ 413 else
+ 414 ctl_set_success(&io->scsiio);
+ 415 }
+ 416 }
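
The status logic in ctl_be_block_move_done() now keys off the CTL_LLF_* flags instead of the bio command: a plain read succeeds as before, while a compare walks the two buffer halves segment by segment and fails with MISCOMPARE (ASC 0x1D, "miscompare during verify operation") at the first difference. A userland sketch of that half-versus-half walk, using mock stand-ins for the kernel structures:

#include <stdio.h>
#include <string.h>

#define HALF_SEGS	4	/* stands in for CTLBLK_HALF_SEGS */

struct seg {
	const void	*addr;
	size_t		 len;
};

/*
 * Return the index of the first miscomparing segment, or -1 if the
 * two halves match; mirrors the memcmp loop added above.
 */
static int
compare_halves(const struct seg *sg, int num_segs)
{
	int i;

	for (i = 0; i < num_segs; i++) {
		if (memcmp(sg[i].addr, sg[i + HALF_SEGS].addr,
		    sg[i].len) != 0)
			return (i);
	}
	return (-1);
}

int
main(void)
{
	const struct seg sg[2 * HALF_SEGS] = {
		{ "abcd", 4 }, { "efgh", 4 }, { "ijkl", 4 }, { "mnop", 4 },
		{ "abcd", 4 }, { "efgh", 4 }, { "iXkl", 4 }, { "mnop", 4 },
	};

	/* Prints "first miscompare in segment 2". */
	printf("first miscompare in segment %d\n",
	    compare_halves(sg, HALF_SEGS));
	return (0);
}
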
383 else if ((io->io_hdr.port_status != 0)
384 && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0)
385 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
386 /*
387 * For hardware error sense keys, the sense key
388 * specific value is defined to be a retry count,
389 * but we use it to pass back an internal FETD
390 * error code. XXX KDM Hopefully the FETD is only
391 * using 16 bits for an error code, since that's
392 * all the space we have in the sks field.
393 */
394 ctl_set_internal_failure(&io->scsiio,
395 /*sks_valid*/ 1,
396 /*retry_count*/
397 io->io_hdr.port_status);
398 }
399
400 /*
401 * If this is a read, or a write with errors, it is done.
402 */
403 if ((beio->bio_cmd == BIO_READ)
404 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
405 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
406 ctl_complete_beio(beio);
407 return (0);
408 }
409
410 /*
411 * At this point, we have a write and the DMA completed
412 * successfully. We now have to queue it to the task queue to
413 * execute the backend I/O. That is because we do blocking
414 * memory allocations, and in the file backing case, blocking I/O.
415 * This move done routine is generally called in the SIM's
416 * interrupt context, and therefore we cannot block.
417 */
418 mtx_lock(&be_lun->lock);
419 /*
420 * XXX KDM make sure that links is okay to use at this point.
421 * Otherwise, we either need to add another field to ctl_io_hdr,
422 * or deal with resource allocation here.
423 */
424 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
425 mtx_unlock(&be_lun->lock);
426
427 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
428
429 return (0);
430}
431
432static void
433ctl_be_block_biodone(struct bio *bio)
434{
435 struct ctl_be_block_io *beio;
436 struct ctl_be_block_lun *be_lun;
437 union ctl_io *io;
438 int error;
439
440 beio = bio->bio_caller1;
441 be_lun = beio->lun;
442 io = beio->io;
443
444 DPRINTF("entered\n");
445
446 error = bio->bio_error;
447 mtx_lock(&be_lun->lock);
448 if (error != 0)
449 beio->num_errors++;
450
451 beio->num_bios_done++;
452
453 /*
454 * XXX KDM will this cause WITNESS to complain? Holding a lock
455 * during the free might cause it to complain.
456 */
457 g_destroy_bio(bio);
458
459 /*
460 * If the send complete bit isn't set, or we aren't the last I/O to
461 * complete, then we're done.
462 */
463 if ((beio->send_complete == 0)
464 || (beio->num_bios_done < beio->num_bios_sent)) {
465 mtx_unlock(&be_lun->lock);
466 return;
467 }
468
469 /*
470 * At this point, we've verified that we are the last I/O to
471 * complete, so it's safe to drop the lock.
472 */
473 mtx_unlock(&be_lun->lock);
474
475 /*
476 * If there are any errors from the backing device, we fail the
477 * entire I/O with a medium error.
478 */
479 if (beio->num_errors > 0) {
480 if (error == EOPNOTSUPP) {
481 ctl_set_invalid_opcode(&io->scsiio);
482 } else if (beio->bio_cmd == BIO_FLUSH) {
483 /* XXX KDM is there is a better error here? */
484 ctl_set_internal_failure(&io->scsiio,
485 /*sks_valid*/ 1,
486 /*retry_count*/ 0xbad2);
487 } else
488 ctl_set_medium_error(&io->scsiio);
489 ctl_complete_beio(beio);
490 return;
491 }
492
493 /*
- 494 * If this is a write, a flush or a delete, we're all done.
+ 528 * If this is a write, a flush, a delete or verify, we're all done.
495 * If this is a read, we can now send the data to the user.
496 */
497 if ((beio->bio_cmd == BIO_WRITE)
498 || (beio->bio_cmd == BIO_FLUSH)
- 499 || (beio->bio_cmd == BIO_DELETE)) {
+ 533 || (beio->bio_cmd == BIO_DELETE)
+ 534 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
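
A verify reads the medium to check that it is readable but returns no data to the initiator, so once its bios complete there is no datamove to start and the command finishes here, like a write.
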
500 ctl_set_success(&io->scsiio);
501 ctl_complete_beio(beio);
502 } else {
503#ifdef CTL_TIME_IO
504 getbintime(&io->io_hdr.dma_start_bt);
505#endif
506 ctl_datamove(io);
507 }
508}
509
510static void
511ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
512 struct ctl_be_block_io *beio)
513{
514 union ctl_io *io;
515 struct mount *mountpoint;
516 int error, lock_flags;
517
518 DPRINTF("entered\n");
519
520 io = beio->io;
521
522 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
523
524 if (MNT_SHARED_WRITES(mountpoint)
525 || ((mountpoint == NULL)
526 && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
527 lock_flags = LK_SHARED;
528 else
529 lock_flags = LK_EXCLUSIVE;
530
531 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
532
533 binuptime(&beio->ds_t0);
534 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
535
536 error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread);
537 VOP_UNLOCK(be_lun->vn, 0);
538
539 vn_finished_write(mountpoint);
540
541 if (error == 0)
542 ctl_set_success(&io->scsiio);
543 else {
544 /* XXX KDM is there is a better error here? */
545 ctl_set_internal_failure(&io->scsiio,
546 /*sks_valid*/ 1,
547 /*retry_count*/ 0xbad1);
548 }
549
550 ctl_complete_beio(beio);
551}
552
553SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
554SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
555SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
556SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
557
558static void
559ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
560 struct ctl_be_block_io *beio)
561{
562 struct ctl_be_block_filedata *file_data;
563 union ctl_io *io;
564 struct uio xuio;
565 struct iovec *xiovec;
566 int flags;
567 int error, i;
568
569 DPRINTF("entered\n");
570
571 file_data = &be_lun->backend.file;
572 io = beio->io;
573 flags = beio->bio_flags;
574
+ 610 bzero(&xuio, sizeof(xuio));
575 if (beio->bio_cmd == BIO_READ) {
576 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
+ 613 xuio.uio_rw = UIO_READ;
577 } else {
578 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
- 579 }
- 580
- 581 bzero(&xuio, sizeof(xuio));
- 582 if (beio->bio_cmd == BIO_READ)
- 583 xuio.uio_rw = UIO_READ;
- 584 else
585 xuio.uio_rw = UIO_WRITE;
- 586
+ 617 }
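
The uio setup is reshuffled rather than changed: bzero() now runs before the read/write branch, and uio_rw is assigned inside the same branch that fires the corresponding SDT probe, replacing the separate if/else that used to follow.
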
587 xuio.uio_offset = beio->io_offset;
588 xuio.uio_resid = beio->io_len;
589 xuio.uio_segflg = UIO_SYSSPACE;
590 xuio.uio_iov = beio->xiovecs;
591 xuio.uio_iovcnt = beio->num_segs;
592 xuio.uio_td = curthread;
593
594 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
595 xiovec->iov_base = beio->sg_segs[i].addr;
596 xiovec->iov_len = beio->sg_segs[i].len;
597 }
598
599 if (beio->bio_cmd == BIO_READ) {
600 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
601
602 binuptime(&beio->ds_t0);
603 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
604
605 /*
606 * UFS pays attention to IO_DIRECT for reads. If the
607 * DIRECTIO option is configured into the kernel, it calls
608 * ffs_rawread(). But that only works for single-segment
609 * uios with user space addresses. In our case, with a
610 * kernel uio, it still reads into the buffer cache, but it
611 * will just try to release the buffer from the cache later
612 * on in ffs_read().
613 *
614 * ZFS does not pay attention to IO_DIRECT for reads.
615 *
616 * UFS does not pay attention to IO_SYNC for reads.
617 *
618 * ZFS pays attention to IO_SYNC (which translates into the
619 * Solaris define FRSYNC for zfs_read()) for reads. It
620 * attempts to sync the file before reading.
621 *
622 * So, to attempt to provide some barrier semantics in the
623 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC.
624 */
625 error = VOP_READ(be_lun->vn, &xuio, (flags & BIO_ORDERED) ?
626 (IO_DIRECT|IO_SYNC) : 0, file_data->cred);
627
628 VOP_UNLOCK(be_lun->vn, 0);
+ 660 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
629 } else {
630 struct mount *mountpoint;
631 int lock_flags;
632
633 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
634
635 if (MNT_SHARED_WRITES(mountpoint)
636 || ((mountpoint == NULL)
637 && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
638 lock_flags = LK_SHARED;
639 else
640 lock_flags = LK_EXCLUSIVE;
641
642 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
643
644 binuptime(&beio->ds_t0);
645 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
646
647 /*
648 * UFS pays attention to IO_DIRECT for writes. The write
649 * is done asynchronously. (Normally the write would just
650 * get put into cache.
651 *
652 * UFS pays attention to IO_SYNC for writes. It will
653 * attempt to write the buffer out synchronously if that
654 * flag is set.
655 *
656 * ZFS does not pay attention to IO_DIRECT for writes.
657 *
658 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
659 * for writes. It will flush the transaction from the
660 * cache before returning.
661 *
662 * So if we've got the BIO_ORDERED flag set, we want
663 * IO_SYNC in either the UFS or ZFS case.
664 */
665 error = VOP_WRITE(be_lun->vn, &xuio, (flags & BIO_ORDERED) ?
666 IO_SYNC : 0, file_data->cred);
667 VOP_UNLOCK(be_lun->vn, 0);
668
669 vn_finished_write(mountpoint);
+ 702 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
670 }
671
672 /*
673 * If we got an error, set the sense data to "MEDIUM ERROR" and
674 * return the I/O to the user.
675 */
676 if (error != 0) {
677 char path_str[32];
678
679 ctl_scsi_path_string(io, path_str, sizeof(path_str));
680 /*
681 * XXX KDM ZFS returns ENOSPC when the underlying
682 * filesystem fills up. What kind of SCSI error should we
683 * return for that?
684 */
685 printf("%s%s command returned errno %d\n", path_str,
686 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
687 ctl_set_medium_error(&io->scsiio);
688 ctl_complete_beio(beio);
689 return;
690 }
691
692 /*
693 * If this is a write, we're all done.
694 * If this is a read, we can now send the data to the user.
695 */
- 696 if (beio->bio_cmd == BIO_WRITE) {
+ 729 if (ARGS(io)->flags & (CTL_LLF_WRITE | CTL_LLF_VERIFY)) {
697 ctl_set_success(&io->scsiio);
- 698 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
699 ctl_complete_beio(beio);
700 } else {
- 701 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
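
Both file_done probes move from these completion branches up to just after the corresponding VOP call, and the final status check now tests the CTL_LLF_WRITE/CTL_LLF_VERIFY flags, since a verify, like a write, has no data to return.
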
702#ifdef CTL_TIME_IO
703 getbintime(&io->io_hdr.dma_start_bt);
704#endif
705 ctl_datamove(io);
706 }
707}
708
709static void
710ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
711 struct ctl_be_block_io *beio)
712{
713 struct bio *bio;
714 union ctl_io *io;
715 struct ctl_be_block_devdata *dev_data;
716
717 dev_data = &be_lun->backend.dev;
718 io = beio->io;
719
720 DPRINTF("entered\n");
721
722 /* This can't fail, it's a blocking allocation. */
723 bio = g_alloc_bio();
724
725 bio->bio_cmd = BIO_FLUSH;
726 bio->bio_flags |= BIO_ORDERED;
727 bio->bio_dev = dev_data->cdev;
728 bio->bio_offset = 0;
729 bio->bio_data = 0;
730 bio->bio_done = ctl_be_block_biodone;
731 bio->bio_caller1 = beio;
732 bio->bio_pblkno = 0;
733
734 /*
735 * We don't need to acquire the LUN lock here, because we are only
736 * sending one bio, and so there is no other context to synchronize
737 * with.
738 */
739 beio->num_bios_sent = 1;
740 beio->send_complete = 1;
741
742 binuptime(&beio->ds_t0);
743 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
744
745 (*dev_data->csw->d_strategy)(bio);
746}
747
748static void
749ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
750 struct ctl_be_block_io *beio,
751 uint64_t off, uint64_t len, int last)
752{
753 struct bio *bio;
754 struct ctl_be_block_devdata *dev_data;
755 uint64_t maxlen;
756
757 dev_data = &be_lun->backend.dev;
758 maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize);
759 while (len > 0) {
760 bio = g_alloc_bio();
761 bio->bio_cmd = BIO_DELETE;
762 bio->bio_flags |= beio->bio_flags;
763 bio->bio_dev = dev_data->cdev;
764 bio->bio_offset = off;
765 bio->bio_length = MIN(len, maxlen);
766 bio->bio_data = 0;
767 bio->bio_done = ctl_be_block_biodone;
768 bio->bio_caller1 = beio;
769 bio->bio_pblkno = off / be_lun->blocksize;
770
771 off += bio->bio_length;
772 len -= bio->bio_length;
773
774 mtx_lock(&be_lun->lock);
775 beio->num_bios_sent++;
776 if (last && len == 0)
777 beio->send_complete = 1;
778 mtx_unlock(&be_lun->lock);
779
780 (*dev_data->csw->d_strategy)(bio);
781 }
782}
783
784static void
785ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
786 struct ctl_be_block_io *beio)
787{
788 union ctl_io *io;
789 struct ctl_be_block_devdata *dev_data;
790 struct ctl_ptr_len_flags *ptrlen;
791 struct scsi_unmap_desc *buf, *end;
792 uint64_t len;
793
794 dev_data = &be_lun->backend.dev;
795 io = beio->io;
796
797 DPRINTF("entered\n");
798
799 binuptime(&beio->ds_t0);
800 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
801
802 if (beio->io_offset == -1) {
803 beio->io_len = 0;
804 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
805 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
806 end = buf + ptrlen->len / sizeof(*buf);
807 for (; buf < end; buf++) {
808 len = (uint64_t)scsi_4btoul(buf->length) *
809 be_lun->blocksize;
810 beio->io_len += len;
811 ctl_be_block_unmap_dev_range(be_lun, beio,
812 scsi_8btou64(buf->lba) * be_lun->blocksize, len,
813 (end - buf < 2) ? TRUE : FALSE);
814 }
815 } else
816 ctl_be_block_unmap_dev_range(be_lun, beio,
817 beio->io_offset, beio->io_len, TRUE);
818}
819
820static void
821ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
822 struct ctl_be_block_io *beio)
823{
824 int i;
825 struct bio *bio;
826 struct ctl_be_block_devdata *dev_data;
827 off_t cur_offset;
828 int max_iosize;
829
830 DPRINTF("entered\n");
831
832 dev_data = &be_lun->backend.dev;
833
834 /*
835 * We have to limit our I/O size to the maximum supported by the
836 * backend device. Hopefully it is MAXPHYS. If the driver doesn't
837 * set it properly, use DFLTPHYS.
838 */
839 max_iosize = dev_data->cdev->si_iosize_max;
840 if (max_iosize < PAGE_SIZE)
841 max_iosize = DFLTPHYS;
842
843 cur_offset = beio->io_offset;
844
845 /*
846 * XXX KDM need to accurately reflect the number of I/Os outstanding
847 * to a device.
848 */
849 binuptime(&beio->ds_t0);
850 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
851
852 for (i = 0; i < beio->num_segs; i++) {
853 size_t cur_size;
854 uint8_t *cur_ptr;
855
856 cur_size = beio->sg_segs[i].len;
857 cur_ptr = beio->sg_segs[i].addr;
858
859 while (cur_size > 0) {
860 /* This can't fail, it's a blocking allocation. */
861 bio = g_alloc_bio();
862
863 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
864
865 bio->bio_cmd = beio->bio_cmd;
866 bio->bio_flags |= beio->bio_flags;
867 bio->bio_dev = dev_data->cdev;
868 bio->bio_caller1 = beio;
869 bio->bio_length = min(cur_size, max_iosize);
870 bio->bio_offset = cur_offset;
871 bio->bio_data = cur_ptr;
872 bio->bio_done = ctl_be_block_biodone;
873 bio->bio_pblkno = cur_offset / be_lun->blocksize;
874
875 cur_offset += bio->bio_length;
876 cur_ptr += bio->bio_length;
877 cur_size -= bio->bio_length;
878
879 /*
880 * Make sure we set the complete bit just before we
881 * issue the last bio so we don't wind up with a
882 * race.
883 *
884 * Use the LUN mutex here instead of a combination
885 * of atomic variables for simplicity.
886 *
887 * XXX KDM we could have a per-IO lock, but that
888 * would cause additional per-IO setup and teardown
889 * overhead. Hopefully there won't be too much
890 * contention on the LUN lock.
891 */
892 mtx_lock(&be_lun->lock);
893
894 beio->num_bios_sent++;
895
896 if ((i == beio->num_segs - 1)
897 && (cur_size == 0))
898 beio->send_complete = 1;
899
900 mtx_unlock(&be_lun->lock);
901
902 (*dev_data->csw->d_strategy)(bio);
903 }
904 }
905}
906
907static void
908ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
909{
910 union ctl_io *io;
911
912 io = beio->io;
913 ctl_free_beio(beio);
914 if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)
915 && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
916 ctl_config_write_done(io);
917 return;
918 }
919
920 ctl_be_block_config_write(io);
921}
922
923static void
924ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
925 union ctl_io *io)
926{
927 struct ctl_be_block_io *beio;
928 struct ctl_be_block_softc *softc;
929 struct ctl_lba_len_flags *lbalen;
930 uint64_t len_left, lba;
931 int i, seglen;
932 uint8_t *buf, *end;
933
934 DPRINTF("entered\n");
935
936 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
937 softc = be_lun->softc;
969 lbalen = ARGS(beio->io);
970
971 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP) ||
972 (lbalen->flags & SWS_UNMAP && be_lun->unmap == NULL)) {
973 ctl_free_beio(beio);
974 ctl_set_invalid_field(&io->scsiio,
975 /*sks_valid*/ 1,
976 /*command*/ 1,
977 /*field*/ 1,
978 /*bit_valid*/ 0,
979 /*bit*/ 0);
980 ctl_config_write_done(io);
981 return;
982 }
983
984 /*
985 * If the I/O came down with an ordered or head of queue tag, set
986 * the BIO_ORDERED attribute. For head of queue tags, that's
987 * pretty much the best we can do.
988 */
989 if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
990 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
991 beio->bio_flags = BIO_ORDERED;
992
993 switch (io->scsiio.tag_type) {
994 case CTL_TAG_ORDERED:
995 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
996 break;
997 case CTL_TAG_HEAD_OF_QUEUE:
998 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
999 break;
1000 case CTL_TAG_UNTAGGED:
1001 case CTL_TAG_SIMPLE:
1002 case CTL_TAG_ACA:
1003 default:
1004 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1005 break;
1006 }
1007
1008 if (lbalen->flags & SWS_UNMAP) {
1009 beio->io_offset = lbalen->lba * be_lun->blocksize;
1010 beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize;
1011 beio->bio_cmd = BIO_DELETE;
1012 beio->ds_trans_type = DEVSTAT_FREE;
1013
1014 be_lun->unmap(be_lun, beio);
1015 return;
1016 }
1017
1018 beio->bio_cmd = BIO_WRITE;
1019 beio->ds_trans_type = DEVSTAT_WRITE;
1020
1021 DPRINTF("WRITE SAME at LBA %jx len %u\n",
1022 (uintmax_t)lbalen->lba, lbalen->len);
1023
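	/*
	 * Fill up to CTLBLK_MAX_SEGS segments with copies of the
	 * single-block payload supplied by the initiator.  If the whole
	 * range does not fit in one pass, beio_cont reschedules the
	 * remainder below.
	 */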
1024 len_left = (uint64_t)lbalen->len * be_lun->blocksize;
1025 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1027 /*
1028 * Setup the S/G entry for this chunk.
1029 */
1030 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1031 seglen -= seglen % be_lun->blocksize;
1032 beio->sg_segs[i].len = seglen;
1033 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1034
1035 DPRINTF("segment %d addr %p len %zd\n", i,
1036 beio->sg_segs[i].addr, beio->sg_segs[i].len);
1037
1038 beio->num_segs++;
1039 len_left -= seglen;
1040
1041 buf = beio->sg_segs[i].addr;
1042 end = buf + seglen;
1043 for (; buf < end; buf += be_lun->blocksize) {
1044 memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize);
1045 if (lbalen->flags & SWS_LBDATA)
1046 scsi_ulto4b(lbalen->lba + lba, buf);
1047 lba++;
1048 }
1049 }
1050
1051 beio->io_offset = lbalen->lba * be_lun->blocksize;
1052 beio->io_len = lba * be_lun->blocksize;
1053
1054	/* We cannot finish in one pass; adjust the range and schedule a rerun. */
1055 if (len_left > 0) {
1056 lbalen->lba += lba;
1057 lbalen->len -= lba;
1058 beio->beio_cont = ctl_be_block_cw_done_ws;
1059 }
1060
1061 be_lun->dispatch(be_lun, beio);
1062}
1063
1064static void
1065ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1066 union ctl_io *io)
1067{
1068 struct ctl_be_block_io *beio;
1069 struct ctl_be_block_softc *softc;
1070 struct ctl_ptr_len_flags *ptrlen;
1071
1072 DPRINTF("entered\n");
1073
1074 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1075 softc = be_lun->softc;
1076 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1077
1078 if (ptrlen->flags != 0 || be_lun->unmap == NULL) {
1079 ctl_free_beio(beio);
1080 ctl_set_invalid_field(&io->scsiio,
1081 /*sks_valid*/ 0,
1082 /*command*/ 1,
1083 /*field*/ 0,
1084 /*bit_valid*/ 0,
1085 /*bit*/ 0);
1086 ctl_config_write_done(io);
1087 return;
1088 }
1089
1090 /*
1091 * If the I/O came down with an ordered or head of queue tag, set
1092 * the BIO_ORDERED attribute. For head of queue tags, that's
1093 * pretty much the best we can do.
1094 */
1095 if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
1096 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
1097 beio->bio_flags = BIO_ORDERED;
1098
1099 switch (io->scsiio.tag_type) {
1100 case CTL_TAG_ORDERED:
1101 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1102 break;
1103 case CTL_TAG_HEAD_OF_QUEUE:
1104 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1105 break;
1106 case CTL_TAG_UNTAGGED:
1107 case CTL_TAG_SIMPLE:
1108 case CTL_TAG_ACA:
1109 default:
1110 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1111 break;
1112 }
1113
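	/*
	 * An io_offset of -1 tells the unmap routine to take its ranges
	 * from the UNMAP descriptor list instead of a single
	 * offset/length pair.
	 */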
1114 beio->io_len = 0;
1115 beio->io_offset = -1;
1116
1117 beio->bio_cmd = BIO_DELETE;
1118 beio->ds_trans_type = DEVSTAT_FREE;
1119
1120 DPRINTF("UNMAP\n");
1121
1122 be_lun->unmap(be_lun, beio);
1123}
1124
1125static void
1126ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1127{
1128 union ctl_io *io;
1129
1130 io = beio->io;
1131 ctl_free_beio(beio);
1132 ctl_config_write_done(io);
1133}
1134
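/*
 * Dispatch a config write -- a command that changes device state rather
 * than transferring user data -- based on its opcode.
 */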
1135static void
1136ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1137 union ctl_io *io)
1138{
1139 struct ctl_be_block_io *beio;
1140 struct ctl_be_block_softc *softc;
1141
1142 DPRINTF("entered\n");
1143
1144 softc = be_lun->softc;
1145 beio = ctl_alloc_beio(softc);
1146 beio->io = io;
1147 beio->lun = be_lun;
1148 beio->beio_cont = ctl_be_block_cw_done;
1149 PRIV(io)->ptr = (void *)beio;
1150
1151 switch (io->scsiio.cdb[0]) {
1152 case SYNCHRONIZE_CACHE:
1153 case SYNCHRONIZE_CACHE_16:
1154 beio->bio_cmd = BIO_FLUSH;
1155 beio->ds_trans_type = DEVSTAT_NO_DATA;
1156 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1157 beio->io_len = 0;
1158 be_lun->lun_flush(be_lun, beio);
1159 break;
1160 case WRITE_SAME_10:
1161 case WRITE_SAME_16:
1162 ctl_be_block_cw_dispatch_ws(be_lun, io);
1163 break;
1164 case UNMAP:
1165 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1166 break;
1167 default:
1168 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1169 break;
1170 }
1171}
1172
1173SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
1174SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
1175SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
1176SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1177
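/*
 * Called via beio_cont when a large I/O had to be split: free the
 * finished chunk and requeue the ctl_io so the worker issues the next
 * one, unless an error has already been posted.
 */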
1178static void
1179ctl_be_block_next(struct ctl_be_block_io *beio)
1180{
1181 struct ctl_be_block_lun *be_lun;
1182 union ctl_io *io;
1183
1184 io = beio->io;
1185 be_lun = beio->lun;
1186 ctl_free_beio(beio);
1187 if (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)
1188 && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1189 ctl_data_submit_done(io);
1190 return;
1191 }
1192
1193 io->io_hdr.status &= ~CTL_STATUS_MASK;
1194 io->io_hdr.status |= CTL_STATUS_NONE;
1195
1196 mtx_lock(&be_lun->lock);
1197 /*
1198 * XXX KDM make sure that links is okay to use at this point.
1199 * Otherwise, we either need to add another field to ctl_io_hdr,
1200 * or deal with resource allocation here.
1201 */
1202 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1203 mtx_unlock(&be_lun->lock);
1204
1205 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1206}
1207
1208static void
1209ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1210 union ctl_io *io)
1211{
1212 struct ctl_be_block_io *beio;
1213 struct ctl_be_block_softc *softc;
1214 struct ctl_lba_len_flags *lbalen;
1215 struct ctl_ptr_len_flags *bptrlen;
1216 uint64_t len_left, lbas;
1217 int i;
1218
1219 softc = be_lun->softc;
1220
1221 DPRINTF("entered\n");
1222
1223 lbalen = ARGS(io);
1224 if (lbalen->flags & CTL_LLF_WRITE) {
1225 SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
1226 } else {
1227 SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
1228 }
1229
1230 beio = ctl_alloc_beio(softc);
1231 beio->io = io;
1232 beio->lun = be_lun;
1233 bptrlen = PRIV(io);
1234 bptrlen->ptr = (void *)beio;
1235
1236 /*
1237 * If the I/O came down with an ordered or head of queue tag, set
1238 * the BIO_ORDERED attribute. For head of queue tags, that's
1239 * pretty much the best we can do.
1240 *
1241 * XXX KDM we don't have a great way to easily know about the FUA
1242 * bit right now (it is decoded in ctl_read_write(), but we don't
1243 * pass that knowledge to the backend), and in any case we would
1244 * need to determine how to handle it.
1245 */
1246 if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
1247 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
1248 beio->bio_flags = BIO_ORDERED;
1249
1250 switch (io->scsiio.tag_type) {
1251 case CTL_TAG_ORDERED:
1252 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1253 break;
1254 case CTL_TAG_HEAD_OF_QUEUE:
1255 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1256 break;
1257 case CTL_TAG_UNTAGGED:
1258 case CTL_TAG_SIMPLE:
1259 case CTL_TAG_ACA:
1260 default:
1261 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1262 break;
1263 }
1264
1265 if (lbalen->flags & CTL_LLF_WRITE) {
1266 beio->bio_cmd = BIO_WRITE;
1267 beio->ds_trans_type = DEVSTAT_WRITE;
1268 } else {
1269 beio->bio_cmd = BIO_READ;
1270 beio->ds_trans_type = DEVSTAT_READ;
1271 }
1272
1273 DPRINTF("%s at LBA %jx len %u @%ju\n",
1274 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1275 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
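	/*
	 * A compare needs two buffers per chunk -- one for data read from
	 * the backing store, one for the initiator's data -- so each pass
	 * is limited to half the usual I/O size.
	 */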
1276 if (lbalen->flags & CTL_LLF_COMPARE)
1277 lbas = CTLBLK_HALF_IO_SIZE;
1278 else
1279 lbas = CTLBLK_MAX_IO_SIZE;
1280 lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize);
1281 beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize;
1282 beio->io_len = lbas * be_lun->blocksize;
1283 bptrlen->len += lbas;
1284
1285 for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1286 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1287 i, CTLBLK_MAX_SEGS));
1288
1289 /*
1290 * Setup the S/G entry for this chunk.
1291 */
1292 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
1293 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
1294
1295 DPRINTF("segment %d addr %p len %zd\n", i,
1296 beio->sg_segs[i].addr, beio->sg_segs[i].len);
1297
1298 /* Set up second segment for compare operation. */
1299 if (lbalen->flags & CTL_LLF_COMPARE) {
1300 beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
1301 beio->sg_segs[i].len;
1302 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
1303 uma_zalloc(be_lun->lun_zone, M_WAITOK);
1304 }
1305
1306 beio->num_segs++;
1307 len_left -= beio->sg_segs[i].len;
1308 }
1309 if (bptrlen->len < lbalen->len)
1310 beio->beio_cont = ctl_be_block_next;
1311 io->scsiio.be_move_done = ctl_be_block_move_done;
1312 /* For compare we have separate S/G lists for read and datamove. */
1313 if (lbalen->flags & CTL_LLF_COMPARE)
1314 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1315 else
1316 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1276 io->scsiio.kern_data_len = beio->io_len;
1277 io->scsiio.kern_data_resid = 0;
1278 io->scsiio.kern_sg_entries = beio->num_segs;
1279 io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;
1280
1281 /*
1282 * For the read case, we need to read the data into our buffers and
1283 * then we can send it back to the user. For the write case, we
1284 * need to get the data from the user first.
1285 */
1286 if (beio->bio_cmd == BIO_READ) {
1287 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
1288 be_lun->dispatch(be_lun, beio);
1289 } else {
1290 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
1291#ifdef CTL_TIME_IO
1292 getbintime(&io->io_hdr.dma_start_bt);
1293#endif
1294 ctl_datamove(io);
1295 }
1296}
1297
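/*
 * The worker drains the queues in priority order: datamove completions
 * first, then config writes, then freshly submitted I/O.  The LUN lock
 * is dropped around each dispatch because the handlers may sleep.
 */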
1298static void
1299ctl_be_block_worker(void *context, int pending)
1300{
1301 struct ctl_be_block_lun *be_lun;
1302 struct ctl_be_block_softc *softc;
1303 union ctl_io *io;
1304
1305 be_lun = (struct ctl_be_block_lun *)context;
1306 softc = be_lun->softc;
1307
1308 DPRINTF("entered\n");
1309
1310 mtx_lock(&be_lun->lock);
1311 for (;;) {
1312 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1313 if (io != NULL) {
1314 struct ctl_be_block_io *beio;
1315
1316 DPRINTF("datamove queue\n");
1317
1318 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1319 ctl_io_hdr, links);
1320
1321 mtx_unlock(&be_lun->lock);
1322
1323 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1324
1325 be_lun->dispatch(be_lun, beio);
1326
1327 mtx_lock(&be_lun->lock);
1328 continue;
1329 }
1330 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1331 if (io != NULL) {
1333 DPRINTF("config write queue\n");
1334
1335 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1336 ctl_io_hdr, links);
1337
1338 mtx_unlock(&be_lun->lock);
1339
1340 ctl_be_block_cw_dispatch(be_lun, io);
1341
1342 mtx_lock(&be_lun->lock);
1343 continue;
1344 }
1345 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1346 if (io != NULL) {
1347 DPRINTF("input queue\n");
1348
1349 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1350 ctl_io_hdr, links);
1351 mtx_unlock(&be_lun->lock);
1352
1353 /*
1354 * We must drop the lock, since this routine and
1355 * its children may sleep.
1356 */
1357 ctl_be_block_dispatch(be_lun, io);
1358
1359 mtx_lock(&be_lun->lock);
1360 continue;
1361 }
1362
1363 /*
1364 * If we get here, there is no work left in the queues, so
1365 * just break out and let the task queue go to sleep.
1366 */
1367 break;
1368 }
1369 mtx_unlock(&be_lun->lock);
1370}
1371
1372/*
1373 * Entry point from CTL to the backend for I/O. We queue everything to a
1374 * work thread, so this just puts the I/O on a queue and wakes up the
1375 * thread.
1376 */
1377static int
1378ctl_be_block_submit(union ctl_io *io)
1379{
1380 struct ctl_be_block_lun *be_lun;
1381 struct ctl_be_lun *ctl_be_lun;
1382
1383 DPRINTF("entered\n");
1384
1385 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1386 CTL_PRIV_BACKEND_LUN].ptr;
1387 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
1388
1389 /*
1390 * Make sure we only get SCSI I/O.
1391 */
1392 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1393 "%#x) encountered", io->io_hdr.io_type));
1394
1395 PRIV(io)->len = 0;
1396
1397 mtx_lock(&be_lun->lock);
1398 /*
1399 * XXX KDM make sure that links is okay to use at this point.
1400 * Otherwise, we either need to add another field to ctl_io_hdr,
1401 * or deal with resource allocation here.
1402 */
1403 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1404 mtx_unlock(&be_lun->lock);
1405 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1406
1407 return (CTL_RETVAL_COMPLETE);
1408}
1409
1410static int
1411ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1412 int flag, struct thread *td)
1413{
1414 struct ctl_be_block_softc *softc;
1415 int error;
1416
1417 softc = &backend_block_softc;
1418
1419 error = 0;
1420
1421 switch (cmd) {
1422 case CTL_LUN_REQ: {
1423 struct ctl_lun_req *lun_req;
1424
1425 lun_req = (struct ctl_lun_req *)addr;
1426
1427 switch (lun_req->reqtype) {
1428 case CTL_LUNREQ_CREATE:
1429 error = ctl_be_block_create(softc, lun_req);
1430 break;
1431 case CTL_LUNREQ_RM:
1432 error = ctl_be_block_rm(softc, lun_req);
1433 break;
1434 case CTL_LUNREQ_MODIFY:
1435 error = ctl_be_block_modify(softc, lun_req);
1436 break;
1437 default:
1438 lun_req->status = CTL_LUN_ERROR;
1439 snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1440 "%s: invalid LUN request type %d", __func__,
1441 lun_req->reqtype);
1442 break;
1443 }
1444 break;
1445 }
1446 default:
1447 error = ENOTTY;
1448 break;
1449 }
1450
1451 return (error);
1452}
1453
1454static int
1455ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1456{
1457 struct ctl_be_block_filedata *file_data;
1458 struct ctl_lun_create_params *params;
1459 struct vattr vattr;
1460 int error;
1461
1462 error = 0;
1463 file_data = &be_lun->backend.file;
1464 params = &req->reqdata.create;
1465
1466 be_lun->dev_type = CTL_BE_BLOCK_FILE;
1467 be_lun->dispatch = ctl_be_block_dispatch_file;
1468 be_lun->lun_flush = ctl_be_block_flush_file;
1469
1470 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1471 if (error != 0) {
1472 snprintf(req->error_str, sizeof(req->error_str),
1473 "error calling VOP_GETATTR() for file %s",
1474 be_lun->dev_path);
1475 return (error);
1476 }
1477
1478 /*
1479 * Verify that we have the ability to upgrade to exclusive
1480 * access on this file so we can trap errors at open instead
1481 * of reporting them during first access.
1482 */
1483 if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
1484 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
1485 if (be_lun->vn->v_iflag & VI_DOOMED) {
1486 error = EBADF;
1487 snprintf(req->error_str, sizeof(req->error_str),
1488 "error locking file %s", be_lun->dev_path);
1489 return (error);
1490 }
1491 }
1492
1494 file_data->cred = crhold(curthread->td_ucred);
1495 if (params->lun_size_bytes != 0)
1496 be_lun->size_bytes = params->lun_size_bytes;
1497 else
1498 be_lun->size_bytes = vattr.va_size;
1499 /*
1500 * We set the multi thread flag for file operations because all
1501 * filesystems (in theory) are capable of allowing multiple readers
1502 * of a file at once. So we want to get the maximum possible
1503 * concurrency.
1504 */
1505 be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD;
1506
1507 /*
1508 * XXX KDM vattr.va_blocksize may be larger than 512 bytes here.
1509 * With ZFS, it is 131072 bytes. Block sizes that large don't work
1510 * with disklabel and UFS on FreeBSD at least. Large block sizes
1511 * may not work with other OSes as well. So just export a sector
1512 * size of 512 bytes, which should work with any OS or
1513 * application. Since our backing is a file, any block size will
1514 * work fine for the backing store.
1515 */
1516#if 0
1517 be_lun->blocksize= vattr.va_blocksize;
1518#endif
1519 if (params->blocksize_bytes != 0)
1520 be_lun->blocksize = params->blocksize_bytes;
1521 else
1522 be_lun->blocksize = 512;
1523
1524 /*
1525 * Sanity check. The media size has to be at least one
1526 * sector long.
1527 */
1528 if (be_lun->size_bytes < be_lun->blocksize) {
1529 error = EINVAL;
1530 snprintf(req->error_str, sizeof(req->error_str),
1531 "file %s size %ju < block size %u", be_lun->dev_path,
1532 (uintmax_t)be_lun->size_bytes, be_lun->blocksize);
1533 }
1534 return (error);
1535}
1536
1537static int
1538ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1539{
1540 struct ctl_lun_create_params *params;
1541 struct vattr vattr;
1542 struct cdev *dev;
1543 struct cdevsw *devsw;
1544 int error;
1545 off_t ps, pss, po, pos;
1546
1547 params = &req->reqdata.create;
1548
1549 be_lun->dev_type = CTL_BE_BLOCK_DEV;
1550 be_lun->dispatch = ctl_be_block_dispatch_dev;
1551 be_lun->lun_flush = ctl_be_block_flush_dev;
1552 be_lun->unmap = ctl_be_block_unmap_dev;
1553 be_lun->backend.dev.cdev = be_lun->vn->v_rdev;
1554 be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev,
1555 &be_lun->backend.dev.dev_ref);
1556 if (be_lun->backend.dev.csw == NULL)
1557 panic("Unable to retrieve device switch");
1558
1559 error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED);
1560 if (error) {
1561 snprintf(req->error_str, sizeof(req->error_str),
1562 "%s: error getting vnode attributes for device %s",
1563 __func__, be_lun->dev_path);
1564 return (error);
1565 }
1566
1567 dev = be_lun->vn->v_rdev;
1568 devsw = dev->si_devsw;
1569 if (!devsw->d_ioctl) {
1570 snprintf(req->error_str, sizeof(req->error_str),
1571 "%s: no d_ioctl for device %s!", __func__,
1572 be_lun->dev_path);
1573 return (ENODEV);
1574 }
1575
1576 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE,
1577 (caddr_t)&be_lun->blocksize, FREAD,
1578 curthread);
1579 if (error) {
1580 snprintf(req->error_str, sizeof(req->error_str),
1581 "%s: error %d returned for DIOCGSECTORSIZE ioctl "
1582 "on %s!", __func__, error, be_lun->dev_path);
1583 return (error);
1584 }
1585
1586 /*
1587 * If the user has asked for a blocksize that is greater than the
1588 * backing device's blocksize, we can do it only if the blocksize
1589 * the user is asking for is an even multiple of the underlying
1590 * device's blocksize.
1591 */
1592 if ((params->blocksize_bytes != 0)
1593 && (params->blocksize_bytes > be_lun->blocksize)) {
1594 uint32_t bs_multiple, tmp_blocksize;
1595
1596 bs_multiple = params->blocksize_bytes / be_lun->blocksize;
1597
1598 tmp_blocksize = bs_multiple * be_lun->blocksize;
1599
1600 if (tmp_blocksize == params->blocksize_bytes) {
1601 be_lun->blocksize = params->blocksize_bytes;
1602 } else {
1603 snprintf(req->error_str, sizeof(req->error_str),
1604 "%s: requested blocksize %u is not an even "
1605 "multiple of backing device blocksize %u",
1606 __func__, params->blocksize_bytes,
1607 be_lun->blocksize);
1608 return (EINVAL);
1609
1610 }
1611 } else if ((params->blocksize_bytes != 0)
1612 && (params->blocksize_bytes != be_lun->blocksize)) {
1613 snprintf(req->error_str, sizeof(req->error_str),
1614 "%s: requested blocksize %u < backing device "
1615 "blocksize %u", __func__, params->blocksize_bytes,
1616 be_lun->blocksize);
1617 return (EINVAL);
1618 }
1619
1620 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
1621 (caddr_t)&be_lun->size_bytes, FREAD,
1622 curthread);
1623 if (error) {
1624 snprintf(req->error_str, sizeof(req->error_str),
1625		    "%s: error %d returned for DIOCGMEDIASIZE ioctl "
1626		    "on %s!", __func__, error,
1627 be_lun->dev_path);
1628 return (error);
1629 }
1630
1631 if (params->lun_size_bytes != 0) {
1632 if (params->lun_size_bytes > be_lun->size_bytes) {
1633 snprintf(req->error_str, sizeof(req->error_str),
1634 "%s: requested LUN size %ju > backing device "
1635 "size %ju", __func__,
1636 (uintmax_t)params->lun_size_bytes,
1637 (uintmax_t)be_lun->size_bytes);
1638 return (EINVAL);
1639 }
1640
1641 be_lun->size_bytes = params->lun_size_bytes;
1642 }
1643
1644 error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE,
1645 (caddr_t)&ps, FREAD, curthread);
1646 if (error)
1647 ps = po = 0;
1648 else {
1649 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET,
1650 (caddr_t)&po, FREAD, curthread);
1651 if (error)
1652 po = 0;
1653 }
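	/*
	 * Only advertise a physical block geometry when the stripe size
	 * is a power-of-two multiple of the logical block size;
	 * otherwise pblockexp/pblockoff stay at zero.
	 */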
1654 pss = ps / be_lun->blocksize;
1655 pos = po / be_lun->blocksize;
1656 if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
1657 ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
1658 be_lun->pblockexp = fls(pss) - 1;
1659 be_lun->pblockoff = (pss - pos) % pss;
1660 }
1661
1662 return (0);
1663}
1664
1665static int
1666ctl_be_block_close(struct ctl_be_block_lun *be_lun)
1667{
1668 DROP_GIANT();
1669 if (be_lun->vn) {
1670 int flags = FREAD | FWRITE;
1671
1672 switch (be_lun->dev_type) {
1673 case CTL_BE_BLOCK_DEV:
1674 if (be_lun->backend.dev.csw) {
1675 dev_relthread(be_lun->backend.dev.cdev,
1676 be_lun->backend.dev.dev_ref);
1677 be_lun->backend.dev.csw = NULL;
1678 be_lun->backend.dev.cdev = NULL;
1679 }
1680 break;
1681 case CTL_BE_BLOCK_FILE:
1682 break;
1683 case CTL_BE_BLOCK_NONE:
1684 break;
1685 default:
1686 panic("Unexpected backend type.");
1687 break;
1688 }
1689
1690 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
1691 be_lun->vn = NULL;
1692
1693 switch (be_lun->dev_type) {
1694 case CTL_BE_BLOCK_DEV:
1695 break;
1696 case CTL_BE_BLOCK_FILE:
1697 if (be_lun->backend.file.cred != NULL) {
1698 crfree(be_lun->backend.file.cred);
1699 be_lun->backend.file.cred = NULL;
1700 }
1701 break;
1702 case CTL_BE_BLOCK_NONE:
1703 break;
1704 default:
1705 panic("Unexpected backend type.");
1706 break;
1707 }
1708 }
1709 PICKUP_GIANT();
1710
1711 return (0);
1712}
1713
1714static int
1715ctl_be_block_open(struct ctl_be_block_softc *softc,
1716 struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1717{
1718 struct nameidata nd;
1719 int flags;
1720 int error;
1721
1722 /*
1723 * XXX KDM allow a read-only option?
1724 */
1725 flags = FREAD | FWRITE;
1726 error = 0;
1727
1728 if (rootvnode == NULL) {
1729 snprintf(req->error_str, sizeof(req->error_str),
1730 "%s: Root filesystem is not mounted", __func__);
1731 return (1);
1732 }
1733
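	/*
	 * This can run from a kernel thread that has no current, root,
	 * or jail directory; point them at rootvnode so the path lookup
	 * below has somewhere to start.
	 */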
1734 if (!curthread->td_proc->p_fd->fd_cdir) {
1735 curthread->td_proc->p_fd->fd_cdir = rootvnode;
1736 VREF(rootvnode);
1737 }
1738 if (!curthread->td_proc->p_fd->fd_rdir) {
1739 curthread->td_proc->p_fd->fd_rdir = rootvnode;
1740 VREF(rootvnode);
1741 }
1742 if (!curthread->td_proc->p_fd->fd_jdir) {
1743 curthread->td_proc->p_fd->fd_jdir = rootvnode;
1744 VREF(rootvnode);
1745 }
1746
1747 again:
1748 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
1749 error = vn_open(&nd, &flags, 0, NULL);
1750 if (error) {
1751 /*
1752 * This is the only reasonable guess we can make as far as
1753 * path if the user doesn't give us a fully qualified path.
1754 * If they want to specify a file, they need to specify the
1755 * full path.
1756 */
1757 if (be_lun->dev_path[0] != '/') {
1758 char *dev_path = "/dev/";
1759 char *dev_name;
1760
1761 /* Try adding device path at beginning of name */
1762 dev_name = malloc(strlen(be_lun->dev_path)
1763 + strlen(dev_path) + 1,
1764 M_CTLBLK, M_WAITOK);
1765 if (dev_name) {
1766 sprintf(dev_name, "%s%s", dev_path,
1767 be_lun->dev_path);
1768 free(be_lun->dev_path, M_CTLBLK);
1769 be_lun->dev_path = dev_name;
1770 goto again;
1771 }
1772 }
1773 snprintf(req->error_str, sizeof(req->error_str),
1774 "%s: error opening %s", __func__, be_lun->dev_path);
1775 return (error);
1776 }
1777
1778 NDFREE(&nd, NDF_ONLY_PNBUF);
1779
1780 be_lun->vn = nd.ni_vp;
1781
1782 /* We only support disks and files. */
1783 if (vn_isdisk(be_lun->vn, &error)) {
1784 error = ctl_be_block_open_dev(be_lun, req);
1785 } else if (be_lun->vn->v_type == VREG) {
1786 error = ctl_be_block_open_file(be_lun, req);
1787 } else {
1788 error = EINVAL;
1789 snprintf(req->error_str, sizeof(req->error_str),
1790 "%s is not a disk or plain file", be_lun->dev_path);
1791 }
1792 VOP_UNLOCK(be_lun->vn, 0);
1793
1794 if (error != 0) {
1795 ctl_be_block_close(be_lun);
1796 return (error);
1797 }
1798
1799 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
1800 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
1801
1802 return (0);
1803}
1804
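/*
 * Create a new LUN from a CTL_LUNREQ_CREATE request.  Such requests
 * normally arrive from userland via ctladm(8); an invocation along the
 * lines of "ctladm create -b block -o file=/dev/md0" (device path
 * illustrative) ends up here.
 */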
1805static int
1806ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
1807{
1808 struct ctl_be_block_lun *be_lun;
1809 struct ctl_lun_create_params *params;
1811 char tmpstr[32];
1812 char *value;
1813 int retval, num_threads, unmap;
1814 int tmp_num_threads;
1815
1816 params = &req->reqdata.create;
1817 retval = 0;
1818
1819 num_threads = cbb_num_threads;
1820
1821 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
1822
1823 be_lun->softc = softc;
1824 STAILQ_INIT(&be_lun->input_queue);
1825 STAILQ_INIT(&be_lun->config_write_queue);
1826 STAILQ_INIT(&be_lun->datamove_queue);
1827 sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
1828 mtx_init(&be_lun->lock, be_lun->lunname, NULL, MTX_DEF);
1829 ctl_init_opts(&be_lun->ctl_be_lun, req);
1830
1831 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
1832 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
1833
1834 if (be_lun->lun_zone == NULL) {
1835 snprintf(req->error_str, sizeof(req->error_str),
1836 "%s: error allocating UMA zone", __func__);
1837 goto bailout_error;
1838 }
1839
1840 if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1841 be_lun->ctl_be_lun.lun_type = params->device_type;
1842 else
1843 be_lun->ctl_be_lun.lun_type = T_DIRECT;
1844
1845 if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
1846 value = ctl_get_opt(&be_lun->ctl_be_lun, "file");
1847 if (value == NULL) {
1848 snprintf(req->error_str, sizeof(req->error_str),
1849 "%s: no file argument specified", __func__);
1850 goto bailout_error;
1851 }
1852 be_lun->dev_path = strdup(value, M_CTLBLK);
1853
1854 retval = ctl_be_block_open(softc, be_lun, req);
1855 if (retval != 0) {
1856 retval = 0;
1857 goto bailout_error;
1858 }
1859
1860 /*
1861 * Tell the user the size of the file/device.
1862 */
1863 params->lun_size_bytes = be_lun->size_bytes;
1864
1865 /*
1866 * The maximum LBA is the size - 1.
1867 */
1868 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
1869 } else {
1870 /*
1871 * For processor devices, we don't have any size.
1872 */
1873 be_lun->blocksize = 0;
1874 be_lun->pblockexp = 0;
1875 be_lun->pblockoff = 0;
1876 be_lun->size_blocks = 0;
1877 be_lun->size_bytes = 0;
1878 be_lun->ctl_be_lun.maxlba = 0;
1879 params->lun_size_bytes = 0;
1880
1881 /*
1882 * Default to just 1 thread for processor devices.
1883 */
1884 num_threads = 1;
1885 }
1886
1887 /*
1888 * XXX This searching loop might be refactored to be combined with
1889	 * the loop above.
1890 */
1891 value = ctl_get_opt(&be_lun->ctl_be_lun, "num_threads");
1892 if (value != NULL) {
1893 tmp_num_threads = strtol(value, NULL, 0);
1894
1895 /*
1896 * We don't let the user specify less than one
1897 * thread, but hope he's clueful enough not to
1898 * specify 1000 threads.
1899 */
1900 if (tmp_num_threads < 1) {
1901 snprintf(req->error_str, sizeof(req->error_str),
1902 "%s: invalid number of threads %s",
1903			    __func__, value);
1904 goto bailout_error;
1905 }
1906 num_threads = tmp_num_threads;
1907 }
1908 unmap = 0;
1909 value = ctl_get_opt(&be_lun->ctl_be_lun, "unmap");
1910 if (value != NULL && strcmp(value, "on") == 0)
1911 unmap = 1;
1912
1913 be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
1914 be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
1915 if (unmap)
1916 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
1917 be_lun->ctl_be_lun.be_lun = be_lun;
1918 be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
1919 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
1920 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
1921 /* Tell the user the blocksize we ended up using */
1922 params->blocksize_bytes = be_lun->blocksize;
1923 if (params->flags & CTL_LUN_FLAG_ID_REQ) {
1924 be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
1925 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
1926 } else
1927 be_lun->ctl_be_lun.req_lun_id = 0;
1928
1929 be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
1930 be_lun->ctl_be_lun.lun_config_status =
1931 ctl_be_block_lun_config_status;
1932 be_lun->ctl_be_lun.be = &ctl_be_block_driver;
1933
1934 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
1935 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
1936 softc->num_luns);
1937 strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
1938 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1939 sizeof(tmpstr)));
1940
1941 /* Tell the user what we used for a serial number */
1942 strncpy((char *)params->serial_num, tmpstr,
1943 ctl_min(sizeof(params->serial_num), sizeof(tmpstr)));
1944 } else {
1945 strncpy((char *)be_lun->ctl_be_lun.serial_num,
1946 params->serial_num,
1947 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1948 sizeof(params->serial_num)));
1949 }
1950 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
1951 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
1952 strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
1953 ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1954 sizeof(tmpstr)));
1955
1956 /* Tell the user what we used for a device ID */
1957 strncpy((char *)params->device_id, tmpstr,
1958 ctl_min(sizeof(params->device_id), sizeof(tmpstr)));
1959 } else {
1960 strncpy((char *)be_lun->ctl_be_lun.device_id,
1961 params->device_id,
1962 ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1963 sizeof(params->device_id)));
1964 }
1965
1966 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
1967
1968 be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
1969 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
1970
1971 if (be_lun->io_taskqueue == NULL) {
1972 snprintf(req->error_str, sizeof(req->error_str),
1973 "%s: Unable to create taskqueue", __func__);
1974 goto bailout_error;
1975 }
1976
1977 /*
1978 * Note that we start the same number of threads by default for
1979 * both the file case and the block device case. For the file
1980 * case, we need multiple threads to allow concurrency, because the
1981 * vnode interface is designed to be a blocking interface. For the
1982 * block device case, ZFS zvols at least will block the caller's
1983 * context in many instances, and so we need multiple threads to
1984 * overcome that problem. Other block devices don't need as many
1985 * threads, but they shouldn't cause too many problems.
1986 *
1987 * If the user wants to just have a single thread for a block
1988 * device, he can specify that when the LUN is created, or change
1989 * the tunable/sysctl to alter the default number of threads.
1990 */
1991 retval = taskqueue_start_threads(&be_lun->io_taskqueue,
1992 /*num threads*/num_threads,
1993 /*priority*/PWAIT,
1994 /*thread name*/
1995 "%s taskq", be_lun->lunname);
1996
1997 if (retval != 0)
1998 goto bailout_error;
1999
2000 be_lun->num_threads = num_threads;
2001
2002 mtx_lock(&softc->lock);
2003 softc->num_luns++;
2004 STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2005
2006 mtx_unlock(&softc->lock);
2007
2008 retval = ctl_add_lun(&be_lun->ctl_be_lun);
2009 if (retval != 0) {
2010 mtx_lock(&softc->lock);
2011 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2012 links);
2013 softc->num_luns--;
2014 mtx_unlock(&softc->lock);
2015 snprintf(req->error_str, sizeof(req->error_str),
2016 "%s: ctl_add_lun() returned error %d, see dmesg for "
2017 "details", __func__, retval);
2018 retval = 0;
2019 goto bailout_error;
2020 }
2021
2022 mtx_lock(&softc->lock);
2023
2024 /*
2025 * Tell the config_status routine that we're waiting so it won't
2026 * clean up the LUN in the event of an error.
2027 */
2028 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2029
2030 while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2031 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2032 if (retval == EINTR)
2033 break;
2034 }
2035 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2036
2037 if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2038 snprintf(req->error_str, sizeof(req->error_str),
2039 "%s: LUN configuration error, see dmesg for details",
2040 __func__);
2041 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2042 links);
2043 softc->num_luns--;
2044 mtx_unlock(&softc->lock);
2045 goto bailout_error;
2046 } else {
2047 params->req_lun_id = be_lun->ctl_be_lun.lun_id;
2048 }
2049
2050 mtx_unlock(&softc->lock);
2051
2052 be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2053 be_lun->blocksize,
2054 DEVSTAT_ALL_SUPPORTED,
2055 be_lun->ctl_be_lun.lun_type
2056 | DEVSTAT_TYPE_IF_OTHER,
2057 DEVSTAT_PRIORITY_OTHER);
2058
2060 req->status = CTL_LUN_OK;
2061
2062 return (retval);
2063
2064bailout_error:
2065 req->status = CTL_LUN_ERROR;
2066
2067 if (be_lun->io_taskqueue != NULL)
2068 taskqueue_free(be_lun->io_taskqueue);
2069 ctl_be_block_close(be_lun);
2070 if (be_lun->dev_path != NULL)
2071 free(be_lun->dev_path, M_CTLBLK);
2072 if (be_lun->lun_zone != NULL)
2073 uma_zdestroy(be_lun->lun_zone);
2074 ctl_free_opts(&be_lun->ctl_be_lun);
2075 mtx_destroy(&be_lun->lock);
2076 free(be_lun, M_CTLBLK);
2077
2078 return (retval);
2079}
2080
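/*
 * LUN removal is asynchronous: disable and invalidate the LUN, then
 * sleep until the shutdown callback marks it unconfigured before
 * tearing down its taskqueue and freeing its resources.
 */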
2081static int
2082ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2083{
2084 struct ctl_lun_rm_params *params;
2085 struct ctl_be_block_lun *be_lun;
2086 int retval;
2087
2088 params = &req->reqdata.rm;
2089
2090 mtx_lock(&softc->lock);
2091
2092 be_lun = NULL;
2093
2094 STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2095 if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2096 break;
2097 }
2098 mtx_unlock(&softc->lock);
2099
2100 if (be_lun == NULL) {
2101 snprintf(req->error_str, sizeof(req->error_str),
2102 "%s: LUN %u is not managed by the block backend",
2103 __func__, params->lun_id);
2104 goto bailout_error;
2105 }
2106
2107 retval = ctl_disable_lun(&be_lun->ctl_be_lun);
2108
2109 if (retval != 0) {
2110 snprintf(req->error_str, sizeof(req->error_str),
2111 "%s: error %d returned from ctl_disable_lun() for "
2112 "LUN %d", __func__, retval, params->lun_id);
2113 goto bailout_error;
2114
2115 }
2116
2117 retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
2118 if (retval != 0) {
2119 snprintf(req->error_str, sizeof(req->error_str),
2120 "%s: error %d returned from ctl_invalidate_lun() for "
2121 "LUN %d", __func__, retval, params->lun_id);
2122 goto bailout_error;
2123 }
2124
2125 mtx_lock(&softc->lock);
2126
2127 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2128
2129 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2130 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2131 if (retval == EINTR)
2132 break;
2133 }
2134
2135 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2136
2137 if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2138 snprintf(req->error_str, sizeof(req->error_str),
2139 "%s: interrupted waiting for LUN to be freed",
2140 __func__);
2141 mtx_unlock(&softc->lock);
2142 goto bailout_error;
2143 }
2144
2145 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2146
2147 softc->num_luns--;
2148 mtx_unlock(&softc->lock);
2149
2150 taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
2151
2152 taskqueue_free(be_lun->io_taskqueue);
2153
2154 ctl_be_block_close(be_lun);
2155
2156 if (be_lun->disk_stats != NULL)
2157 devstat_remove_entry(be_lun->disk_stats);
2158
2159 uma_zdestroy(be_lun->lun_zone);
2160
2161 ctl_free_opts(&be_lun->ctl_be_lun);
2162 free(be_lun->dev_path, M_CTLBLK);
2163
2164 free(be_lun, M_CTLBLK);
2165
2166 req->status = CTL_LUN_OK;
2167
2168 return (0);
2169
2170bailout_error:
2171
2172 req->status = CTL_LUN_ERROR;
2173
2174 return (0);
2175}
2176
2177static int
2178ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2179 struct ctl_lun_req *req)
2180{
2181 struct vattr vattr;
2182 int error;
2183 struct ctl_lun_modify_params *params;
2184
2185 params = &req->reqdata.modify;
2186
2187 if (params->lun_size_bytes != 0) {
2188 be_lun->size_bytes = params->lun_size_bytes;
2189 } else {
2190 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2191 if (error != 0) {
2192 snprintf(req->error_str, sizeof(req->error_str),
2193 "error calling VOP_GETATTR() for file %s",
2194 be_lun->dev_path);
2195 return (error);
2196 }
2197
2198 be_lun->size_bytes = vattr.va_size;
2199 }
2200
2201 return (0);
2202}
2203
2204static int
2205ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2206 struct ctl_lun_req *req)
2207{
2208 struct cdev *dev;
2209 struct cdevsw *devsw;
2210 int error;
2211 struct ctl_lun_modify_params *params;
2212 uint64_t size_bytes;
2213
2214 params = &req->reqdata.modify;
2215
2216 dev = be_lun->vn->v_rdev;
2217 devsw = dev->si_devsw;
2218 if (!devsw->d_ioctl) {
2219 snprintf(req->error_str, sizeof(req->error_str),
2220 "%s: no d_ioctl for device %s!", __func__,
2221 be_lun->dev_path);
2222 return (ENODEV);
2223 }
2224
2225 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
2226 (caddr_t)&size_bytes, FREAD,
2227 curthread);
2228 if (error) {
2229 snprintf(req->error_str, sizeof(req->error_str),
2230 "%s: error %d returned for DIOCGMEDIASIZE ioctl "
2231 "on %s!", __func__, error, be_lun->dev_path);
2232 return (error);
2233 }
2234
2235 if (params->lun_size_bytes != 0) {
2236 if (params->lun_size_bytes > size_bytes) {
2237 snprintf(req->error_str, sizeof(req->error_str),
2238 "%s: requested LUN size %ju > backing device "
2239 "size %ju", __func__,
2240 (uintmax_t)params->lun_size_bytes,
2241 (uintmax_t)size_bytes);
2242 return (EINVAL);
2243 }
2244
2245 be_lun->size_bytes = params->lun_size_bytes;
2246 } else {
2247 be_lun->size_bytes = size_bytes;
2248 }
2249
2250 return (0);
2251}
2252
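/*
 * Resize an existing LUN: revalidate the backing store size, update
 * maxlba, and notify CTL so initiators see the capacity change.
 */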
2253static int
2254ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2255{
2256 struct ctl_lun_modify_params *params;
2257 struct ctl_be_block_lun *be_lun;
2258 int error;
2259
2260 params = &req->reqdata.modify;
2261
2262 mtx_lock(&softc->lock);
2263
2264 be_lun = NULL;
2265
2266 STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2267 if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2268 break;
2269 }
2270 mtx_unlock(&softc->lock);
2271
2272 if (be_lun == NULL) {
2273 snprintf(req->error_str, sizeof(req->error_str),
2274 "%s: LUN %u is not managed by the block backend",
2275 __func__, params->lun_id);
2276 goto bailout_error;
2277 }
2278
2279 if (params->lun_size_bytes != 0) {
2280 if (params->lun_size_bytes < be_lun->blocksize) {
2281 snprintf(req->error_str, sizeof(req->error_str),
2282 "%s: LUN size %ju < blocksize %u", __func__,
2283			    (uintmax_t)params->lun_size_bytes, be_lun->blocksize);
2284 goto bailout_error;
2285 }
2286 }
2287
2288 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2289
2290 if (be_lun->vn->v_type == VREG)
2291 error = ctl_be_block_modify_file(be_lun, req);
2292 else
2293 error = ctl_be_block_modify_dev(be_lun, req);
2294
2295 VOP_UNLOCK(be_lun->vn, 0);
2296
2297 if (error != 0)
2298 goto bailout_error;
2299
2300 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
2301
2302 /*
2303 * The maximum LBA is the size - 1.
2304 *
2305 * XXX: Note that this field is being updated without locking,
2306 * which might cause problems on 32-bit architectures.
2307 */
2308 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
2309 ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
2310
2311 /* Tell the user the exact size we ended up using */
2312 params->lun_size_bytes = be_lun->size_bytes;
2313
2314 req->status = CTL_LUN_OK;
2315
2316 return (0);
2317
2318bailout_error:
2319 req->status = CTL_LUN_ERROR;
2320
2321 return (0);
2322}
2323
2324static void
2325ctl_be_block_lun_shutdown(void *be_lun)
2326{
2327 struct ctl_be_block_lun *lun;
2328 struct ctl_be_block_softc *softc;
2329
2330 lun = (struct ctl_be_block_lun *)be_lun;
2331
2332 softc = lun->softc;
2333
2334 mtx_lock(&softc->lock);
2335 lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2336 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2337 wakeup(lun);
2338 mtx_unlock(&softc->lock);
2340}
2341
2342static void
2343ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2344{
2345 struct ctl_be_block_lun *lun;
2346 struct ctl_be_block_softc *softc;
2347
2348 lun = (struct ctl_be_block_lun *)be_lun;
2349 softc = lun->softc;
2350
2351 if (status == CTL_LUN_CONFIG_OK) {
2352 mtx_lock(&softc->lock);
2353 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2354 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2355 wakeup(lun);
2356 mtx_unlock(&softc->lock);
2357
2358 /*
2359 * We successfully added the LUN, attempt to enable it.
2360 */
2361 if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
2362 printf("%s: ctl_enable_lun() failed!\n", __func__);
2363 if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
2364 printf("%s: ctl_invalidate_lun() failed!\n",
2365 __func__);
2366 }
2367 }
2368
2369 return;
2370 }
2371
2373 mtx_lock(&softc->lock);
2374 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2375 lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2376 wakeup(lun);
2377 mtx_unlock(&softc->lock);
2378}
2379
2381static int
2382ctl_be_block_config_write(union ctl_io *io)
2383{
2384 struct ctl_be_block_lun *be_lun;
2385 struct ctl_be_lun *ctl_be_lun;
2386 int retval;
2387
2388 retval = 0;
2389
2390 DPRINTF("entered\n");
2391
2392 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2393 CTL_PRIV_BACKEND_LUN].ptr;
2394 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
2395
2396 switch (io->scsiio.cdb[0]) {
2397 case SYNCHRONIZE_CACHE:
2398 case SYNCHRONIZE_CACHE_16:
2399 case WRITE_SAME_10:
2400 case WRITE_SAME_16:
2401 case UNMAP:
2402 /*
2403 * The upper level CTL code will filter out any CDBs with
2404 * the immediate bit set and return the proper error.
2405 *
2406 * We don't really need to worry about what LBA range the
2407 * user asked to be synced out. When they issue a sync
2408 * cache command, we'll sync out the whole thing.
2409 */
2410 mtx_lock(&be_lun->lock);
2411 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2412 links);
2413 mtx_unlock(&be_lun->lock);
2414 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2415 break;
2416 case START_STOP_UNIT: {
2417 struct scsi_start_stop_unit *cdb;
2418
2419 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2420
2421 if (cdb->how & SSS_START)
2422 retval = ctl_start_lun(ctl_be_lun);
2423 else {
2424 retval = ctl_stop_lun(ctl_be_lun);
2425 /*
2426 * XXX KDM Copan-specific offline behavior.
2427 * Figure out a reasonable way to port this?
2428 */
2429#ifdef NEEDTOPORT
2430 if ((retval == 0)
2431 && (cdb->byte2 & SSS_ONOFFLINE))
2432 retval = ctl_lun_offline(ctl_be_lun);
2433#endif
2434 }
2435
2436 /*
2437 * In general, the above routines should not fail. They
2438 * just set state for the LUN. So we've got something
2439 * pretty wrong here if we can't start or stop the LUN.
2440 */
2441 if (retval != 0) {
2442 ctl_set_internal_failure(&io->scsiio,
2443 /*sks_valid*/ 1,
2444 /*retry_count*/ 0xf051);
2445 retval = CTL_RETVAL_COMPLETE;
2446 } else {
2447 ctl_set_success(&io->scsiio);
2448 }
2449 ctl_config_write_done(io);
2450 break;
2451 }
2452 default:
2453 ctl_set_invalid_opcode(&io->scsiio);
2454 ctl_config_write_done(io);
2455 retval = CTL_RETVAL_COMPLETE;
2456 break;
2457 }
2458
2459 return (retval);
2460
2461}
2462
2463static int
2464ctl_be_block_config_read(union ctl_io *io)
2465{
2466 return (0);
2467}
2468
2469static int
2470ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2471{
2472 struct ctl_be_block_lun *lun;
2473 int retval;
2474
2475 lun = (struct ctl_be_block_lun *)be_lun;
2476 retval = 0;
2477
2478 retval = sbuf_printf(sb, "<num_threads>");
2479
2480 if (retval != 0)
2481 goto bailout;
2482
2483 retval = sbuf_printf(sb, "%d", lun->num_threads);
2484
2485 if (retval != 0)
2486 goto bailout;
2487
2488 retval = sbuf_printf(sb, "</num_threads>");
2489
2490bailout:
2491
2492 return (retval);
2493}
2494
2495int
2496ctl_be_block_init(void)
2497{
2498 struct ctl_be_block_softc *softc;
2499 int retval;
2500
2501 softc = &backend_block_softc;
2502 retval = 0;
2503
2504 mtx_init(&softc->lock, "ctlblk", NULL, MTX_DEF);
2505 beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2506 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2507 STAILQ_INIT(&softc->disk_list);
2508 STAILQ_INIT(&softc->lun_list);
2509
2510 return (retval);
2511}
1317 io->scsiio.kern_data_len = beio->io_len;
1318 io->scsiio.kern_data_resid = 0;
1319 io->scsiio.kern_sg_entries = beio->num_segs;
1320 io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;
1321
1322 /*
1323 * For the read case, we need to read the data into our buffers and
1324 * then we can send it back to the user. For the write case, we
1325 * need to get the data from the user first.
1326 */
1327 if (beio->bio_cmd == BIO_READ) {
1328 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
1329 be_lun->dispatch(be_lun, beio);
1330 } else {
1331 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
1332#ifdef CTL_TIME_IO
1333 getbintime(&io->io_hdr.dma_start_bt);
1334#endif
1335 ctl_datamove(io);
1336 }
1337}
1338
1339static void
1340ctl_be_block_worker(void *context, int pending)
1341{
1342 struct ctl_be_block_lun *be_lun;
1343 struct ctl_be_block_softc *softc;
1344 union ctl_io *io;
1345
1346 be_lun = (struct ctl_be_block_lun *)context;
1347 softc = be_lun->softc;
1348
1349 DPRINTF("entered\n");
1350
1351 mtx_lock(&be_lun->lock);
1352 for (;;) {
1353 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1354 if (io != NULL) {
1355 struct ctl_be_block_io *beio;
1356
1357 DPRINTF("datamove queue\n");
1358
1359 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1360 ctl_io_hdr, links);
1361
1362 mtx_unlock(&be_lun->lock);
1363
1364 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1365
1366 be_lun->dispatch(be_lun, beio);
1367
1368 mtx_lock(&be_lun->lock);
1369 continue;
1370 }
1371 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1372 if (io != NULL) {
1374 DPRINTF("config write queue\n");
1375
1376 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1377 ctl_io_hdr, links);
1378
1379 mtx_unlock(&be_lun->lock);
1380
1381 ctl_be_block_cw_dispatch(be_lun, io);
1382
1383 mtx_lock(&be_lun->lock);
1384 continue;
1385 }
1386 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1387 if (io != NULL) {
1388 DPRINTF("input queue\n");
1389
1390 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1391 ctl_io_hdr, links);
1392 mtx_unlock(&be_lun->lock);
1393
1394 /*
1395 * We must drop the lock, since this routine and
1396 * its children may sleep.
1397 */
1398 ctl_be_block_dispatch(be_lun, io);
1399
1400 mtx_lock(&be_lun->lock);
1401 continue;
1402 }
1403
1404 /*
1405 * If we get here, there is no work left in the queues, so
1406 * just break out and let the task queue go to sleep.
1407 */
1408 break;
1409 }
1410 mtx_unlock(&be_lun->lock);
1411}
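/*
 * Editorial note: the loop above drains the queues in a fixed
 * priority order, presumably so that work already in flight is pushed
 * to completion before new commands are started:
 *
 *	datamove_queue, then config_write_queue, then input_queue
 */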
1412
1413/*
1414 * Entry point from CTL to the backend for I/O. We queue everything to a
1415 * work thread, so this just puts the I/O on a queue and wakes up the
1416 * thread.
1417 */
1418static int
1419ctl_be_block_submit(union ctl_io *io)
1420{
1421 struct ctl_be_block_lun *be_lun;
1422 struct ctl_be_lun *ctl_be_lun;
1423
1424 DPRINTF("entered\n");
1425
1426 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1427 CTL_PRIV_BACKEND_LUN].ptr;
1428 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
1429
1430 /*
1431 * Make sure we only get SCSI I/O.
1432 */
1433 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1434 "%#x) encountered", io->io_hdr.io_type));
1435
1436 PRIV(io)->len = 0;
1437
1438 mtx_lock(&be_lun->lock);
1439 /*
1440 * XXX KDM make sure that links is okay to use at this point.
1441 * Otherwise, we either need to add another field to ctl_io_hdr,
1442 * or deal with resource allocation here.
1443 */
1444 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1445 mtx_unlock(&be_lun->lock);
1446 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1447
1448 return (CTL_RETVAL_COMPLETE);
1449}
1450
1451static int
1452ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1453 int flag, struct thread *td)
1454{
1455 struct ctl_be_block_softc *softc;
1456 int error;
1457
1458 softc = &backend_block_softc;
1459
1460 error = 0;
1461
1462 switch (cmd) {
1463 case CTL_LUN_REQ: {
1464 struct ctl_lun_req *lun_req;
1465
1466 lun_req = (struct ctl_lun_req *)addr;
1467
1468 switch (lun_req->reqtype) {
1469 case CTL_LUNREQ_CREATE:
1470 error = ctl_be_block_create(softc, lun_req);
1471 break;
1472 case CTL_LUNREQ_RM:
1473 error = ctl_be_block_rm(softc, lun_req);
1474 break;
1475 case CTL_LUNREQ_MODIFY:
1476 error = ctl_be_block_modify(softc, lun_req);
1477 break;
1478 default:
1479 lun_req->status = CTL_LUN_ERROR;
1480 snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1481 "%s: invalid LUN request type %d", __func__,
1482 lun_req->reqtype);
1483 break;
1484 }
1485 break;
1486 }
1487 default:
1488 error = ENOTTY;
1489 break;
1490 }
1491
1492 return (error);
1493}
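/*
 * Editorial sketch (not part of the driver): how userland reaches the
 * handler above.  ctladm(8) wraps this; a direct, minimal equivalent,
 * assuming the usual CTL device node, would look roughly like:
 *
 *	int fd = open("/dev/cam/ctl", O_RDWR);
 *	struct ctl_lun_req req = { .reqtype = CTL_LUNREQ_CREATE };
 *	strlcpy(req.backend, "block", sizeof(req.backend));
 *	ioctl(fd, CTL_LUN_REQ, &req);
 *
 * Untested; option setup (e.g. the "file" argument) is omitted.
 */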
1494
1495static int
1496ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1497{
1498 struct ctl_be_block_filedata *file_data;
1499 struct ctl_lun_create_params *params;
1500 struct vattr vattr;
1501 int error;
1502
1503 error = 0;
1504 file_data = &be_lun->backend.file;
1505 params = &req->reqdata.create;
1506
1507 be_lun->dev_type = CTL_BE_BLOCK_FILE;
1508 be_lun->dispatch = ctl_be_block_dispatch_file;
1509 be_lun->lun_flush = ctl_be_block_flush_file;
1510
1511 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1512 if (error != 0) {
1513 snprintf(req->error_str, sizeof(req->error_str),
1514 "error calling VOP_GETATTR() for file %s",
1515 be_lun->dev_path);
1516 return (error);
1517 }
1518
1519 /*
1520 * Verify that we have the ability to upgrade to exclusive
1521 * access on this file so we can trap errors at open instead
1522 * of reporting them during first access.
1523 */
1524 if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
1525 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
1526 if (be_lun->vn->v_iflag & VI_DOOMED) {
1527 error = EBADF;
1528 snprintf(req->error_str, sizeof(req->error_str),
1529 "error locking file %s", be_lun->dev_path);
1530 return (error);
1531 }
1532 }
1533
1535 file_data->cred = crhold(curthread->td_ucred);
1536 if (params->lun_size_bytes != 0)
1537 be_lun->size_bytes = params->lun_size_bytes;
1538 else
1539 be_lun->size_bytes = vattr.va_size;
1540 /*
1541 * We set the multi thread flag for file operations because all
1542 * filesystems (in theory) are capable of allowing multiple readers
1543 * of a file at once. So we want to get the maximum possible
1544 * concurrency.
1545 */
1546 be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD;
1547
1548 /*
1549 * XXX KDM vattr.va_blocksize may be larger than 512 bytes here.
1550 * With ZFS, it is 131072 bytes. Block sizes that large don't work
1551 * with disklabel and UFS on FreeBSD at least. Large block sizes
1552 * may not work with other OSes as well. So just export a sector
1553 * size of 512 bytes, which should work with any OS or
1554 * application. Since our backing is a file, any block size will
1555 * work fine for the backing store.
1556 */
1557#if 0
1558	be_lun->blocksize = vattr.va_blocksize;
1559#endif
1560 if (params->blocksize_bytes != 0)
1561 be_lun->blocksize = params->blocksize_bytes;
1562 else
1563 be_lun->blocksize = 512;
1564
1565 /*
1566 * Sanity check. The media size has to be at least one
1567 * sector long.
1568 */
1569 if (be_lun->size_bytes < be_lun->blocksize) {
1570 error = EINVAL;
1571 snprintf(req->error_str, sizeof(req->error_str),
1572 "file %s size %ju < block size %u", be_lun->dev_path,
1573 (uintmax_t)be_lun->size_bytes, be_lun->blocksize);
1574 }
1575 return (error);
1576}
1577
1578static int
1579ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1580{
1581 struct ctl_lun_create_params *params;
1582 struct vattr vattr;
1583 struct cdev *dev;
1584 struct cdevsw *devsw;
1585 int error;
1586 off_t ps, pss, po, pos;
1587
1588 params = &req->reqdata.create;
1589
1590 be_lun->dev_type = CTL_BE_BLOCK_DEV;
1591 be_lun->dispatch = ctl_be_block_dispatch_dev;
1592 be_lun->lun_flush = ctl_be_block_flush_dev;
1593 be_lun->unmap = ctl_be_block_unmap_dev;
1594 be_lun->backend.dev.cdev = be_lun->vn->v_rdev;
1595 be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev,
1596 &be_lun->backend.dev.dev_ref);
1597 if (be_lun->backend.dev.csw == NULL)
1598 panic("Unable to retrieve device switch");
1599
1600 error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED);
1601 if (error) {
1602 snprintf(req->error_str, sizeof(req->error_str),
1603 "%s: error getting vnode attributes for device %s",
1604 __func__, be_lun->dev_path);
1605 return (error);
1606 }
1607
1608 dev = be_lun->vn->v_rdev;
1609 devsw = dev->si_devsw;
1610 if (!devsw->d_ioctl) {
1611 snprintf(req->error_str, sizeof(req->error_str),
1612 "%s: no d_ioctl for device %s!", __func__,
1613 be_lun->dev_path);
1614 return (ENODEV);
1615 }
1616
1617 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE,
1618 (caddr_t)&be_lun->blocksize, FREAD,
1619 curthread);
1620 if (error) {
1621 snprintf(req->error_str, sizeof(req->error_str),
1622 "%s: error %d returned for DIOCGSECTORSIZE ioctl "
1623 "on %s!", __func__, error, be_lun->dev_path);
1624 return (error);
1625 }
1626
1627 /*
1628 * If the user has asked for a blocksize that is greater than the
1629 * backing device's blocksize, we can do it only if the blocksize
1630 * the user is asking for is an even multiple of the underlying
1631 * device's blocksize.
1632 */
1633 if ((params->blocksize_bytes != 0)
1634 && (params->blocksize_bytes > be_lun->blocksize)) {
1635 uint32_t bs_multiple, tmp_blocksize;
1636
1637 bs_multiple = params->blocksize_bytes / be_lun->blocksize;
1638
1639 tmp_blocksize = bs_multiple * be_lun->blocksize;
1640
1641 if (tmp_blocksize == params->blocksize_bytes) {
1642 be_lun->blocksize = params->blocksize_bytes;
1643 } else {
1644 snprintf(req->error_str, sizeof(req->error_str),
1645 "%s: requested blocksize %u is not an even "
1646 "multiple of backing device blocksize %u",
1647 __func__, params->blocksize_bytes,
1648 be_lun->blocksize);
1649 return (EINVAL);
1650
1651 }
1652 } else if ((params->blocksize_bytes != 0)
1653 && (params->blocksize_bytes != be_lun->blocksize)) {
1654 snprintf(req->error_str, sizeof(req->error_str),
1655 "%s: requested blocksize %u < backing device "
1656 "blocksize %u", __func__, params->blocksize_bytes,
1657 be_lun->blocksize);
1658 return (EINVAL);
1659 }
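/*
 * Worked example (editorial) of the checks above, for a 512-byte
 * backing device: a requested blocksize of 4096 passes (8 * 512);
 * 4100 fails the even-multiple test (4100 / 512 = 8, but
 * 8 * 512 != 4100); 256 lands in the second branch and is rejected
 * as smaller than the device blocksize.
 */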
1660
1661 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
1662 (caddr_t)&be_lun->size_bytes, FREAD,
1663 curthread);
1664 if (error) {
1665 snprintf(req->error_str, sizeof(req->error_str),
1666			 "%s: error %d returned for DIOCGMEDIASIZE "
1667			 "ioctl on %s!", __func__, error,
1668			 be_lun->dev_path);
1669 return (error);
1670 }
1671
1672 if (params->lun_size_bytes != 0) {
1673 if (params->lun_size_bytes > be_lun->size_bytes) {
1674 snprintf(req->error_str, sizeof(req->error_str),
1675 "%s: requested LUN size %ju > backing device "
1676 "size %ju", __func__,
1677 (uintmax_t)params->lun_size_bytes,
1678 (uintmax_t)be_lun->size_bytes);
1679 return (EINVAL);
1680 }
1681
1682 be_lun->size_bytes = params->lun_size_bytes;
1683 }
1684
1685 error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE,
1686 (caddr_t)&ps, FREAD, curthread);
1687 if (error)
1688 ps = po = 0;
1689 else {
1690 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET,
1691 (caddr_t)&po, FREAD, curthread);
1692 if (error)
1693 po = 0;
1694 }
1695 pss = ps / be_lun->blocksize;
1696 pos = po / be_lun->blocksize;
1697 if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
1698 ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
1699 be_lun->pblockexp = fls(pss) - 1;
1700 be_lun->pblockoff = (pss - pos) % pss;
1701 }
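/*
 * Worked example (editorial): a volume reporting a 65536-byte stripe
 * size (ps) at offset 512 (po), exported with a 512-byte blocksize,
 * gives pss = 128 and pos = 1.  All the checks above pass, so
 * pblockexp = fls(128) - 1 = 7 and pblockoff = (128 - 1) % 128 = 127.
 */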
1702
1703 return (0);
1704}
1705
1706static int
1707ctl_be_block_close(struct ctl_be_block_lun *be_lun)
1708{
1709 DROP_GIANT();
1710 if (be_lun->vn) {
1711 int flags = FREAD | FWRITE;
1712
1713 switch (be_lun->dev_type) {
1714 case CTL_BE_BLOCK_DEV:
1715 if (be_lun->backend.dev.csw) {
1716 dev_relthread(be_lun->backend.dev.cdev,
1717 be_lun->backend.dev.dev_ref);
1718 be_lun->backend.dev.csw = NULL;
1719 be_lun->backend.dev.cdev = NULL;
1720 }
1721 break;
1722 case CTL_BE_BLOCK_FILE:
1723 break;
1724 case CTL_BE_BLOCK_NONE:
1725 break;
1726 default:
1727 panic("Unexpected backend type.");
1728 break;
1729 }
1730
1731 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
1732 be_lun->vn = NULL;
1733
1734 switch (be_lun->dev_type) {
1735 case CTL_BE_BLOCK_DEV:
1736 break;
1737 case CTL_BE_BLOCK_FILE:
1738 if (be_lun->backend.file.cred != NULL) {
1739 crfree(be_lun->backend.file.cred);
1740 be_lun->backend.file.cred = NULL;
1741 }
1742 break;
1743 case CTL_BE_BLOCK_NONE:
1744 break;
1745 default:
1746 panic("Unexpected backend type.");
1747 break;
1748 }
1749 }
1750 PICKUP_GIANT();
1751
1752 return (0);
1753}
1754
1755static int
1756ctl_be_block_open(struct ctl_be_block_softc *softc,
1757 struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1758{
1759 struct nameidata nd;
1760 int flags;
1761 int error;
1762
1763 /*
1764 * XXX KDM allow a read-only option?
1765 */
1766 flags = FREAD | FWRITE;
1767 error = 0;
1768
1769 if (rootvnode == NULL) {
1770 snprintf(req->error_str, sizeof(req->error_str),
1771 "%s: Root filesystem is not mounted", __func__);
1772 return (1);
1773 }
1774
1775 if (!curthread->td_proc->p_fd->fd_cdir) {
1776 curthread->td_proc->p_fd->fd_cdir = rootvnode;
1777 VREF(rootvnode);
1778 }
1779 if (!curthread->td_proc->p_fd->fd_rdir) {
1780 curthread->td_proc->p_fd->fd_rdir = rootvnode;
1781 VREF(rootvnode);
1782 }
1783 if (!curthread->td_proc->p_fd->fd_jdir) {
1784 curthread->td_proc->p_fd->fd_jdir = rootvnode;
1785 VREF(rootvnode);
1786 }
1787
1788 again:
1789 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
1790 error = vn_open(&nd, &flags, 0, NULL);
1791 if (error) {
1792		/*
1793		 * If the user didn't give us a fully qualified path, the
1794		 * only reasonable guess is to prepend "/dev/": a bare
1795		 * "da0" is retried as "/dev/da0".  To use a plain file,
1796		 * the full path must be specified.
1797		 */
1798		if (be_lun->dev_path[0] != '/') {
1799			const char *dev_path = "/dev/";
1800			char *dev_name;
1801
1802			/* Try prepending "/dev/"; M_WAITOK allocations
1803			 * never fail, so no NULL check is needed. */
1804			dev_name = malloc(strlen(be_lun->dev_path)
1805					+ strlen(dev_path) + 1,
1806					M_CTLBLK, M_WAITOK);
1807			sprintf(dev_name, "%s%s", dev_path,
1808				be_lun->dev_path);
1809			free(be_lun->dev_path, M_CTLBLK);
1810			be_lun->dev_path = dev_name;
1811			goto again;
1812		}
1814 snprintf(req->error_str, sizeof(req->error_str),
1815 "%s: error opening %s", __func__, be_lun->dev_path);
1816 return (error);
1817 }
1818
1819 NDFREE(&nd, NDF_ONLY_PNBUF);
1820
1821 be_lun->vn = nd.ni_vp;
1822
1823 /* We only support disks and files. */
1824 if (vn_isdisk(be_lun->vn, &error)) {
1825 error = ctl_be_block_open_dev(be_lun, req);
1826 } else if (be_lun->vn->v_type == VREG) {
1827 error = ctl_be_block_open_file(be_lun, req);
1828 } else {
1829 error = EINVAL;
1830 snprintf(req->error_str, sizeof(req->error_str),
1831 "%s is not a disk or plain file", be_lun->dev_path);
1832 }
1833 VOP_UNLOCK(be_lun->vn, 0);
1834
1835 if (error != 0) {
1836 ctl_be_block_close(be_lun);
1837 return (error);
1838 }
1839
1840 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
1841 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
1842
1843 return (0);
1844}
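/*
 * Worked example (editorial): with the default 512-byte blocksize,
 * blocksize_shift = fls(512) - 1 = 9, so a 1 GiB backing store
 * yields size_blocks = (1 << 30) >> 9 = 2097152.
 */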
1845
1846static int
1847ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
1848{
1849 struct ctl_be_block_lun *be_lun;
1850 struct ctl_lun_create_params *params;
1852 char tmpstr[32];
1853 char *value;
1854 int retval, num_threads, unmap;
1855 int tmp_num_threads;
1856
1857 params = &req->reqdata.create;
1858 retval = 0;
1859
1860 num_threads = cbb_num_threads;
1861
1862 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
1863
1864 be_lun->softc = softc;
1865 STAILQ_INIT(&be_lun->input_queue);
1866 STAILQ_INIT(&be_lun->config_write_queue);
1867 STAILQ_INIT(&be_lun->datamove_queue);
1868 sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
1869 mtx_init(&be_lun->lock, be_lun->lunname, NULL, MTX_DEF);
1870 ctl_init_opts(&be_lun->ctl_be_lun, req);
1871
1872 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
1873 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
1874
1875 if (be_lun->lun_zone == NULL) {
1876 snprintf(req->error_str, sizeof(req->error_str),
1877 "%s: error allocating UMA zone", __func__);
1878 goto bailout_error;
1879 }
1880
1881 if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1882 be_lun->ctl_be_lun.lun_type = params->device_type;
1883 else
1884 be_lun->ctl_be_lun.lun_type = T_DIRECT;
1885
1886 if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
1887 value = ctl_get_opt(&be_lun->ctl_be_lun, "file");
1888 if (value == NULL) {
1889 snprintf(req->error_str, sizeof(req->error_str),
1890 "%s: no file argument specified", __func__);
1891 goto bailout_error;
1892 }
1893 be_lun->dev_path = strdup(value, M_CTLBLK);
1894
1895 retval = ctl_be_block_open(softc, be_lun, req);
1896 if (retval != 0) {
1897 retval = 0;
1898 goto bailout_error;
1899 }
1900
1901 /*
1902 * Tell the user the size of the file/device.
1903 */
1904 params->lun_size_bytes = be_lun->size_bytes;
1905
1906 /*
1907 * The maximum LBA is the size - 1.
1908 */
1909 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
1910 } else {
1911 /*
1912 * For processor devices, we don't have any size.
1913 */
1914 be_lun->blocksize = 0;
1915 be_lun->pblockexp = 0;
1916 be_lun->pblockoff = 0;
1917 be_lun->size_blocks = 0;
1918 be_lun->size_bytes = 0;
1919 be_lun->ctl_be_lun.maxlba = 0;
1920 params->lun_size_bytes = 0;
1921
1922 /*
1923 * Default to just 1 thread for processor devices.
1924 */
1925 num_threads = 1;
1926 }
1927
1928	/*
1929	 * XXX This searching loop could be combined with the loop
1930	 * above.
1931	 */
1932 value = ctl_get_opt(&be_lun->ctl_be_lun, "num_threads");
1933 if (value != NULL) {
1934 tmp_num_threads = strtol(value, NULL, 0);
1935
1936		/*
1937		 * We don't let the user specify fewer than one
1938		 * thread, but we trust them not to ask for an
1939		 * absurd number such as 1000.
1940		 */
1941		if (tmp_num_threads < 1) {
1942			snprintf(req->error_str, sizeof(req->error_str),
1943				 "%s: invalid number of threads %s",
1944				 __func__, value);
1945			goto bailout_error;
1946		}
1947 num_threads = tmp_num_threads;
1948 }
1949 unmap = 0;
1950 value = ctl_get_opt(&be_lun->ctl_be_lun, "unmap");
1951 if (value != NULL && strcmp(value, "on") == 0)
1952 unmap = 1;
1953
1954 be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
1955 be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
1956 if (unmap)
1957 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
1958 be_lun->ctl_be_lun.be_lun = be_lun;
1959 be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
1960 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
1961 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
1962 /* Tell the user the blocksize we ended up using */
1963 params->blocksize_bytes = be_lun->blocksize;
1964 if (params->flags & CTL_LUN_FLAG_ID_REQ) {
1965 be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
1966 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
1967 } else
1968 be_lun->ctl_be_lun.req_lun_id = 0;
1969
1970 be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
1971 be_lun->ctl_be_lun.lun_config_status =
1972 ctl_be_block_lun_config_status;
1973 be_lun->ctl_be_lun.be = &ctl_be_block_driver;
1974
1975 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
1976 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
1977 softc->num_luns);
1978 strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
1979 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1980 sizeof(tmpstr)));
1981
1982 /* Tell the user what we used for a serial number */
1983 strncpy((char *)params->serial_num, tmpstr,
1984 ctl_min(sizeof(params->serial_num), sizeof(tmpstr)));
1985 } else {
1986 strncpy((char *)be_lun->ctl_be_lun.serial_num,
1987 params->serial_num,
1988 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1989 sizeof(params->serial_num)));
1990 }
1991 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
1992 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
1993 strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
1994 ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1995 sizeof(tmpstr)));
1996
1997 /* Tell the user what we used for a device ID */
1998 strncpy((char *)params->device_id, tmpstr,
1999 ctl_min(sizeof(params->device_id), sizeof(tmpstr)));
2000 } else {
2001 strncpy((char *)be_lun->ctl_be_lun.device_id,
2002 params->device_id,
2003 ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
2004 sizeof(params->device_id)));
2005 }
2006
2007 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2008
2009 be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2010 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2011
2012 if (be_lun->io_taskqueue == NULL) {
2013 snprintf(req->error_str, sizeof(req->error_str),
2014 "%s: Unable to create taskqueue", __func__);
2015 goto bailout_error;
2016 }
2017
2018 /*
2019 * Note that we start the same number of threads by default for
2020 * both the file case and the block device case. For the file
2021 * case, we need multiple threads to allow concurrency, because the
2022 * vnode interface is designed to be a blocking interface. For the
2023 * block device case, ZFS zvols at least will block the caller's
2024 * context in many instances, and so we need multiple threads to
2025 * overcome that problem. Other block devices don't need as many
2026 * threads, but they shouldn't cause too many problems.
2027 *
2028 * If the user wants to just have a single thread for a block
2029 * device, he can specify that when the LUN is created, or change
2030 * the tunable/sysctl to alter the default number of threads.
2031 */
2032 retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2033 /*num threads*/num_threads,
2034 /*priority*/PWAIT,
2035 /*thread name*/
2036 "%s taskq", be_lun->lunname);
2037
2038 if (retval != 0)
2039 goto bailout_error;
2040
2041 be_lun->num_threads = num_threads;
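/*
 * Editorial usage sketch: the per-LUN thread count can be set at
 * create time via the "num_threads" option read above, and the
 * default (cbb_num_threads) via tunable/sysctl.  The exact ctladm(8)
 * and sysctl spellings here are assumptions, not taken from this
 * file:
 *
 *	# ctladm create -b block -o file=/dev/zvol/tank/vol0 \
 *	      -o num_threads=32
 *	# sysctl kern.cam.ctl.block.num_threads=32
 */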
2042
2043 mtx_lock(&softc->lock);
2044 softc->num_luns++;
2045 STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2046
2047 mtx_unlock(&softc->lock);
2048
2049 retval = ctl_add_lun(&be_lun->ctl_be_lun);
2050 if (retval != 0) {
2051 mtx_lock(&softc->lock);
2052 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2053 links);
2054 softc->num_luns--;
2055 mtx_unlock(&softc->lock);
2056 snprintf(req->error_str, sizeof(req->error_str),
2057 "%s: ctl_add_lun() returned error %d, see dmesg for "
2058 "details", __func__, retval);
2059 retval = 0;
2060 goto bailout_error;
2061 }
2062
2063 mtx_lock(&softc->lock);
2064
2065 /*
2066 * Tell the config_status routine that we're waiting so it won't
2067 * clean up the LUN in the event of an error.
2068 */
2069 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2070
2071 while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2072 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2073 if (retval == EINTR)
2074 break;
2075 }
2076 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2077
2078 if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2079 snprintf(req->error_str, sizeof(req->error_str),
2080 "%s: LUN configuration error, see dmesg for details",
2081 __func__);
2082 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2083 links);
2084 softc->num_luns--;
2085 mtx_unlock(&softc->lock);
2086 goto bailout_error;
2087 } else {
2088 params->req_lun_id = be_lun->ctl_be_lun.lun_id;
2089 }
2090
2091 mtx_unlock(&softc->lock);
2092
2093 be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2094 be_lun->blocksize,
2095 DEVSTAT_ALL_SUPPORTED,
2096 be_lun->ctl_be_lun.lun_type
2097 | DEVSTAT_TYPE_IF_OTHER,
2098 DEVSTAT_PRIORITY_OTHER);
2099
2101 req->status = CTL_LUN_OK;
2102
2103 return (retval);
2104
2105bailout_error:
2106 req->status = CTL_LUN_ERROR;
2107
2108 if (be_lun->io_taskqueue != NULL)
2109 taskqueue_free(be_lun->io_taskqueue);
2110 ctl_be_block_close(be_lun);
2111 if (be_lun->dev_path != NULL)
2112 free(be_lun->dev_path, M_CTLBLK);
2113 if (be_lun->lun_zone != NULL)
2114 uma_zdestroy(be_lun->lun_zone);
2115 ctl_free_opts(&be_lun->ctl_be_lun);
2116 mtx_destroy(&be_lun->lock);
2117 free(be_lun, M_CTLBLK);
2118
2119 return (retval);
2120}
2121
2122static int
2123ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2124{
2125 struct ctl_lun_rm_params *params;
2126 struct ctl_be_block_lun *be_lun;
2127 int retval;
2128
2129 params = &req->reqdata.rm;
2130
2131 mtx_lock(&softc->lock);
2132
2133 be_lun = NULL;
2134
2135 STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2136 if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2137 break;
2138 }
2139 mtx_unlock(&softc->lock);
2140
2141 if (be_lun == NULL) {
2142 snprintf(req->error_str, sizeof(req->error_str),
2143 "%s: LUN %u is not managed by the block backend",
2144 __func__, params->lun_id);
2145 goto bailout_error;
2146 }
2147
2148 retval = ctl_disable_lun(&be_lun->ctl_be_lun);
2149
2150 if (retval != 0) {
2151 snprintf(req->error_str, sizeof(req->error_str),
2152 "%s: error %d returned from ctl_disable_lun() for "
2153 "LUN %d", __func__, retval, params->lun_id);
2154		goto bailout_error;
2155	}
2157
2158 retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
2159 if (retval != 0) {
2160 snprintf(req->error_str, sizeof(req->error_str),
2161 "%s: error %d returned from ctl_invalidate_lun() for "
2162 "LUN %d", __func__, retval, params->lun_id);
2163 goto bailout_error;
2164 }
2165
2166 mtx_lock(&softc->lock);
2167
2168 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2169
2170 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2171 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2172 if (retval == EINTR)
2173 break;
2174 }
2175
2176 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2177
2178 if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2179 snprintf(req->error_str, sizeof(req->error_str),
2180 "%s: interrupted waiting for LUN to be freed",
2181 __func__);
2182 mtx_unlock(&softc->lock);
2183 goto bailout_error;
2184 }
2185
2186 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2187
2188 softc->num_luns--;
2189 mtx_unlock(&softc->lock);
2190
2191 taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
2192
2193 taskqueue_free(be_lun->io_taskqueue);
2194
2195 ctl_be_block_close(be_lun);
2196
2197 if (be_lun->disk_stats != NULL)
2198 devstat_remove_entry(be_lun->disk_stats);
2199
2200 uma_zdestroy(be_lun->lun_zone);
2201
2202 ctl_free_opts(&be_lun->ctl_be_lun);
2203 free(be_lun->dev_path, M_CTLBLK);
2204
2205 free(be_lun, M_CTLBLK);
2206
2207 req->status = CTL_LUN_OK;
2208
2209 return (0);
2210
2211bailout_error:
2212
2213 req->status = CTL_LUN_ERROR;
2214
2215 return (0);
2216}
2217
2218static int
2219ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2220 struct ctl_lun_req *req)
2221{
2222 struct vattr vattr;
2223 int error;
2224 struct ctl_lun_modify_params *params;
2225
2226 params = &req->reqdata.modify;
2227
2228 if (params->lun_size_bytes != 0) {
2229 be_lun->size_bytes = params->lun_size_bytes;
2230 } else {
2231 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2232 if (error != 0) {
2233 snprintf(req->error_str, sizeof(req->error_str),
2234 "error calling VOP_GETATTR() for file %s",
2235 be_lun->dev_path);
2236 return (error);
2237 }
2238
2239 be_lun->size_bytes = vattr.va_size;
2240 }
2241
2242 return (0);
2243}
2244
2245static int
2246ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2247 struct ctl_lun_req *req)
2248{
2249 struct cdev *dev;
2250 struct cdevsw *devsw;
2251 int error;
2252 struct ctl_lun_modify_params *params;
2253 uint64_t size_bytes;
2254
2255 params = &req->reqdata.modify;
2256
2257 dev = be_lun->vn->v_rdev;
2258 devsw = dev->si_devsw;
2259 if (!devsw->d_ioctl) {
2260 snprintf(req->error_str, sizeof(req->error_str),
2261 "%s: no d_ioctl for device %s!", __func__,
2262 be_lun->dev_path);
2263 return (ENODEV);
2264 }
2265
2266 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
2267 (caddr_t)&size_bytes, FREAD,
2268 curthread);
2269 if (error) {
2270 snprintf(req->error_str, sizeof(req->error_str),
2271 "%s: error %d returned for DIOCGMEDIASIZE ioctl "
2272 "on %s!", __func__, error, be_lun->dev_path);
2273 return (error);
2274 }
2275
2276 if (params->lun_size_bytes != 0) {
2277 if (params->lun_size_bytes > size_bytes) {
2278 snprintf(req->error_str, sizeof(req->error_str),
2279 "%s: requested LUN size %ju > backing device "
2280 "size %ju", __func__,
2281 (uintmax_t)params->lun_size_bytes,
2282 (uintmax_t)size_bytes);
2283 return (EINVAL);
2284 }
2285
2286 be_lun->size_bytes = params->lun_size_bytes;
2287 } else {
2288 be_lun->size_bytes = size_bytes;
2289 }
2290
2291 return (0);
2292}
2293
2294static int
2295ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2296{
2297 struct ctl_lun_modify_params *params;
2298 struct ctl_be_block_lun *be_lun;
2299 int error;
2300
2301 params = &req->reqdata.modify;
2302
2303 mtx_lock(&softc->lock);
2304
2305 be_lun = NULL;
2306
2307 STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2308 if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2309 break;
2310 }
2311 mtx_unlock(&softc->lock);
2312
2313 if (be_lun == NULL) {
2314 snprintf(req->error_str, sizeof(req->error_str),
2315 "%s: LUN %u is not managed by the block backend",
2316 __func__, params->lun_id);
2317 goto bailout_error;
2318 }
2319
2320 if (params->lun_size_bytes != 0) {
2321 if (params->lun_size_bytes < be_lun->blocksize) {
2322 snprintf(req->error_str, sizeof(req->error_str),
2323 "%s: LUN size %ju < blocksize %u", __func__,
2324			 (uintmax_t)params->lun_size_bytes, be_lun->blocksize);
2325 goto bailout_error;
2326 }
2327 }
2328
2329 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2330
2331 if (be_lun->vn->v_type == VREG)
2332 error = ctl_be_block_modify_file(be_lun, req);
2333 else
2334 error = ctl_be_block_modify_dev(be_lun, req);
2335
2336 VOP_UNLOCK(be_lun->vn, 0);
2337
2338 if (error != 0)
2339 goto bailout_error;
2340
2341 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
2342
2343 /*
2344 * The maximum LBA is the size - 1.
2345 *
2346 * XXX: Note that this field is being updated without locking,
2347 * which might cause problems on 32-bit architectures.
2348 */
2349 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
2350 ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
2351
2352 /* Tell the user the exact size we ended up using */
2353 params->lun_size_bytes = be_lun->size_bytes;
2354
2355 req->status = CTL_LUN_OK;
2356
2357 return (0);
2358
2359bailout_error:
2360 req->status = CTL_LUN_ERROR;
2361
2362 return (0);
2363}
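/*
 * Editorial usage sketch for the modify path above.  A request with
 * lun_size_bytes == 0 resizes the LUN to the current backing store
 * size; the ctladm(8) spelling is an assumption, not taken from this
 * file:
 *
 *	# ctladm modify -b block -l 0 -s 4G
 */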
2364
2365static void
2366ctl_be_block_lun_shutdown(void *be_lun)
2367{
2368 struct ctl_be_block_lun *lun;
2369 struct ctl_be_block_softc *softc;
2370
2371 lun = (struct ctl_be_block_lun *)be_lun;
2372
2373 softc = lun->softc;
2374
2375 mtx_lock(&softc->lock);
2376 lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2377 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2378 wakeup(lun);
2379	mtx_unlock(&softc->lock);
2380}
2382
2383static void
2384ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2385{
2386 struct ctl_be_block_lun *lun;
2387 struct ctl_be_block_softc *softc;
2388
2389 lun = (struct ctl_be_block_lun *)be_lun;
2390 softc = lun->softc;
2391
2392 if (status == CTL_LUN_CONFIG_OK) {
2393 mtx_lock(&softc->lock);
2394 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2395 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2396 wakeup(lun);
2397 mtx_unlock(&softc->lock);
2398
2399 /*
2400 * We successfully added the LUN, attempt to enable it.
2401 */
2402 if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
2403 printf("%s: ctl_enable_lun() failed!\n", __func__);
2404 if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
2405 printf("%s: ctl_invalidate_lun() failed!\n",
2406 __func__);
2407 }
2408 }
2409
2410 return;
2411 }
2412
2414 mtx_lock(&softc->lock);
2415 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2416 lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2417 wakeup(lun);
2418 mtx_unlock(&softc->lock);
2419}
2420
2422static int
2423ctl_be_block_config_write(union ctl_io *io)
2424{
2425 struct ctl_be_block_lun *be_lun;
2426 struct ctl_be_lun *ctl_be_lun;
2427 int retval;
2428
2429 retval = 0;
2430
2431 DPRINTF("entered\n");
2432
2433 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2434 CTL_PRIV_BACKEND_LUN].ptr;
2435 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
2436
2437 switch (io->scsiio.cdb[0]) {
2438 case SYNCHRONIZE_CACHE:
2439 case SYNCHRONIZE_CACHE_16:
2440 case WRITE_SAME_10:
2441 case WRITE_SAME_16:
2442 case UNMAP:
2443 /*
2444 * The upper level CTL code will filter out any CDBs with
2445 * the immediate bit set and return the proper error.
2446 *
2447 * We don't really need to worry about what LBA range the
2448 * user asked to be synced out. When they issue a sync
2449 * cache command, we'll sync out the whole thing.
2450 */
2451 mtx_lock(&be_lun->lock);
2452 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2453 links);
2454 mtx_unlock(&be_lun->lock);
2455 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2456 break;
2457 case START_STOP_UNIT: {
2458 struct scsi_start_stop_unit *cdb;
2459
2460 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2461
2462 if (cdb->how & SSS_START)
2463 retval = ctl_start_lun(ctl_be_lun);
2464 else {
2465 retval = ctl_stop_lun(ctl_be_lun);
2466 /*
2467 * XXX KDM Copan-specific offline behavior.
2468 * Figure out a reasonable way to port this?
2469 */
2470#ifdef NEEDTOPORT
2471 if ((retval == 0)
2472 && (cdb->byte2 & SSS_ONOFFLINE))
2473 retval = ctl_lun_offline(ctl_be_lun);
2474#endif
2475 }
2476
2477 /*
2478 * In general, the above routines should not fail. They
2479 * just set state for the LUN. So we've got something
2480 * pretty wrong here if we can't start or stop the LUN.
2481 */
2482 if (retval != 0) {
2483 ctl_set_internal_failure(&io->scsiio,
2484 /*sks_valid*/ 1,
2485 /*retry_count*/ 0xf051);
2486 retval = CTL_RETVAL_COMPLETE;
2487 } else {
2488 ctl_set_success(&io->scsiio);
2489 }
2490 ctl_config_write_done(io);
2491 break;
2492 }
2493 default:
2494 ctl_set_invalid_opcode(&io->scsiio);
2495 ctl_config_write_done(io);
2496 retval = CTL_RETVAL_COMPLETE;
2497 break;
2498 }
2499
2500	return (retval);
2501}
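/*
 * Editorial note: the CDBs queued above are not completed inline; the
 * worker thread pops them from config_write_queue and hands them to
 * ctl_be_block_cw_dispatch(), which reaches the per-backing handlers
 * installed at open time, e.g. for the cache-sync commands:
 *
 *	be_lun->lun_flush	(ctl_be_block_flush_file or _dev)
 */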
2503
2504static int
2505ctl_be_block_config_read(union ctl_io *io)
2506{
2507 return (0);
2508}
2509
2510static int
2511ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2512{
2513 struct ctl_be_block_lun *lun;
2514 int retval;
2515
2516 lun = (struct ctl_be_block_lun *)be_lun;
2517 retval = 0;
2518
2519 retval = sbuf_printf(sb, "<num_threads>");
2520
2521 if (retval != 0)
2522 goto bailout;
2523
2524 retval = sbuf_printf(sb, "%d", lun->num_threads);
2525
2526 if (retval != 0)
2527 goto bailout;
2528
2529 retval = sbuf_printf(sb, "</num_threads>");
2530
2531bailout:
2532
2533 return (retval);
2534}
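/*
 * Editorial note: the sbuf_printf() calls above emit a small XML
 * fragment, e.g. "<num_threads>14</num_threads>", which is
 * presumably surfaced through LUN listings such as
 * "ctladm devlist -v".
 */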
2535
2536int
2537ctl_be_block_init(void)
2538{
2539 struct ctl_be_block_softc *softc;
2540 int retval;
2541
2542 softc = &backend_block_softc;
2543 retval = 0;
2544
2545 mtx_init(&softc->lock, "ctlblk", NULL, MTX_DEF);
2546 beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2547 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2548 STAILQ_INIT(&softc->disk_list);
2549 STAILQ_INIT(&softc->lun_list);
2550
2551 return (retval);
2552}