nvme_ns.c revision 248756
1/*- 2 * Copyright (C) 2012 Intel Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/dev/nvme/nvme_ns.c 248756 2013-03-26 21:00:18Z jimharris $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/bus.h> 33#include <sys/conf.h> 34#include <sys/disk.h> 35#include <sys/fcntl.h> 36#include <sys/ioccom.h> 37#include <sys/module.h> 38#include <sys/proc.h> 39 40#include <dev/pci/pcivar.h> 41 42#include "nvme_private.h" 43 44static void 45nvme_ns_cb(void *arg, const struct nvme_completion *status) 46{ 47 struct nvme_completion *cpl = arg; 48 struct mtx *mtx; 49 50 /* 51 * Copy status into the argument passed by the caller, so that 52 * the caller can check the status to determine if the 53 * the request passed or failed. 54 */ 55 memcpy(cpl, status, sizeof(*cpl)); 56 mtx = mtx_pool_find(mtxpool_sleep, cpl); 57 mtx_lock(mtx); 58 wakeup(cpl); 59 mtx_unlock(mtx); 60} 61 62static int 63nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, 64 struct thread *td) 65{ 66 struct nvme_namespace *ns; 67 struct nvme_controller *ctrlr; 68 struct nvme_completion cpl; 69 struct mtx *mtx; 70 71 ns = cdev->si_drv1; 72 ctrlr = ns->ctrlr; 73 74 switch (cmd) { 75 case NVME_IDENTIFY_NAMESPACE: 76#ifdef CHATHAM2 77 /* 78 * Don't refresh data on Chatham, since Chatham returns 79 * garbage on IDENTIFY anyways. 80 */ 81 if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) { 82 memcpy(arg, &ns->data, sizeof(ns->data)); 83 break; 84 } 85#endif 86 /* Refresh data before returning to user. */ 87 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 88 mtx_lock(mtx); 89 nvme_ctrlr_cmd_identify_namespace(ctrlr, ns->id, &ns->data, 90 nvme_ns_cb, &cpl); 91 msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0); 92 mtx_unlock(mtx); 93 if (nvme_completion_is_error(&cpl)) 94 return (ENXIO); 95 memcpy(arg, &ns->data, sizeof(ns->data)); 96 break; 97 case NVME_IO_TEST: 98 case NVME_BIO_TEST: 99 nvme_ns_test(ns, cmd, arg); 100 break; 101 case DIOCGMEDIASIZE: 102 *(off_t *)arg = (off_t)nvme_ns_get_size(ns); 103 break; 104 case DIOCGSECTORSIZE: 105 *(u_int *)arg = nvme_ns_get_sector_size(ns); 106 break; 107 default: 108 return (ENOTTY); 109 } 110 111 return (0); 112} 113 114static int 115nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused, 116 struct thread *td) 117{ 118 int error = 0; 119 120 if (flags & FWRITE) 121 error = securelevel_gt(td->td_ucred, 0); 122 123 return (error); 124} 125 126static int 127nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused, 128 struct thread *td) 129{ 130 131 return (0); 132} 133 134static void 135nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl) 136{ 137 struct bio *bp = arg; 138 139 /* 140 * TODO: add more extensive translation of NVMe status codes 141 * to different bio error codes (i.e. EIO, EINVAL, etc.) 142 */ 143 if (nvme_completion_is_error(cpl)) { 144 bp->bio_error = EIO; 145 bp->bio_flags |= BIO_ERROR; 146 bp->bio_resid = bp->bio_bcount; 147 } else 148 bp->bio_resid = 0; 149 150 biodone(bp); 151} 152 153static void 154nvme_ns_strategy(struct bio *bp) 155{ 156 struct nvme_namespace *ns; 157 int err; 158 159 ns = bp->bio_dev->si_drv1; 160 err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done); 161 162 if (err) { 163 bp->bio_error = err; 164 bp->bio_flags |= BIO_ERROR; 165 bp->bio_resid = bp->bio_bcount; 166 biodone(bp); 167 } 168 169} 170 171static struct cdevsw nvme_ns_cdevsw = { 172 .d_version = D_VERSION, 173 .d_flags = D_DISK, 174 .d_open = nvme_ns_open, 175 .d_close = nvme_ns_close, 176 .d_read = nvme_ns_physio, 177 .d_write = nvme_ns_physio, 178 .d_strategy = nvme_ns_strategy, 179 .d_ioctl = nvme_ns_ioctl 180}; 181 182uint32_t 183nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns) 184{ 185 return ns->ctrlr->max_xfer_size; 186} 187 188uint32_t 189nvme_ns_get_sector_size(struct nvme_namespace *ns) 190{ 191 return (1 << ns->data.lbaf[0].lbads); 192} 193 194uint64_t 195nvme_ns_get_num_sectors(struct nvme_namespace *ns) 196{ 197 return (ns->data.nsze); 198} 199 200uint64_t 201nvme_ns_get_size(struct nvme_namespace *ns) 202{ 203 return (nvme_ns_get_num_sectors(ns) * nvme_ns_get_sector_size(ns)); 204} 205 206uint32_t 207nvme_ns_get_flags(struct nvme_namespace *ns) 208{ 209 return (ns->flags); 210} 211 212const char * 213nvme_ns_get_serial_number(struct nvme_namespace *ns) 214{ 215 return ((const char *)ns->ctrlr->cdata.sn); 216} 217 218const char * 219nvme_ns_get_model_number(struct nvme_namespace *ns) 220{ 221 return ((const char *)ns->ctrlr->cdata.mn); 222} 223 224const struct nvme_namespace_data * 225nvme_ns_get_data(struct nvme_namespace *ns) 226{ 227 228 return (&ns->data); 229} 230 231static void 232nvme_ns_bio_done(void *arg, const struct nvme_completion *status) 233{ 234 struct bio *bp = arg; 235 nvme_cb_fn_t bp_cb_fn; 236 237 bp_cb_fn = bp->bio_driver1; 238 239 if (bp->bio_driver2) 240 free(bp->bio_driver2, M_NVME); 241 242 bp_cb_fn(bp, status); 243} 244 245int 246nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, 247 nvme_cb_fn_t cb_fn) 248{ 249 struct nvme_dsm_range *dsm_range; 250 int err; 251 252 bp->bio_driver1 = cb_fn; 253 254 switch (bp->bio_cmd) { 255 case BIO_READ: 256 err = nvme_ns_cmd_read(ns, bp->bio_data, 257 bp->bio_offset/nvme_ns_get_sector_size(ns), 258 bp->bio_bcount/nvme_ns_get_sector_size(ns), 259 nvme_ns_bio_done, bp); 260 break; 261 case BIO_WRITE: 262 err = nvme_ns_cmd_write(ns, bp->bio_data, 263 bp->bio_offset/nvme_ns_get_sector_size(ns), 264 bp->bio_bcount/nvme_ns_get_sector_size(ns), 265 nvme_ns_bio_done, bp); 266 break; 267 case BIO_FLUSH: 268 err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp); 269 break; 270 case BIO_DELETE: 271 /* 272 * Note: Chatham2 doesn't support DSM, so this code 273 * can't be fully tested yet. 274 */ 275 dsm_range = 276 malloc(sizeof(struct nvme_dsm_range), M_NVME, 277 M_ZERO | M_NOWAIT); 278 dsm_range->length = 279 bp->bio_bcount/nvme_ns_get_sector_size(ns); 280 dsm_range->starting_lba = 281 bp->bio_offset/nvme_ns_get_sector_size(ns); 282 bp->bio_driver2 = dsm_range; 283 err = nvme_ns_cmd_deallocate(ns, dsm_range, 1, 284 nvme_ns_bio_done, bp); 285 if (err != 0) 286 free(dsm_range, M_NVME); 287 break; 288 default: 289 err = EIO; 290 break; 291 } 292 293 return (err); 294} 295 296#ifdef CHATHAM2 297static void 298nvme_ns_populate_chatham_data(struct nvme_namespace *ns) 299{ 300 struct nvme_controller *ctrlr; 301 struct nvme_namespace_data *nsdata; 302 303 ctrlr = ns->ctrlr; 304 nsdata = &ns->data; 305 306 nsdata->nsze = ctrlr->chatham_lbas; 307 nsdata->ncap = ctrlr->chatham_lbas; 308 nsdata->nuse = ctrlr->chatham_lbas; 309 310 /* Chatham2 doesn't support thin provisioning. */ 311 nsdata->nsfeat.thin_prov = 0; 312 313 /* Set LBA size to 512 bytes. */ 314 nsdata->lbaf[0].lbads = 9; 315} 316#endif /* CHATHAM2 */ 317 318int 319nvme_ns_construct(struct nvme_namespace *ns, uint16_t id, 320 struct nvme_controller *ctrlr) 321{ 322 struct nvme_completion cpl; 323 struct mtx *mtx; 324 int status; 325 326 ns->ctrlr = ctrlr; 327 ns->id = id; 328 329#ifdef CHATHAM2 330 if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) 331 nvme_ns_populate_chatham_data(ns); 332 else { 333#endif 334 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 335 336 mtx_lock(mtx); 337 nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data, 338 nvme_ns_cb, &cpl); 339 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 340 mtx_unlock(mtx); 341 if ((status != 0) || nvme_completion_is_error(&cpl)) { 342 printf("nvme_identify_namespace failed!\n"); 343 return (ENXIO); 344 } 345#ifdef CHATHAM2 346 } 347#endif 348 349 if (ctrlr->cdata.oncs.dsm) 350 ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED; 351 352 if (ctrlr->cdata.vwc.present) 353 ns->flags |= NVME_NS_FLUSH_SUPPORTED; 354 355 /* 356 * cdev may have already been created, if we are reconstructing the 357 * namespace after a controller-level reset. 358 */ 359 if (ns->cdev != NULL) 360 return (0); 361 362/* 363 * MAKEDEV_ETERNAL was added in r210923, for cdevs that will never 364 * be destroyed. This avoids refcounting on the cdev object. 365 * That should be OK case here, as long as we're not supporting PCIe 366 * surprise removal nor namespace deletion. 367 */ 368#ifdef MAKEDEV_ETERNAL_KLD 369 ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, 0, 370 NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d", 371 device_get_unit(ctrlr->dev), ns->id); 372#else 373 ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, 0, 374 NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d", 375 device_get_unit(ctrlr->dev), ns->id); 376#endif 377 378 if (ns->cdev != NULL) 379 ns->cdev->si_drv1 = ns; 380 381 return (0); 382} 383 384void nvme_ns_destruct(struct nvme_namespace *ns) 385{ 386 387 if (ns->cdev != NULL) 388 destroy_dev(ns->cdev); 389} 390