/* nvme_ns.c — FreeBSD stable/11, SVN revision 346242 */
1118611Snjl/*- 2118611Snjl * Copyright (C) 2012-2013 Intel Corporation 3118611Snjl * All rights reserved. 4118611Snjl * 5118611Snjl * Redistribution and use in source and binary forms, with or without 6118611Snjl * modification, are permitted provided that the following conditions 7316303Sjkim * are met: 8316303Sjkim * 1. Redistributions of source code must retain the above copyright 9316303Sjkim * notice, this list of conditions and the following disclaimer. 10316303Sjkim * 2. Redistributions in binary form must reproduce the above copyright 11316303Sjkim * notice, this list of conditions and the following disclaimer in the 12118611Snjl * documentation and/or other materials provided with the distribution. 13118611Snjl * 14316303Sjkim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15316303Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16316303Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17316303Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18316303Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19316303Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20316303Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21316303Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22316303Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23316303Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24316303Sjkim * SUCH DAMAGE. 
25316303Sjkim */ 26316303Sjkim 27316303Sjkim#include <sys/cdefs.h> 28316303Sjkim__FBSDID("$FreeBSD: stable/11/sys/dev/nvme/nvme_ns.c 346242 2019-04-15 16:27:06Z mav $"); 29316303Sjkim 30316303Sjkim#include <sys/param.h> 31316303Sjkim#include <sys/bio.h> 32316303Sjkim#include <sys/bus.h> 33316303Sjkim#include <sys/conf.h> 34316303Sjkim#include <sys/disk.h> 35316303Sjkim#include <sys/fcntl.h> 36316303Sjkim#include <sys/ioccom.h> 37316303Sjkim#include <sys/malloc.h> 38316303Sjkim#include <sys/module.h> 39316303Sjkim#include <sys/proc.h> 40316303Sjkim#include <sys/systm.h> 41316303Sjkim 42316303Sjkim#include <dev/pci/pcivar.h> 43316303Sjkim 44316303Sjkim#include <geom/geom.h> 45316303Sjkim 46316303Sjkim#include "nvme_private.h" 47316303Sjkim 48316303Sjkimstatic void nvme_bio_child_inbed(struct bio *parent, int bio_error); 49316303Sjkimstatic void nvme_bio_child_done(void *arg, 50316303Sjkim const struct nvme_completion *cpl); 51316303Sjkimstatic uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, 52316303Sjkim uint32_t alignment); 53316303Sjkimstatic void nvme_free_child_bios(int num_bios, 54316303Sjkim struct bio **child_bios); 55316303Sjkimstatic struct bio ** nvme_allocate_child_bios(int num_bios); 56316303Sjkimstatic struct bio ** nvme_construct_child_bios(struct bio *bp, 57316303Sjkim uint32_t alignment, 58316303Sjkim int *num_bios); 59316303Sjkimstatic int nvme_ns_split_bio(struct nvme_namespace *ns, 60316303Sjkim struct bio *bp, 61316303Sjkim uint32_t alignment); 62316303Sjkim 63316303Sjkimstatic int 64316303Sjkimnvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, 65316303Sjkim struct thread *td) 66316303Sjkim{ 67316303Sjkim struct nvme_namespace *ns; 68316303Sjkim struct nvme_controller *ctrlr; 69316303Sjkim struct nvme_pt_command *pt; 70316303Sjkim 71316303Sjkim ns = cdev->si_drv1; 72316303Sjkim ctrlr = ns->ctrlr; 73316303Sjkim 74316303Sjkim switch (cmd) { 75316303Sjkim case NVME_IO_TEST: 76316303Sjkim case NVME_BIO_TEST: 77316303Sjkim 
nvme_ns_test(ns, cmd, arg); 78316303Sjkim break; 79316303Sjkim case NVME_PASSTHROUGH_CMD: 80316303Sjkim pt = (struct nvme_pt_command *)arg; 81316303Sjkim return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id, 82316303Sjkim 1 /* is_user_buffer */, 0 /* is_admin_cmd */)); 83316303Sjkim case DIOCGMEDIASIZE: 84316303Sjkim *(off_t *)arg = (off_t)nvme_ns_get_size(ns); 85316303Sjkim break; 86316303Sjkim case DIOCGSECTORSIZE: 87316303Sjkim *(u_int *)arg = nvme_ns_get_sector_size(ns); 88316303Sjkim break; 89316303Sjkim default: 90316303Sjkim return (ENOTTY); 91316303Sjkim } 92316303Sjkim 93316303Sjkim return (0); 94316303Sjkim} 95316303Sjkim 96316303Sjkimstatic int 97316303Sjkimnvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused, 98316303Sjkim struct thread *td) 99316303Sjkim{ 100316303Sjkim int error = 0; 101316303Sjkim 102316303Sjkim if (flags & FWRITE) 103316303Sjkim error = securelevel_gt(td->td_ucred, 0); 104316303Sjkim 105316303Sjkim return (error); 106316303Sjkim} 107316303Sjkim 108316303Sjkimstatic int 109316303Sjkimnvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused, 110316303Sjkim struct thread *td) 111316303Sjkim{ 112316303Sjkim 113316303Sjkim return (0); 114316303Sjkim} 115316303Sjkim 116316303Sjkimstatic void 117316303Sjkimnvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl) 118316303Sjkim{ 119217365Sjkim struct bio *bp = arg; 120217365Sjkim 121217365Sjkim /* 122217365Sjkim * TODO: add more extensive translation of NVMe status codes 123217365Sjkim * to different bio error codes (i.e. EIO, EINVAL, etc.) 
124217365Sjkim */ 125217365Sjkim if (nvme_completion_is_error(cpl)) { 126217365Sjkim bp->bio_error = EIO; 127217365Sjkim bp->bio_flags |= BIO_ERROR; 128217365Sjkim bp->bio_resid = bp->bio_bcount; 129217365Sjkim } else 130217365Sjkim bp->bio_resid = 0; 131217365Sjkim 132217365Sjkim biodone(bp); 133118611Snjl} 134316303Sjkim 135316303Sjkimstatic void 136316303Sjkimnvme_ns_strategy(struct bio *bp) 137316303Sjkim{ 138316303Sjkim struct nvme_namespace *ns; 139316303Sjkim int err; 140316303Sjkim 141316303Sjkim ns = bp->bio_dev->si_drv1; 142316303Sjkim err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done); 143316303Sjkim 144316303Sjkim if (err) { 145316303Sjkim bp->bio_error = err; 146316303Sjkim bp->bio_flags |= BIO_ERROR; 147217365Sjkim bp->bio_resid = bp->bio_bcount; 148217365Sjkim biodone(bp); 149118611Snjl } 150316303Sjkim 151118611Snjl} 152151937Sjkim 153118611Snjlstatic struct cdevsw nvme_ns_cdevsw = { 154118611Snjl .d_version = D_VERSION, 155118611Snjl .d_flags = D_DISK, 156118611Snjl .d_read = physread, 157151937Sjkim .d_write = physwrite, 158118611Snjl .d_open = nvme_ns_open, 159151937Sjkim .d_close = nvme_ns_close, 160151937Sjkim .d_strategy = nvme_ns_strategy, 161151937Sjkim .d_ioctl = nvme_ns_ioctl 162151937Sjkim}; 163322877Sjkim 164322877Sjkimuint32_t 165322877Sjkimnvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns) 166322877Sjkim{ 167151937Sjkim return ns->ctrlr->max_xfer_size; 168322877Sjkim} 169322877Sjkim 170322877Sjkimuint32_t 171322877Sjkimnvme_ns_get_sector_size(struct nvme_namespace *ns) 172322877Sjkim{ 173327557Sjkim return (1 << ns->data.lbaf[ns->data.flbas.format].lbads); 174327557Sjkim} 175327557Sjkim 176327557Sjkimuint64_t 177327557Sjkimnvme_ns_get_num_sectors(struct nvme_namespace *ns) 178327557Sjkim{ 179327557Sjkim return (ns->data.nsze); 180327557Sjkim} 181327557Sjkim 182327557Sjkimuint64_t 183327557Sjkimnvme_ns_get_size(struct nvme_namespace *ns) 184327557Sjkim{ 185322877Sjkim return (nvme_ns_get_num_sectors(ns) * 
nvme_ns_get_sector_size(ns)); 186327557Sjkim} 187327557Sjkim 188327557Sjkimuint32_t 189327557Sjkimnvme_ns_get_flags(struct nvme_namespace *ns) 190327557Sjkim{ 191327557Sjkim return (ns->flags); 192327557Sjkim} 193327557Sjkim 194327557Sjkimconst char * 195327557Sjkimnvme_ns_get_serial_number(struct nvme_namespace *ns) 196327557Sjkim{ 197327557Sjkim return ((const char *)ns->ctrlr->cdata.sn); 198327557Sjkim} 199327557Sjkim 200327557Sjkimconst char * 201327557Sjkimnvme_ns_get_model_number(struct nvme_namespace *ns) 202327557Sjkim{ 203327557Sjkim return ((const char *)ns->ctrlr->cdata.mn); 204327557Sjkim} 205233250Sjkim 206233250Sjkimconst struct nvme_namespace_data * 207272444Sjkimnvme_ns_get_data(struct nvme_namespace *ns) 208272444Sjkim{ 209272444Sjkim 210272444Sjkim return (&ns->data); 211272444Sjkim} 212272444Sjkim 213272444Sjkimuint32_t 214272444Sjkimnvme_ns_get_stripesize(struct nvme_namespace *ns) 215272444Sjkim{ 216272444Sjkim 217272444Sjkim return (ns->stripesize); 218272444Sjkim} 219272444Sjkim 220272444Sjkimstatic void 221272444Sjkimnvme_ns_bio_done(void *arg, const struct nvme_completion *status) 222272444Sjkim{ 223272444Sjkim struct bio *bp = arg; 224272444Sjkim nvme_cb_fn_t bp_cb_fn; 225272444Sjkim 226272444Sjkim bp_cb_fn = bp->bio_driver1; 227272444Sjkim 228272444Sjkim if (bp->bio_driver2) 229272444Sjkim free(bp->bio_driver2, M_NVME); 230272444Sjkim 231272444Sjkim if (nvme_completion_is_error(status)) { 232272444Sjkim bp->bio_flags |= BIO_ERROR; 233272444Sjkim if (bp->bio_error == 0) 234272444Sjkim bp->bio_error = EIO; 235272444Sjkim } 236272444Sjkim 237233250Sjkim if ((bp->bio_flags & BIO_ERROR) == 0) 238233250Sjkim bp->bio_resid = 0; 239233250Sjkim else 240233250Sjkim bp->bio_resid = bp->bio_bcount; 241233250Sjkim 242233250Sjkim bp_cb_fn(bp, status); 243233250Sjkim} 244233250Sjkim 245233250Sjkimstatic void 246233250Sjkimnvme_bio_child_inbed(struct bio *parent, int bio_error) 247193529Sjkim{ 248193529Sjkim struct nvme_completion parent_cpl; 
249193529Sjkim int children, inbed; 250193529Sjkim 251193529Sjkim if (bio_error != 0) { 252193529Sjkim parent->bio_flags |= BIO_ERROR; 253193529Sjkim parent->bio_error = bio_error; 254327557Sjkim } 255193529Sjkim 256193529Sjkim /* 257193529Sjkim * atomic_fetchadd will return value before adding 1, so we still 258193529Sjkim * must add 1 to get the updated inbed number. Save bio_children 259193529Sjkim * before incrementing to guard against race conditions when 260193529Sjkim * two children bios complete on different queues. 261193529Sjkim */ 262193529Sjkim children = atomic_load_acq_int(&parent->bio_children); 263193529Sjkim inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1; 264193529Sjkim if (inbed == children) { 265193529Sjkim bzero(&parent_cpl, sizeof(parent_cpl)); 266193529Sjkim if (parent->bio_flags & BIO_ERROR) 267193529Sjkim parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR; 268118611Snjl nvme_ns_bio_done(parent, &parent_cpl); 269118611Snjl } 270118611Snjl} 271118611Snjl 272118611Snjlstatic void 273118611Snjlnvme_bio_child_done(void *arg, const struct nvme_completion *cpl) 274118611Snjl{ 275118611Snjl struct bio *child = arg; 276241973Sjkim struct bio *parent; 277118611Snjl int bio_error; 278118611Snjl 279118611Snjl parent = child->bio_parent; 280118611Snjl g_destroy_bio(child); 281118611Snjl bio_error = nvme_completion_is_error(cpl) ? 
EIO : 0; 282151937Sjkim nvme_bio_child_inbed(parent, bio_error); 283118611Snjl} 284118611Snjl 285118611Snjlstatic uint32_t 286118611Snjlnvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align) 287118611Snjl{ 288118611Snjl uint32_t num_segs, offset, remainder; 289118611Snjl 290202771Sjkim if (align == 0) 291118611Snjl return (1); 292118611Snjl 293118611Snjl KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n")); 294118611Snjl 295118611Snjl num_segs = size / align; 296118611Snjl remainder = size & (align - 1); 297118611Snjl offset = addr & (align - 1); 298202771Sjkim if (remainder > 0 || offset > 0) 299202771Sjkim num_segs += 1 + (remainder + offset - 1) / align; 300202771Sjkim return (num_segs); 301202771Sjkim} 302118611Snjl 303118611Snjlstatic void 304118611Snjlnvme_free_child_bios(int num_bios, struct bio **child_bios) 305327557Sjkim{ 306118611Snjl int i; 307118611Snjl 308118611Snjl for (i = 0; i < num_bios; i++) { 309118611Snjl if (child_bios[i] != NULL) 310118611Snjl g_destroy_bio(child_bios[i]); 311118611Snjl } 312118611Snjl 313118611Snjl free(child_bios, M_NVME); 314118611Snjl} 315118611Snjl 316118611Snjlstatic struct bio ** 317118611Snjlnvme_allocate_child_bios(int num_bios) 318118611Snjl{ 319118611Snjl struct bio **child_bios; 320118611Snjl int err = 0, i; 321118611Snjl 322118611Snjl child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT); 323118611Snjl if (child_bios == NULL) 324118611Snjl return (NULL); 325118611Snjl 326118611Snjl for (i = 0; i < num_bios; i++) { 327118611Snjl child_bios[i] = g_new_bio(); 328327557Sjkim if (child_bios[i] == NULL) 329118611Snjl err = ENOMEM; 330327557Sjkim } 331118611Snjl 332327557Sjkim if (err == ENOMEM) { 333327557Sjkim nvme_free_child_bios(num_bios, child_bios); 334118611Snjl return (NULL); 335118611Snjl } 336118611Snjl 337327557Sjkim return (child_bios); 338118611Snjl} 339118611Snjl 340118611Snjlstatic struct bio ** 341327557Sjkimnvme_construct_child_bios(struct bio *bp, uint32_t 
alignment, int *num_bios) 342327557Sjkim{ 343327557Sjkim struct bio **child_bios; 344118611Snjl struct bio *child; 345327557Sjkim uint64_t cur_offset; 346327557Sjkim caddr_t data; 347118611Snjl uint32_t rem_bcount; 348118611Snjl int i; 349272444Sjkim#ifdef NVME_UNMAPPED_BIO_SUPPORT 350118611Snjl struct vm_page **ma; 351118611Snjl uint32_t ma_offset; 352118611Snjl#endif 353118611Snjl 354118611Snjl *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount, 355327557Sjkim alignment); 356327557Sjkim child_bios = nvme_allocate_child_bios(*num_bios); 357327557Sjkim if (child_bios == NULL) 358327557Sjkim return (NULL); 359327557Sjkim 360327557Sjkim bp->bio_children = *num_bios; 361327557Sjkim bp->bio_inbed = 0; 362327557Sjkim cur_offset = bp->bio_offset; 363327557Sjkim rem_bcount = bp->bio_bcount; 364327557Sjkim data = bp->bio_data; 365193529Sjkim#ifdef NVME_UNMAPPED_BIO_SUPPORT 366327557Sjkim ma_offset = bp->bio_ma_offset; 367193529Sjkim ma = bp->bio_ma; 368193529Sjkim#endif 369193529Sjkim 370327557Sjkim for (i = 0; i < *num_bios; i++) { 371327557Sjkim child = child_bios[i]; 372118611Snjl child->bio_parent = bp; 373327557Sjkim child->bio_cmd = bp->bio_cmd; 374118611Snjl child->bio_offset = cur_offset; 375327557Sjkim child->bio_bcount = min(rem_bcount, 376327557Sjkim alignment - (cur_offset & (alignment - 1))); 377118611Snjl child->bio_flags = bp->bio_flags; 378327557Sjkim#ifdef NVME_UNMAPPED_BIO_SUPPORT 379327557Sjkim if (bp->bio_flags & BIO_UNMAPPED) { 380272444Sjkim child->bio_ma_offset = ma_offset; 381327557Sjkim child->bio_ma = ma; 382327557Sjkim child->bio_ma_n = 383327557Sjkim nvme_get_num_segments(child->bio_ma_offset, 384272444Sjkim child->bio_bcount, PAGE_SIZE); 385327557Sjkim ma_offset = (ma_offset + child->bio_bcount) & 386327557Sjkim PAGE_MASK; 387327557Sjkim ma += child->bio_ma_n; 388327557Sjkim if (ma_offset != 0) 389327557Sjkim ma -= 1; 390327557Sjkim } else 391327557Sjkim#endif 392327557Sjkim { 393327557Sjkim child->bio_data = data; 394327557Sjkim 
data += child->bio_bcount; 395327557Sjkim } 396250838Sjkim cur_offset += child->bio_bcount; 397327557Sjkim rem_bcount -= child->bio_bcount; 398118611Snjl } 399327557Sjkim 400327557Sjkim return (child_bios); 401327557Sjkim} 402118611Snjl 403327557Sjkimstatic int 404118611Snjlnvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp, 405327557Sjkim uint32_t alignment) 406327557Sjkim{ 407327557Sjkim struct bio *child; 408118611Snjl struct bio **child_bios; 409327557Sjkim int err, i, num_bios; 410327557Sjkim 411327557Sjkim child_bios = nvme_construct_child_bios(bp, alignment, &num_bios); 412327557Sjkim if (child_bios == NULL) 413327557Sjkim return (ENOMEM); 414327557Sjkim 415118611Snjl for (i = 0; i < num_bios; i++) { 416327557Sjkim child = child_bios[i]; 417250838Sjkim err = nvme_ns_bio_process(ns, child, nvme_bio_child_done); 418327557Sjkim if (err != 0) { 419327557Sjkim nvme_bio_child_inbed(bp, err); 420327557Sjkim g_destroy_bio(child); 421118611Snjl } 422118611Snjl } 423327557Sjkim 424327557Sjkim free(child_bios, M_NVME); 425327557Sjkim return (0); 426327557Sjkim} 427118611Snjl 428327557Sjkimint 429327557Sjkimnvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, 430327557Sjkim nvme_cb_fn_t cb_fn) 431327557Sjkim{ 432327557Sjkim struct nvme_dsm_range *dsm_range; 433327557Sjkim uint32_t num_bios; 434209746Sjkim int err; 435327557Sjkim 436327557Sjkim bp->bio_driver1 = cb_fn; 437327557Sjkim 438327557Sjkim if (ns->stripesize > 0 && 439327557Sjkim (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { 440327557Sjkim num_bios = nvme_get_num_segments(bp->bio_offset, 441327557Sjkim bp->bio_bcount, ns->stripesize); 442327557Sjkim if (num_bios > 1) 443327557Sjkim return (nvme_ns_split_bio(ns, bp, ns->stripesize)); 444327557Sjkim } 445327557Sjkim 446327557Sjkim switch (bp->bio_cmd) { 447327557Sjkim case BIO_READ: 448327557Sjkim err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp); 449327557Sjkim break; 450327557Sjkim case BIO_WRITE: 451327557Sjkim err = 
nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp); 452327557Sjkim break; 453327557Sjkim case BIO_FLUSH: 454327557Sjkim err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp); 455327557Sjkim break; 456327557Sjkim case BIO_DELETE: 457327557Sjkim dsm_range = 458327557Sjkim malloc(sizeof(struct nvme_dsm_range), M_NVME, 459327557Sjkim M_ZERO | M_WAITOK); 460327557Sjkim dsm_range->length = 461327557Sjkim bp->bio_bcount/nvme_ns_get_sector_size(ns); 462327557Sjkim dsm_range->starting_lba = 463327557Sjkim bp->bio_offset/nvme_ns_get_sector_size(ns); 464327557Sjkim bp->bio_driver2 = dsm_range; 465327557Sjkim err = nvme_ns_cmd_deallocate(ns, dsm_range, 1, 466233250Sjkim nvme_ns_bio_done, bp); 467209746Sjkim if (err != 0) 468298714Sjkim free(dsm_range, M_NVME); 469298714Sjkim break; 470298714Sjkim default: 471298714Sjkim err = EIO; 472233250Sjkim break; 473233250Sjkim } 474233250Sjkim 475233250Sjkim return (err); 476233250Sjkim} 477216471Sjkim 478233250Sjkimint 479233250Sjkimnvme_ns_construct(struct nvme_namespace *ns, uint32_t id, 480233250Sjkim struct nvme_controller *ctrlr) 481216471Sjkim{ 482233250Sjkim struct make_dev_args md_args; 483233250Sjkim struct nvme_completion_poll_status status; 484233250Sjkim int res; 485233250Sjkim int unit; 486233250Sjkim 487327557Sjkim ns->ctrlr = ctrlr; 488233250Sjkim ns->id = id; 489216471Sjkim ns->stripesize = 0; 490327557Sjkim 491327557Sjkim /* 492327557Sjkim * Older Intel devices advertise in vendor specific space an alignment 493327557Sjkim * that improves performance. If present use for the stripe size. NVMe 494327557Sjkim * 1.3 standardized this as NOIOB, and newer Intel drives use that. 
495327557Sjkim */ 496216471Sjkim switch (pci_get_devid(ctrlr->dev)) { 497216471Sjkim case 0x09538086: /* Intel DC PC3500 */ 498118611Snjl case 0x0a538086: /* Intel DC PC3520 */ 499118611Snjl case 0x0a548086: /* Intel DC PC4500 */ 500327557Sjkim case 0x0a558086: /* Dell Intel P4600 */ 501118611Snjl if (ctrlr->cdata.vs[3] != 0) 502327557Sjkim ns->stripesize = 503327557Sjkim (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size; 504327557Sjkim break; 505118611Snjl default: 506327557Sjkim break; 507327557Sjkim } 508327557Sjkim 509327557Sjkim /* 510327557Sjkim * Namespaces are reconstructed after a controller reset, so check 511327557Sjkim * to make sure we only call mtx_init once on each mtx. 512118611Snjl * 513327557Sjkim * TODO: Move this somewhere where it gets called at controller 514327557Sjkim * construction time, which is not invoked as part of each 515327557Sjkim * controller reset. 516327557Sjkim */ 517327557Sjkim if (!mtx_initialized(&ns->lock)) 518327557Sjkim mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF); 519118611Snjl 520327557Sjkim status.done = 0; 521327557Sjkim nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data, 522327557Sjkim nvme_completion_poll_cb, &status); 523327557Sjkim while (!atomic_load_acq_int(&status.done)) 524327557Sjkim pause("nvme", 1); 525327557Sjkim if (nvme_completion_is_error(&status.cpl)) { 526327557Sjkim nvme_printf(ctrlr, "nvme_identify_namespace failed\n"); 527327557Sjkim return (ENXIO); 528327557Sjkim } 529327557Sjkim 530327557Sjkim /* 531327557Sjkim * If the size of is zero, chances are this isn't a valid 532327557Sjkim * namespace (eg one that's not been configured yet). The 533327557Sjkim * standard says the entire id will be zeros, so this is a 534327557Sjkim * cheap way to test for that. 535327557Sjkim */ 536327557Sjkim if (ns->data.nsze == 0) 537327557Sjkim return (ENXIO); 538327557Sjkim 539327557Sjkim /* 540327557Sjkim * Note: format is a 0-based value, so > is appropriate here, 541327557Sjkim * not >=. 
542327557Sjkim */ 543327557Sjkim if (ns->data.flbas.format > ns->data.nlbaf) { 544327557Sjkim printf("lba format %d exceeds number supported (%d)\n", 545327557Sjkim ns->data.flbas.format, ns->data.nlbaf+1); 546327557Sjkim return (ENXIO); 547327557Sjkim } 548327557Sjkim 549327557Sjkim if (ctrlr->cdata.oncs.dsm) 550327557Sjkim ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED; 551233250Sjkim 552327557Sjkim if (ctrlr->cdata.vwc.present) 553327557Sjkim ns->flags |= NVME_NS_FLUSH_SUPPORTED; 554327557Sjkim 555327557Sjkim /* 556327557Sjkim * cdev may have already been created, if we are reconstructing the 557327557Sjkim * namespace after a controller-level reset. 558327557Sjkim */ 559233250Sjkim if (ns->cdev != NULL) 560327557Sjkim return (0); 561233250Sjkim 562327557Sjkim /* 563327557Sjkim * Namespace IDs start at 1, so we need to subtract 1 to create a 564327557Sjkim * correct unit number. 565327557Sjkim */ 566118611Snjl unit = device_get_unit(ctrlr->dev) * NVME_MAX_NAMESPACES + ns->id - 1; 567327557Sjkim 568327557Sjkim make_dev_args_init(&md_args); 569327557Sjkim md_args.mda_devsw = &nvme_ns_cdevsw; 570327557Sjkim md_args.mda_unit = unit; 571327557Sjkim md_args.mda_mode = 0600; 572327557Sjkim md_args.mda_si_drv1 = ns; 573249112Sjkim res = make_dev_s(&md_args, &ns->cdev, "nvme%dns%d", 574327557Sjkim device_get_unit(ctrlr->dev), ns->id); 575118611Snjl if (res != 0) 576327557Sjkim return (ENXIO); 577118611Snjl 578118611Snjl#ifdef NVME_UNMAPPED_BIO_SUPPORT 579327557Sjkim ns->cdev->si_flags |= SI_UNMAPPED; 580118611Snjl#endif 581118611Snjl 582327557Sjkim return (0); 583327557Sjkim} 584327557Sjkim 585327557Sjkimvoid nvme_ns_destruct(struct nvme_namespace *ns) 586327557Sjkim{ 587327557Sjkim 588327557Sjkim if (ns->cdev != NULL) 589118611Snjl destroy_dev(ns->cdev); 590327557Sjkim} 591327557Sjkim