/*-
 * Copyright (C) 2012-2013 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/nvme/nvme_ns.c 346242 2019-04-15 16:27:06Z mav $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/systm.h>

#include <dev/pci/pcivar.h>

#include <geom/geom.h>

#include "nvme_private.h"

static void		nvme_bio_child_inbed(struct bio *parent, int bio_error);
static void		nvme_bio_child_done(void *arg,
					    const struct nvme_completion *cpl);
static uint32_t		nvme_get_num_segments(uint64_t addr, uint64_t size,
					      uint32_t alignment);
static void		nvme_free_child_bios(int num_bios,
					     struct bio **child_bios);
static struct bio **	nvme_allocate_child_bios(int num_bios);
static struct bio **	nvme_construct_child_bios(struct bio *bp,
						  uint32_t alignment,
						  int *num_bios);
static int		nvme_ns_split_bio(struct nvme_namespace *ns,
					  struct bio *bp,
					  uint32_t alignment);

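/*
 * ioctl handler for the per-namespace character device.  Handles the
 * driver's test and passthrough commands as well as the standard disk
 * ioctls for querying media and sector size.
 *
 * Minimal userland sketch (the path is just an example of the
 * nvme%dns%d nodes created in nvme_ns_construct() below):
 *
 *	int fd = open("/dev/nvme0ns1", O_RDONLY);
 *	off_t mediasize;
 *	u_int sectorsize;
 *	ioctl(fd, DIOCGMEDIASIZE, &mediasize);
 *	ioctl(fd, DIOCGSECTORSIZE, &sectorsize);
 */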
static int
nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvme_namespace			*ns;
	struct nvme_controller			*ctrlr;
	struct nvme_pt_command			*pt;

	ns = cdev->si_drv1;
	ctrlr = ns->ctrlr;

	switch (cmd) {
	case NVME_IO_TEST:
	case NVME_BIO_TEST:
		nvme_ns_test(ns, cmd, arg);
		break;
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id,
		    1 /* is_user_buffer */, 0 /* is_admin_cmd */));
	case DIOCGMEDIASIZE:
		*(off_t *)arg = (off_t)nvme_ns_get_size(ns);
		break;
	case DIOCGSECTORSIZE:
		*(u_int *)arg = nvme_ns_get_sector_size(ns);
		break;
	default:
		return (ENOTTY);
	}

	return (0);
}

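/*
 * Open and close handlers for the namespace cdev.  Opening for write is
 * refused (via securelevel_gt()) when the securelevel has been raised
 * above 0; closing requires no work.
 */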
static int
nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused,
    struct thread *td)
{
	int error = 0;

	if (flags & FWRITE)
		error = securelevel_gt(td->td_ucred, 0);

	return (error);
}

static int
nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
    struct thread *td)
{

	return (0);
}

static void
nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl)
{
	struct bio *bp = arg;

	/*
	 * TODO: add more extensive translation of NVMe status codes
	 *  to different bio error codes (e.g. EIO, EINVAL, etc.)
	 */
	if (nvme_completion_is_error(cpl)) {
		bp->bio_error = EIO;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
	} else
		bp->bio_resid = 0;

	biodone(bp);
}

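/*
 * cdev strategy entry point: hand the bio to nvme_ns_bio_process() and,
 * if it cannot even be submitted, complete it here with the error.
 */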
static void
nvme_ns_strategy(struct bio *bp)
{
	struct nvme_namespace	*ns;
	int			err;

	ns = bp->bio_dev->si_drv1;
	err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done);

	if (err) {
		bp->bio_error = err;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
	}

}

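/*
 * Character device switch for the namespace nodes created in
 * nvme_ns_construct() below (nvme<ctrlr>ns<nsid>).
 */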
static struct cdevsw nvme_ns_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_DISK,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_open =	nvme_ns_open,
	.d_close =	nvme_ns_close,
	.d_strategy =	nvme_ns_strategy,
	.d_ioctl =	nvme_ns_ioctl
};

uint32_t
nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns)
{
	return ns->ctrlr->max_xfer_size;
}

uint32_t
nvme_ns_get_sector_size(struct nvme_namespace *ns)
{
	return (1 << ns->data.lbaf[ns->data.flbas.format].lbads);
}

uint64_t
nvme_ns_get_num_sectors(struct nvme_namespace *ns)
{
	return (ns->data.nsze);
}

uint64_t
nvme_ns_get_size(struct nvme_namespace *ns)
{
	return (nvme_ns_get_num_sectors(ns) * nvme_ns_get_sector_size(ns));
}

uint32_t
nvme_ns_get_flags(struct nvme_namespace *ns)
{
	return (ns->flags);
}

const char *
nvme_ns_get_serial_number(struct nvme_namespace *ns)
{
	return ((const char *)ns->ctrlr->cdata.sn);
}

const char *
nvme_ns_get_model_number(struct nvme_namespace *ns)
{
	return ((const char *)ns->ctrlr->cdata.mn);
}

const struct nvme_namespace_data *
nvme_ns_get_data(struct nvme_namespace *ns)
{

	return (&ns->data);
}

uint32_t
nvme_ns_get_stripesize(struct nvme_namespace *ns)
{

	return (ns->stripesize);
}

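/*
 * Common bio completion handler: free any DSM range allocated for
 * BIO_DELETE (stashed in bio_driver2), translate an NVMe error status
 * into EIO, set bio_resid accordingly, and invoke the original
 * completion callback saved in bio_driver1.
 */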
static void
nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
{
	struct bio	*bp = arg;
	nvme_cb_fn_t	bp_cb_fn;

	bp_cb_fn = bp->bio_driver1;

	if (bp->bio_driver2)
		free(bp->bio_driver2, M_NVME);

	if (nvme_completion_is_error(status)) {
		bp->bio_flags |= BIO_ERROR;
		if (bp->bio_error == 0)
			bp->bio_error = EIO;
	}

	if ((bp->bio_flags & BIO_ERROR) == 0)
		bp->bio_resid = 0;
	else
		bp->bio_resid = bp->bio_bcount;

	bp_cb_fn(bp, status);
}

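/*
 * Called as each child of a split parent bio reaches completion.  Any
 * error is recorded in the parent; once all children are in, the parent
 * is completed through nvme_ns_bio_done() with a synthesized completion
 * status.
 */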
static void
nvme_bio_child_inbed(struct bio *parent, int bio_error)
{
	struct nvme_completion	parent_cpl;
	int			children, inbed;

	if (bio_error != 0) {
		parent->bio_flags |= BIO_ERROR;
		parent->bio_error = bio_error;
	}

	/*
	 * atomic_fetchadd returns the value before the add, so we still
	 *  must add 1 to get the updated inbed number.  Save bio_children
	 *  before incrementing to guard against race conditions when
	 *  two child bios complete on different queues.
	 */
	children = atomic_load_acq_int(&parent->bio_children);
	inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
	if (inbed == children) {
		bzero(&parent_cpl, sizeof(parent_cpl));
		if (parent->bio_flags & BIO_ERROR)
			parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
		nvme_ns_bio_done(parent, &parent_cpl);
	}
}

static void
nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
{
	struct bio		*child = arg;
	struct bio		*parent;
	int			bio_error;

	parent = child->bio_parent;
	g_destroy_bio(child);
	bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
	nvme_bio_child_inbed(parent, bio_error);
}

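/*
 * Return the number of alignment-sized segments touched by an I/O at
 * the given address and size.  Worked example: addr = 0x3000,
 * size = 0x21000, align = 0x20000 spans [0x3000, 0x24000), crossing
 * the 0x20000 boundary once, so 2 is returned: size / align = 1, and
 * remainder = 0x1000 with offset = 0x3000 adds
 * 1 + (0x1000 + 0x3000 - 1) / 0x20000 = 1 more.
 */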
static uint32_t
nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
{
	uint32_t	num_segs, offset, remainder;

	if (align == 0)
		return (1);

	KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));

	num_segs = size / align;
	remainder = size & (align - 1);
	offset = addr & (align - 1);
	if (remainder > 0 || offset > 0)
		num_segs += 1 + (remainder + offset - 1) / align;
	return (num_segs);
}

static void
nvme_free_child_bios(int num_bios, struct bio **child_bios)
{
	int i;

	for (i = 0; i < num_bios; i++) {
		if (child_bios[i] != NULL)
			g_destroy_bio(child_bios[i]);
	}

	free(child_bios, M_NVME);
}

static struct bio **
nvme_allocate_child_bios(int num_bios)
{
	struct bio **child_bios;
	int err = 0, i;

	child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
	if (child_bios == NULL)
		return (NULL);

	for (i = 0; i < num_bios; i++) {
		child_bios[i] = g_new_bio();
		if (child_bios[i] == NULL)
			err = ENOMEM;
	}

	if (err == ENOMEM) {
		nvme_free_child_bios(num_bios, child_bios);
		return (NULL);
	}

	return (child_bios);
}

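/*
 * Carve the parent bio into child bios, each confined to a single
 * alignment-sized window.  Both mapped (bio_data) and unmapped
 * (bio_ma page array) parents are handled; for unmapped bios the page
 * pointer and page offset are advanced by hand for each child.
 */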
static struct bio **
nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
{
	struct bio	**child_bios;
	struct bio	*child;
	uint64_t	cur_offset;
	caddr_t		data;
	uint32_t	rem_bcount;
	int		i;
#ifdef NVME_UNMAPPED_BIO_SUPPORT
	struct vm_page	**ma;
	uint32_t	ma_offset;
#endif

	*num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
	    alignment);
	child_bios = nvme_allocate_child_bios(*num_bios);
	if (child_bios == NULL)
		return (NULL);

	bp->bio_children = *num_bios;
	bp->bio_inbed = 0;
	cur_offset = bp->bio_offset;
	rem_bcount = bp->bio_bcount;
	data = bp->bio_data;
#ifdef NVME_UNMAPPED_BIO_SUPPORT
	ma_offset = bp->bio_ma_offset;
	ma = bp->bio_ma;
#endif

	for (i = 0; i < *num_bios; i++) {
		child = child_bios[i];
		child->bio_parent = bp;
		child->bio_cmd = bp->bio_cmd;
		child->bio_offset = cur_offset;
		child->bio_bcount = min(rem_bcount,
		    alignment - (cur_offset & (alignment - 1)));
		child->bio_flags = bp->bio_flags;
#ifdef NVME_UNMAPPED_BIO_SUPPORT
		if (bp->bio_flags & BIO_UNMAPPED) {
			child->bio_ma_offset = ma_offset;
			child->bio_ma = ma;
			child->bio_ma_n =
			    nvme_get_num_segments(child->bio_ma_offset,
				child->bio_bcount, PAGE_SIZE);
			ma_offset = (ma_offset + child->bio_bcount) &
			    PAGE_MASK;
			ma += child->bio_ma_n;
			if (ma_offset != 0)
				ma -= 1;
		} else
#endif
		{
			child->bio_data = data;
			data += child->bio_bcount;
		}
		cur_offset += child->bio_bcount;
		rem_bcount -= child->bio_bcount;
	}

	return (child_bios);
}

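/*
 * Split a bio along the given alignment into child bios and submit each
 * one; submission failures are charged against the parent via
 * nvme_bio_child_inbed() so the parent still completes exactly once.
 */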
static int
nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
    uint32_t alignment)
{
	struct bio	*child;
	struct bio	**child_bios;
	int		err, i, num_bios;

	child_bios = nvme_construct_child_bios(bp, alignment, &num_bios);
	if (child_bios == NULL)
		return (ENOMEM);

	for (i = 0; i < num_bios; i++) {
		child = child_bios[i];
		err = nvme_ns_bio_process(ns, child, nvme_bio_child_done);
		if (err != 0) {
			nvme_bio_child_inbed(bp, err);
			g_destroy_bio(child);
		}
	}

	free(child_bios, M_NVME);
	return (0);
}

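/*
 * Main entry point for bios directed at this namespace.  The caller's
 * completion callback is stashed in bio_driver1.  Reads and writes that
 * cross a stripe boundary are split first; otherwise the bio is mapped
 * onto the corresponding NVMe read, write, flush or deallocate (DSM)
 * command.
 */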
int
nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
	nvme_cb_fn_t cb_fn)
{
	struct nvme_dsm_range	*dsm_range;
	uint32_t		num_bios;
	int			err;

	bp->bio_driver1 = cb_fn;

	if (ns->stripesize > 0 &&
	    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
		num_bios = nvme_get_num_segments(bp->bio_offset,
		    bp->bio_bcount, ns->stripesize);
		if (num_bios > 1)
			return (nvme_ns_split_bio(ns, bp, ns->stripesize));
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
		break;
	case BIO_WRITE:
		err = nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp);
		break;
	case BIO_FLUSH:
		err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp);
		break;
	case BIO_DELETE:
		dsm_range =
		    malloc(sizeof(struct nvme_dsm_range), M_NVME,
		    M_ZERO | M_WAITOK);
		dsm_range->length =
		    bp->bio_bcount/nvme_ns_get_sector_size(ns);
		dsm_range->starting_lba =
		    bp->bio_offset/nvme_ns_get_sector_size(ns);
		bp->bio_driver2 = dsm_range;
		err = nvme_ns_cmd_deallocate(ns, dsm_range, 1,
			nvme_ns_bio_done, bp);
		if (err != 0)
			free(dsm_range, M_NVME);
		break;
	default:
		err = EIO;
		break;
	}

	return (err);
}

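/*
 * Construct (or, after a controller reset, reconstruct) the namespace:
 * pick up any vendor-specific stripe size, issue IDENTIFY NAMESPACE and
 * validate the result, record optional feature flags (DSM, volatile
 * write cache), and create the character device if it does not already
 * exist.
 */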
int
nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
    struct nvme_controller *ctrlr)
{
	struct make_dev_args                    md_args;
	struct nvme_completion_poll_status	status;
	int                                     res;
	int					unit;

	ns->ctrlr = ctrlr;
	ns->id = id;
	ns->stripesize = 0;

	/*
	 * Older Intel devices advertise in vendor specific space an alignment
	 * that improves performance.  If present, use it for the stripe size.
	 * NVMe 1.3 standardized this as NOIOB, and newer Intel drives use
	 * that.
	 */
	switch (pci_get_devid(ctrlr->dev)) {
	case 0x09538086:		/* Intel DC PC3500 */
	case 0x0a538086:		/* Intel DC PC3520 */
	case 0x0a548086:		/* Intel DC PC4500 */
	case 0x0a558086:		/* Dell Intel P4600 */
		if (ctrlr->cdata.vs[3] != 0)
			ns->stripesize =
			    (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
		break;
	default:
		break;
	}

	/*
	 * Namespaces are reconstructed after a controller reset, so check
	 *  to make sure we only call mtx_init once on each mtx.
	 *
	 * TODO: Move this somewhere where it gets called at controller
	 *  construction time, which is not invoked as part of each
	 *  controller reset.
	 */
	if (!mtx_initialized(&ns->lock))
		mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF);

	status.done = 0;
	nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data,
	    nvme_completion_poll_cb, &status);
	while (!atomic_load_acq_int(&status.done))
		pause("nvme", 1);
	if (nvme_completion_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
		return (ENXIO);
	}

	/*
	 * If the size is zero, chances are this isn't a valid
	 * namespace (e.g. one that's not been configured yet).  The
	 * standard says the entire id will be zeros, so this is a
	 * cheap way to test for that.
	 */
	if (ns->data.nsze == 0)
		return (ENXIO);

	/*
	 * Note: format is a 0-based value, so > is appropriate here,
	 *  not >=.
	 */
	if (ns->data.flbas.format > ns->data.nlbaf) {
		printf("lba format %d exceeds number supported (%d)\n",
		    ns->data.flbas.format, ns->data.nlbaf+1);
		return (ENXIO);
	}

	if (ctrlr->cdata.oncs.dsm)
		ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;

	if (ctrlr->cdata.vwc.present)
		ns->flags |= NVME_NS_FLUSH_SUPPORTED;

	/*
	 * cdev may have already been created, if we are reconstructing the
	 *  namespace after a controller-level reset.
	 */
	if (ns->cdev != NULL)
		return (0);

	/*
	 * Namespace IDs start at 1, so we need to subtract 1 to create a
	 *  correct unit number.
	 */
	unit = device_get_unit(ctrlr->dev) * NVME_MAX_NAMESPACES + ns->id - 1;

	make_dev_args_init(&md_args);
	md_args.mda_devsw = &nvme_ns_cdevsw;
	md_args.mda_unit = unit;
	md_args.mda_mode = 0600;
	md_args.mda_si_drv1 = ns;
	res = make_dev_s(&md_args, &ns->cdev, "nvme%dns%d",
	    device_get_unit(ctrlr->dev), ns->id);
	if (res != 0)
		return (ENXIO);

#ifdef NVME_UNMAPPED_BIO_SUPPORT
	ns->cdev->si_flags |= SI_UNMAPPED;
#endif

	return (0);
}

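/*
 * Tear down the namespace: destroy its character device, if one was
 * created.
 */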
void nvme_ns_destruct(struct nvme_namespace *ns)
{

	if (ns->cdev != NULL)
		destroy_dev(ns->cdev);
}
591327557Sjkim