1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4 */
5
6#include <linux/blkdev.h>
7#include <linux/vmalloc.h>
8#include "nvme.h"
9
10static int nvme_set_max_append(struct nvme_ctrl *ctrl)
11{
12	struct nvme_command c = { };
13	struct nvme_id_ctrl_zns *id;
14	int status;
15
16	id = kzalloc(sizeof(*id), GFP_KERNEL);
17	if (!id)
18		return -ENOMEM;
19
20	c.identify.opcode = nvme_admin_identify;
21	c.identify.cns = NVME_ID_CNS_CS_CTRL;
22	c.identify.csi = NVME_CSI_ZNS;
23
24	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
25	if (status) {
26		kfree(id);
27		return status;
28	}
29
30	if (id->zasl)
31		ctrl->max_zone_append = 1 << (id->zasl + 3);
32	else
33		ctrl->max_zone_append = ctrl->max_hw_sectors;
34	kfree(id);
35	return 0;
36}
37
38int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
39		struct nvme_zone_info *zi)
40{
41	struct nvme_effects_log *log = ns->head->effects;
42	struct nvme_command c = { };
43	struct nvme_id_ns_zns *id;
44	int status;
45
46	/* Driver requires zone append support */
47	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
48			NVME_CMD_EFFECTS_CSUPP)) {
49		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
50			dev_warn(ns->ctrl->device,
51				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
52				 ns->head->ns_id);
53	} else {
54		set_bit(NVME_NS_FORCE_RO, &ns->flags);
55		dev_warn(ns->ctrl->device,
56			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
57			 ns->head->ns_id);
58	}
59
60	/* Lazily query controller append limit for the first zoned namespace */
61	if (!ns->ctrl->max_zone_append) {
62		status = nvme_set_max_append(ns->ctrl);
63		if (status)
64			return status;
65	}
66
67	id = kzalloc(sizeof(*id), GFP_KERNEL);
68	if (!id)
69		return -ENOMEM;
70
71	c.identify.opcode = nvme_admin_identify;
72	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
73	c.identify.cns = NVME_ID_CNS_CS_NS;
74	c.identify.csi = NVME_CSI_ZNS;
75
76	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
77	if (status)
78		goto free_data;
79
80	/*
81	 * We currently do not handle devices requiring any of the zoned
82	 * operation characteristics.
83	 */
84	if (id->zoc) {
85		dev_warn(ns->ctrl->device,
86			"zone operations:%x not supported for namespace:%u\n",
87			le16_to_cpu(id->zoc), ns->head->ns_id);
88		status = -ENODEV;
89		goto free_data;
90	}
91
92	zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze);
93	if (!is_power_of_2(zi->zone_size)) {
94		dev_warn(ns->ctrl->device,
95			"invalid zone size: %llu for namespace: %u\n",
96			zi->zone_size, ns->head->ns_id);
97		status = -ENODEV;
98		goto free_data;
99	}
100	zi->max_open_zones = le32_to_cpu(id->mor) + 1;
101	zi->max_active_zones = le32_to_cpu(id->mar) + 1;
102
103free_data:
104	kfree(id);
105	return status;
106}
107
108void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
109		struct nvme_zone_info *zi)
110{
111	lim->zoned = 1;
112	lim->max_open_zones = zi->max_open_zones;
113	lim->max_active_zones = zi->max_active_zones;
114	lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
115	lim->chunk_sectors = ns->head->zsze =
116		nvme_lba_to_sect(ns->head, zi->zone_size);
117	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
118}
119
120static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
121					  unsigned int nr_zones, size_t *buflen)
122{
123	struct request_queue *q = ns->disk->queue;
124	size_t bufsize;
125	void *buf;
126
127	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
128				   sizeof(struct nvme_zone_descriptor);
129
130	nr_zones = min_t(unsigned int, nr_zones,
131			 get_capacity(ns->disk) >> ilog2(ns->head->zsze));
132
133	bufsize = sizeof(struct nvme_zone_report) +
134		nr_zones * sizeof(struct nvme_zone_descriptor);
135	bufsize = min_t(size_t, bufsize,
136			queue_max_hw_sectors(q) << SECTOR_SHIFT);
137	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
138
139	while (bufsize >= min_bufsize) {
140		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
141		if (buf) {
142			*buflen = bufsize;
143			return buf;
144		}
145		bufsize >>= 1;
146	}
147	return NULL;
148}
149
150static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl,
151				 struct nvme_ns_head *head,
152				 struct nvme_zone_descriptor *entry,
153				 unsigned int idx, report_zones_cb cb,
154				 void *data)
155{
156	struct blk_zone zone = { };
157
158	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
159		dev_err(ctrl->device, "invalid zone type %#x\n",
160				entry->zt);
161		return -EINVAL;
162	}
163
164	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
165	zone.cond = entry->zs >> 4;
166	zone.len = head->zsze;
167	zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap));
168	zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba));
169	if (zone.cond == BLK_ZONE_COND_FULL)
170		zone.wp = zone.start + zone.len;
171	else
172		zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp));
173
174	return cb(&zone, idx, data);
175}
176
177int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
178		unsigned int nr_zones, report_zones_cb cb, void *data)
179{
180	struct nvme_zone_report *report;
181	struct nvme_command c = { };
182	int ret, zone_idx = 0;
183	unsigned int nz, i;
184	size_t buflen;
185
186	if (ns->head->ids.csi != NVME_CSI_ZNS)
187		return -EINVAL;
188
189	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
190	if (!report)
191		return -ENOMEM;
192
193	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
194	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
195	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
196	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
197	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
198	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
199
200	sector &= ~(ns->head->zsze - 1);
201	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
202		memset(report, 0, buflen);
203
204		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector));
205		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
206		if (ret) {
207			if (ret > 0)
208				ret = -EIO;
209			goto out_free;
210		}
211
212		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
213		if (!nz)
214			break;
215
216		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
217			ret = nvme_zone_parse_entry(ns->ctrl, ns->head,
218						    &report->entries[i],
219						    zone_idx, cb, data);
220			if (ret)
221				goto out_free;
222			zone_idx++;
223		}
224
225		sector += ns->head->zsze * nz;
226	}
227
228	if (zone_idx > 0)
229		ret = zone_idx;
230	else
231		ret = -EINVAL;
232out_free:
233	kvfree(report);
234	return ret;
235}
236
237blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
238		struct nvme_command *c, enum nvme_zone_mgmt_action action)
239{
240	memset(c, 0, sizeof(*c));
241
242	c->zms.opcode = nvme_cmd_zone_mgmt_send;
243	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
244	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
245	c->zms.zsa = action;
246
247	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
248		c->zms.select_all = 1;
249
250	return BLK_STS_OK;
251}
252