iser_memory.c revision 331769
1/* $FreeBSD: stable/11/sys/dev/iser/iser_memory.c 331769 2018-03-30 18:06:29Z hselasky $ */
2/*-
3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include "icl_iser.h"
28
29static struct fast_reg_descriptor *
30iser_reg_desc_get(struct ib_conn *ib_conn)
31{
32	struct fast_reg_descriptor *desc;
33
34	mtx_lock(&ib_conn->lock);
35	desc = list_first_entry(&ib_conn->fastreg.pool,
36				struct fast_reg_descriptor, list);
37	list_del(&desc->list);
38	mtx_unlock(&ib_conn->lock);
39
40	return (desc);
41}
42
43static void
44iser_reg_desc_put(struct ib_conn *ib_conn,
45		  struct fast_reg_descriptor *desc)
46{
47	mtx_lock(&ib_conn->lock);
48	list_add(&desc->list, &ib_conn->fastreg.pool);
49	mtx_unlock(&ib_conn->lock);
50}
51
/*
 * Evaluates to non-zero when none of the bits cleared in MASK_4K are set
 * in addr.  The argument is parenthesized so that the cast applies to the
 * whole expression (e.g. pointer arithmetic) rather than its first operand.
 */
#define IS_4K_ALIGNED(addr)	((((unsigned long)(addr)) & ~MASK_4K) == 0)
53
54/**
55 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
56 * for RDMA sub-list of a scatter-gather list of memory buffers, and  returns
57 * the number of entries which are aligned correctly. Supports the case where
58 * consecutive SG elements are actually fragments of the same physcial page.
59 */
60static int
61iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev)
62{
63	struct scatterlist *sg, *sgl, *next_sg = NULL;
64	u64 start_addr, end_addr;
65	int i, ret_len, start_check = 0;
66
67	if (data->dma_nents == 1)
68		return (1);
69
70	sgl = data->sgl;
71	start_addr  = ib_sg_dma_address(ibdev, sgl);
72
73	for_each_sg(sgl, sg, data->dma_nents, i) {
74		if (start_check && !IS_4K_ALIGNED(start_addr))
75			break;
76
77		next_sg = sg_next(sg);
78		if (!next_sg)
79			break;
80
81		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
82		start_addr  = ib_sg_dma_address(ibdev, next_sg);
83
84		if (end_addr == start_addr) {
85			start_check = 0;
86			continue;
87		} else
88			start_check = 1;
89
90		if (!IS_4K_ALIGNED(end_addr))
91			break;
92	}
93	ret_len = (next_sg) ? i : i+1;
94
95	return (ret_len);
96}
97
98void
99iser_dma_unmap_task_data(struct icl_iser_pdu *iser_pdu,
100			 struct iser_data_buf *data,
101			 enum dma_data_direction dir)
102{
103	struct ib_device *dev;
104
105	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
106	ib_dma_unmap_sg(dev, data->sgl, data->size, dir);
107}
108
109static int
110iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
111	     struct iser_mem_reg *reg)
112{
113	struct scatterlist *sg = mem->sgl;
114
115	reg->sge.lkey = device->mr->lkey;
116	reg->rkey = device->mr->rkey;
117	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
118	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
119
120	return (0);
121}
122
/**
 * TODO: This should be a verb
 * iser_ib_inc_rkey - advance the key portion (low 8 bits) of an rkey by
 * one, wrapping within those 8 bits; the upper 24 bits are returned
 * unchanged.  Can be used for calculating a new rkey for type 2 memory
 * windows.
 * @rkey - the rkey to increment.
 */
static inline u32
iser_ib_inc_rkey(u32 rkey)
{
	const u32 key_bits = 0x000000ff;
	u32 next_key = (rkey + 1) & key_bits;
	u32 index_part = rkey & ~key_bits;

	return (index_part | next_key);
}
136
137static void
138iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
139{
140	u32 rkey;
141
142	memset(inv_wr, 0, sizeof(*inv_wr));
143	inv_wr->opcode = IB_WR_LOCAL_INV;
144	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
145	inv_wr->ex.invalidate_rkey = mr->rkey;
146
147	rkey = iser_ib_inc_rkey(mr->rkey);
148	ib_update_fast_reg_key(mr, rkey);
149}
150
151static int
152iser_fast_reg_mr(struct icl_iser_pdu *iser_pdu,
153		 struct iser_data_buf *mem,
154		 struct iser_reg_resources *rsc,
155		 struct iser_mem_reg *reg)
156{
157	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
158	struct iser_device *device = ib_conn->device;
159	struct ib_mr *mr = rsc->mr;
160	struct ib_reg_wr fastreg_wr;
161	struct ib_send_wr inv_wr;
162	struct ib_send_wr *bad_wr, *wr = NULL;
163	int ret, n;
164
165	/* if there a single dma entry, dma mr suffices */
166	if (mem->dma_nents == 1)
167		return iser_reg_dma(device, mem, reg);
168
169	if (!rsc->mr_valid) {
170		iser_inv_rkey(&inv_wr, mr);
171		wr = &inv_wr;
172	}
173
174	n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
175	if (unlikely(n != mem->size)) {
176		ISER_ERR("failed to map sg (%d/%d)\n", n, mem->size);
177		return n < 0 ? n : -EINVAL;
178	}
179	/* Prepare FASTREG WR */
180	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
181	fastreg_wr.wr.opcode = IB_WR_REG_MR;
182	fastreg_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
183	fastreg_wr.wr.num_sge = 0;
184	fastreg_wr.mr = mr;
185	fastreg_wr.key = mr->rkey;
186	fastreg_wr.access = IB_ACCESS_LOCAL_WRITE  |
187			    IB_ACCESS_REMOTE_WRITE |
188			    IB_ACCESS_REMOTE_READ;
189
190	if (!wr)
191		wr = &fastreg_wr.wr;
192	else
193		wr->next = &fastreg_wr.wr;
194
195	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
196	if (ret) {
197		ISER_ERR("fast registration failed, ret:%d", ret);
198		return (ret);
199	}
200	rsc->mr_valid = 0;
201
202	reg->sge.lkey = mr->lkey;
203	reg->rkey = mr->rkey;
204	reg->sge.addr = mr->iova;
205	reg->sge.length = mr->length;
206
207	return (ret);
208}
209
210/**
211 * iser_reg_rdma_mem - Registers memory intended for RDMA,
212 * using Fast Registration WR (if possible) obtaining rkey and va
213 *
214 * returns 0 on success, errno code on failure
215 */
216int
217iser_reg_rdma_mem(struct icl_iser_pdu *iser_pdu,
218		  enum iser_data_dir cmd_dir)
219{
220	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
221	struct iser_device   *device = ib_conn->device;
222	struct ib_device     *ibdev = device->ib_device;
223	struct iser_data_buf *mem = &iser_pdu->data[cmd_dir];
224	struct iser_mem_reg *mem_reg = &iser_pdu->rdma_reg[cmd_dir];
225	struct fast_reg_descriptor *desc = NULL;
226	int err, aligned_len;
227
228	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
229	if (aligned_len != mem->dma_nents) {
230		ISER_ERR("bounce buffer is not supported");
231		return 1;
232	}
233
234	if (mem->dma_nents != 1) {
235		desc = iser_reg_desc_get(ib_conn);
236		mem_reg->mem_h = desc;
237	}
238
239	err = iser_fast_reg_mr(iser_pdu, mem, desc ? &desc->rsc : NULL,
240				       mem_reg);
241	if (err)
242		goto err_reg;
243
244	return (0);
245
246err_reg:
247	if (desc)
248		iser_reg_desc_put(ib_conn, desc);
249
250	return (err);
251}
252
253void
254iser_unreg_rdma_mem(struct icl_iser_pdu *iser_pdu,
255		    enum iser_data_dir cmd_dir)
256{
257	struct iser_mem_reg *reg = &iser_pdu->rdma_reg[cmd_dir];
258
259	if (!reg->mem_h)
260		return;
261
262	iser_reg_desc_put(&iser_pdu->iser_conn->ib_conn,
263			  reg->mem_h);
264	reg->mem_h = NULL;
265}
266
267int
268iser_dma_map_task_data(struct icl_iser_pdu *iser_pdu,
269		       struct iser_data_buf *data,
270		       enum iser_data_dir iser_dir,
271		       enum dma_data_direction dma_dir)
272{
273	struct ib_device *dev;
274
275	iser_pdu->dir[iser_dir] = 1;
276	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
277
278	data->dma_nents = ib_dma_map_sg(dev, data->sgl, data->size, dma_dir);
279	if (data->dma_nents == 0) {
280		ISER_ERR("dma_map_sg failed");
281		return (EINVAL);
282	}
283
284	return (0);
285}
286