1/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
2/*
3 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the BSD-type
9 * license below:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 *      Redistributions of source code must retain the above copyright
16 *      notice, this list of conditions and the following disclaimer.
17 *
18 *      Redistributions in binary form must reproduce the above
19 *      copyright notice, this list of conditions and the following
20 *      disclaimer in the documentation and/or other materials provided
21 *      with the distribution.
22 *
23 *      Neither the name of the Network Appliance, Inc. nor the names of
24 *      its contributors may be used to endorse or promote products
25 *      derived from this software without specific prior written
26 *      permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Author: Tom Tucker <tom@opengridcomputing.com>
41 */
42
43#ifndef SVC_RDMA_H
44#define SVC_RDMA_H
45#include <linux/llist.h>
46#include <linux/sunrpc/xdr.h>
47#include <linux/sunrpc/svcsock.h>
48#include <linux/sunrpc/rpc_rdma.h>
49#include <linux/sunrpc/rpc_rdma_cid.h>
50#include <linux/sunrpc/svc_rdma_pcl.h>
51
52#include <linux/percpu_counter.h>
53#include <rdma/ib_verbs.h>
54#include <rdma/rdma_cm.h>
55
56/* Default and maximum inline threshold sizes */
57enum {
58	RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1,
59	RPCRDMA_DEF_INLINE_THRESH = 4096,
60	RPCRDMA_MAX_INLINE_THRESH = 65536
61};
62
/* RPC/RDMA parameters and the svcrdma workqueue */
extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
extern unsigned int svcrdma_max_bc_requests;
extern unsigned int svcrdma_max_req_size;
extern struct workqueue_struct *svcrdma_wq;

/* RPC/RDMA statistics, each kept in a struct percpu_counter */
extern struct percpu_counter svcrdma_stat_read;
extern struct percpu_counter svcrdma_stat_recv;
extern struct percpu_counter svcrdma_stat_sq_starve;
extern struct percpu_counter svcrdma_stat_write;
74
75struct svcxprt_rdma {
76	struct svc_xprt      sc_xprt;		/* SVC transport structure */
77	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
78	struct list_head     sc_accept_q;	/* Conn. waiting accept */
79	int		     sc_ord;		/* RDMA read limit */
80	int                  sc_max_send_sges;
81	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
82
83	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
84	unsigned int	     sc_sq_depth;	/* Depth of SQ */
85	__be32		     sc_fc_credits;	/* Forward credits */
86	u32		     sc_max_requests;	/* Max requests */
87	u32		     sc_max_bc_requests;/* Backward credits */
88	int                  sc_max_req_size;	/* Size of each RQ WR buf */
89	u8		     sc_port_num;
90
91	struct ib_pd         *sc_pd;
92
93	spinlock_t	     sc_send_lock;
94	struct llist_head    sc_send_ctxts;
95	spinlock_t	     sc_rw_ctxt_lock;
96	struct llist_head    sc_rw_ctxts;
97
98	u32		     sc_pending_recvs;
99	u32		     sc_recv_batch;
100	struct list_head     sc_rq_dto_q;
101	struct list_head     sc_read_complete_q;
102	spinlock_t	     sc_rq_dto_lock;
103	struct ib_qp         *sc_qp;
104	struct ib_cq         *sc_rq_cq;
105	struct ib_cq         *sc_sq_cq;
106
107	spinlock_t	     sc_lock;		/* transport lock */
108
109	wait_queue_head_t    sc_send_wait;	/* SQ exhaustion waitlist */
110	unsigned long	     sc_flags;
111	struct work_struct   sc_work;
112
113	struct llist_head    sc_recv_ctxts;
114
115	atomic_t	     sc_completion_ids;
116};
117/* sc_flags */
118#define RDMAXPRT_CONN_PENDING	3
119
120static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp)
121{
122	struct svc_xprt *xprt = rqstp->rq_xprt;
123
124	return container_of(xprt, struct svcxprt_rdma, sc_xprt);
125}
126
/* Connection parameter defaults */
enum {
	RPCRDMA_LISTEN_BACKLOG		= 10,
	RPCRDMA_MAX_REQUESTS		= 64,
	RPCRDMA_MAX_BC_REQUESTS		= 2,
};

/* RDMA-named alias for the generic RPCSVC_MAXPAYLOAD limit */
#define RPCSVC_MAXPAYLOAD_RDMA		RPCSVC_MAXPAYLOAD
137
138/**
139 * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID
140 * @rdma: controlling transport
141 * @cid: completion ID to initialize
142 */
143static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
144					  struct rpc_rdma_cid *cid)
145{
146	cid->ci_queue_id = rdma->sc_rq_cq->res.id;
147	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
148}
149
150/**
151 * svc_rdma_send_cid_init - Initialize a Send Queue completion ID
152 * @rdma: controlling transport
153 * @cid: completion ID to initialize
154 */
155static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
156					  struct rpc_rdma_cid *cid)
157{
158	cid->ci_queue_id = rdma->sc_sq_cq->res.id;
159	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
160}
161
162/*
163 * A chunk context tracks all I/O for moving one Read or Write
164 * chunk. This is a set of rdma_rw's that handle data movement
165 * for all segments of one chunk.
166 */
167struct svc_rdma_chunk_ctxt {
168	struct rpc_rdma_cid	cc_cid;
169	struct ib_cqe		cc_cqe;
170	struct list_head	cc_rwctxts;
171	ktime_t			cc_posttime;
172	int			cc_sqecount;
173};
174
175struct svc_rdma_recv_ctxt {
176	struct llist_node	rc_node;
177	struct list_head	rc_list;
178	struct ib_recv_wr	rc_recv_wr;
179	struct ib_cqe		rc_cqe;
180	struct rpc_rdma_cid	rc_cid;
181	struct ib_sge		rc_recv_sge;
182	void			*rc_recv_buf;
183	struct xdr_stream	rc_stream;
184	u32			rc_byte_len;
185	u32			rc_inv_rkey;
186	__be32			rc_msgtype;
187
188	/* State for pulling a Read chunk */
189	unsigned int		rc_pageoff;
190	unsigned int		rc_curpage;
191	unsigned int		rc_readbytes;
192	struct xdr_buf		rc_saved_arg;
193	struct svc_rdma_chunk_ctxt	rc_cc;
194
195	struct svc_rdma_pcl	rc_call_pcl;
196
197	struct svc_rdma_pcl	rc_read_pcl;
198	struct svc_rdma_chunk	*rc_cur_result_payload;
199	struct svc_rdma_pcl	rc_write_pcl;
200	struct svc_rdma_pcl	rc_reply_pcl;
201
202	unsigned int		rc_page_count;
203	struct page		*rc_pages[RPCSVC_MAXPAGES];
204};
205
206/*
207 * State for sending a Write chunk.
208 *  - Tracks progress of writing one chunk over all its segments
209 *  - Stores arguments for the SGL constructor functions
210 */
211struct svc_rdma_write_info {
212	struct svcxprt_rdma	*wi_rdma;
213
214	const struct svc_rdma_chunk	*wi_chunk;
215
216	/* write state of this chunk */
217	unsigned int		wi_seg_off;
218	unsigned int		wi_seg_no;
219
220	/* SGL constructor arguments */
221	const struct xdr_buf	*wi_xdr;
222	unsigned char		*wi_base;
223	unsigned int		wi_next_off;
224
225	struct svc_rdma_chunk_ctxt	wi_cc;
226	struct work_struct	wi_work;
227};
228
229struct svc_rdma_send_ctxt {
230	struct llist_node	sc_node;
231	struct rpc_rdma_cid	sc_cid;
232	struct work_struct	sc_work;
233
234	struct svcxprt_rdma	*sc_rdma;
235	struct ib_send_wr	sc_send_wr;
236	struct ib_send_wr	*sc_wr_chain;
237	int			sc_sqecount;
238	struct ib_cqe		sc_cqe;
239	struct xdr_buf		sc_hdrbuf;
240	struct xdr_stream	sc_stream;
241	struct svc_rdma_write_info sc_reply_info;
242	void			*sc_xprt_buf;
243	int			sc_page_count;
244	int			sc_cur_sge_no;
245	struct page		*sc_pages[RPCSVC_MAXPAGES];
246	struct ib_sge		sc_sges[];
247};
248
/* Declared for svc_rdma_backchannel.c */
void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
			      struct svc_rdma_recv_ctxt *rctxt);
252
/*
 * svc_rdma_recvfrom.c
 *
 * Every prototype names its parameters; svc_rdma_recvfrom() takes
 * the request as @rqstp, matching the other svc_rqst users in this
 * header.
 */
void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma);
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
			    struct svc_rdma_recv_ctxt *ctxt);
void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
int svc_rdma_recvfrom(struct svc_rqst *rqstp);
263
/*
 * svc_rdma_rw.c
 *
 * Each function is declared exactly once; svc_rdma_cc_init() sits
 * next to its counterpart svc_rdma_cc_release().
 */
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
		      struct svc_rdma_chunk_ctxt *cc);
void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
			 struct svc_rdma_chunk_ctxt *cc,
			 enum dma_data_direction dir);
void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
				  struct svc_rdma_send_ctxt *ctxt);
int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
			     const struct svc_rdma_recv_ctxt *rctxt,
			     const struct xdr_buf *xdr);
int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
				 const struct svc_rdma_pcl *write_pcl,
				 const struct svc_rdma_pcl *reply_pcl,
				 struct svc_rdma_send_ctxt *sctxt,
				 const struct xdr_buf *xdr);
int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
			       struct svc_rqst *rqstp,
			       struct svc_rdma_recv_ctxt *head);
286
/*
 * svc_rdma_sendto.c
 *
 * Every prototype names its parameters; svc_rdma_sendto() takes the
 * request as @rqstp, matching svc_rdma_result_payload() below.
 */
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma);
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
			    struct svc_rdma_send_ctxt *ctxt);
int svc_rdma_post_send(struct svcxprt_rdma *rdma,
		       struct svc_rdma_send_ctxt *ctxt);
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
			   struct svc_rdma_send_ctxt *sctxt,
			   const struct svc_rdma_pcl *write_pcl,
			   const struct svc_rdma_pcl *reply_pcl,
			   const struct xdr_buf *xdr);
void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
			     struct svc_rdma_send_ctxt *sctxt,
			     struct svc_rdma_recv_ctxt *rctxt,
			     int status);
void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
int svc_rdma_sendto(struct svc_rqst *rqstp);
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
			    unsigned int length);
308
/*
 * svc_rdma_transport.c
 *
 * The backchannel class is declared only when
 * CONFIG_SUNRPC_BACKCHANNEL is defined.
 */
extern struct svc_xprt_class svc_rdma_class;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
extern struct svc_xprt_class svc_rdma_bc_class;
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
314
/* Declared for svc_rdma.c: init and cleanup entry points */
int svc_rdma_init(void);
void svc_rdma_cleanup(void);
318
319#endif
320