#ifndef _SDP_H_
#define _SDP_H_

#define	LINUXKPI_PARAM_PREFIX ib_sdp_

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_ofed.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/domain.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>

#include <linux/device.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>
#include <rdma/sdp_socket.h>
#include <rdma/ib_fmr_pool.h>

#ifdef SDP_DEBUG
#define	CONFIG_INFINIBAND_SDP_DEBUG
#endif

#include "sdp_dbg.h"

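/*
 * The linuxkpi headers included above bring in a Linux-style LIST_HEAD macro;
 * it is dropped here (presumably to avoid that clash) and replaced with the
 * queue(3) form from sys/queue.h.
 */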
#undef LIST_HEAD
/* From sys/queue.h */
#define LIST_HEAD(name, type)                                           \
struct name {                                                           \
        struct type *lh_first;  /* first element */                     \
}

/* Interval between successive polls in the Tx routine when polling is used
   instead of interrupts (in per-core Tx rings) - should be a power of 2 */
#define SDP_TX_POLL_MODER	16
#define SDP_TX_POLL_TIMEOUT	(HZ / 20)
#define SDP_NAGLE_TIMEOUT (HZ / 10)

#define SDP_SRCAVAIL_CANCEL_TIMEOUT (HZ * 5)
#define SDP_SRCAVAIL_ADV_TIMEOUT (1 * HZ)
#define SDP_SRCAVAIL_PAYLOAD_LEN 1

#define SDP_RESOLVE_TIMEOUT 1000
#define SDP_ROUTE_TIMEOUT 1000
#define SDP_RETRY_COUNT 5
#define SDP_KEEPALIVE_TIME (120 * 60 * HZ)
#define SDP_FIN_WAIT_TIMEOUT (60 * HZ) /* like TCP_FIN_TIMEOUT */

#define SDP_TX_SIZE 0x40
#define SDP_RX_SIZE 0x40

#define SDP_FMR_SIZE (MIN(0x1000, PAGE_SIZE) / sizeof(u64))
#define SDP_FMR_POOL_SIZE	1024
#define SDP_FMR_DIRTY_SIZE	(SDP_FMR_POOL_SIZE / 4)

#define SDP_MAX_RDMA_READ_LEN (PAGE_SIZE * (SDP_FMR_SIZE - 2))

/* mb inlined data len - rest will be rx'ed into frags */
#define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh))

/* Limit on the tx payload length, in case the sink supports bigger buffers
 * than the source can handle, and on the rx fragment size (bounded by the
 * sge->length field). */
#define	SDP_MAX_PACKET	(1 << 16)
#define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE)

#define SDP_MAX_RECV_SGES (SDP_MAX_PACKET / MCLBYTES)
#define SDP_MAX_SEND_SGES ((SDP_MAX_PACKET / MCLBYTES) + 2)

#define SDP_NUM_WC 4

#define SDP_DEF_ZCOPY_THRESH (64 * 1024)
#define SDP_MIN_ZCOPY_THRESH PAGE_SIZE
#define SDP_MAX_ZCOPY_THRESH 1048576

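/*
 * Tags for classifying work completions; judging by their values (single bits
 * above the low 32 bits) they are presumably OR'ed into the 64-bit work
 * request id so the completion handler can tell what kind of WR finished.
 */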
#define SDP_OP_RECV 0x800000000LL
#define SDP_OP_SEND 0x400000000LL
#define SDP_OP_RDMA 0x200000000LL
#define SDP_OP_NOP  0x100000000LL

/* How long (in jiffies) to block the sender until tx completion. */
#define SDP_BZCOPY_POLL_TIMEOUT (HZ / 10)

#define SDP_AUTO_CONF	0xffff
#define AUTO_MOD_DELAY (HZ / 4)

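/*
 * Per-packet SDP state carried alongside each mbuf; this is the equivalent of
 * the skb->cb[] control block in the Linux SDP driver this code derives from.
 */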
struct sdp_mb_cb {
	__u32		seq;		/* Starting sequence number	*/
	struct bzcopy_state      *bz;
	struct rx_srcavail_state *rx_sa;
	struct tx_srcavail_state *tx_sa;
};

#define	M_PUSH	M_PROTO1	/* Do a 'push'. */
#define	M_URG	M_PROTO2	/* Mark as urgent (oob). */

#define SDP_SKB_CB(__mb)      ((struct sdp_mb_cb *)&((__mb)->cb[0]))
#define BZCOPY_STATE(mb)      (SDP_SKB_CB(mb)->bz)
#define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa)
#define TX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->tx_sa)

#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

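/*
 * Ring accounting: 'head' counts buffers posted and 'tail' counts buffers
 * completed, so head - tail is the number currently outstanding.
 */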
#define ring_head(ring)   (atomic_read(&(ring).head))
#define ring_tail(ring)   (atomic_read(&(ring).tail))
#define ring_posted(ring) (ring_head(ring) - ring_tail(ring))

#define rx_ring_posted(ssk) ring_posted(ssk->rx_ring)
#ifdef SDP_ZCOPY
#define tx_ring_posted(ssk) (ring_posted(ssk->tx_ring) + \
	(ssk->tx_ring.rdma_inflight ? ssk->tx_ring.rdma_inflight->busy : 0))
#else
#define tx_ring_posted(ssk) ring_posted(ssk->tx_ring)
#endif

extern int sdp_zcopy_thresh;
extern int rcvbuf_initial_size;
extern struct workqueue_struct *rx_comp_wq;
extern struct ib_client sdp_client;

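/* Message identifiers carried in the 'mid' field of the BSDH below. */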
enum sdp_mid {
	SDP_MID_HELLO = 0x0,
	SDP_MID_HELLO_ACK = 0x1,
	SDP_MID_DISCONN = 0x2,
	SDP_MID_ABORT = 0x3,
	SDP_MID_SENDSM = 0x4,
	SDP_MID_RDMARDCOMPL = 0x6,
	SDP_MID_SRCAVAIL_CANCEL = 0x8,
	SDP_MID_CHRCVBUF = 0xB,
	SDP_MID_CHRCVBUF_ACK = 0xC,
	SDP_MID_SINKAVAIL = 0xFD,
	SDP_MID_SRCAVAIL = 0xFE,
	SDP_MID_DATA = 0xFF,
};

enum sdp_flags {
        SDP_OOB_PRES = 1 << 0,
        SDP_OOB_PEND = 1 << 1,
};

enum {
	SDP_MIN_TX_CREDITS = 2
};

enum {
	SDP_ERR_ERROR   = -4,
	SDP_ERR_FAULT   = -3,
	SDP_NEW_SEG     = -2,
	SDP_DO_WAIT_MEM = -1
};

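/*
 * BSDH: the base header that prefixes every SDP message.  'bufs' advertises
 * receive credits, 'len' is the total message length, and 'mseq'/'mseq_ack'
 * carry the message sequence number and the last peer sequence acknowledged.
 */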
struct sdp_bsdh {
	u8 mid;
	u8 flags;
	__u16 bufs;
	__u32 len;
	__u32 mseq;
	__u32 mseq_ack;
} __attribute__((__packed__));

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__u32 pad[3];
		__u32 addr;
	} ip4;
} __attribute__((__packed__));

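/* Hello header, exchanged in the private data of the CM connection request. */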
/* TODO: too much? Can I avoid having the src/dst and port here? */
struct sdp_hh {
	struct sdp_bsdh bsdh;
	u8 majv_minv;
	u8 ipv_cap;
	u8 rsvd1;
	u8 max_adverts;
	__u32 desremrcvsz;
	__u32 localrcvsz;
	__u16 port;
	__u16 rsvd2;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
	u8 rsvd3[IB_CM_REQ_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 48];
} __attribute__((__packed__));

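/* Hello acknowledgement header, carried in the CM reply private data. */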
struct sdp_hah {
	struct sdp_bsdh bsdh;
	u8 majv_minv;
	u8 ipv_cap;
	u8 rsvd1;
	u8 ext_max_adverts;
	__u32 actrcvsz;
	u8 rsvd2[IB_CM_REP_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 8];
} __attribute__((__packed__));

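/* RdmaRdCompl header: reports how many bytes of a SrcAvail were RDMA-read. */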
struct sdp_rrch {
	__u32 len;
} __attribute__((__packed__));

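/*
 * SrcAvail/SinkAvail header: advertises an (rkey, vaddr, len) region that the
 * peer may access directly for zero-copy transfers.
 */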
struct sdp_srcah {
	__u32 len;
	__u32 rkey;
	__u64 vaddr;
} __attribute__((__packed__));

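/*
 * One posted send or receive: the mbuf chain plus the DMA address of each of
 * its segments (unmapped again in sdp_cleanup_sdp_buf()).
 */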
struct sdp_buf {
        struct mbuf *mb;
        u64             mapping[SDP_MAX_SEND_SGES];
} __attribute__((__packed__));

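/* ChRcvBuf/ChRcvBufAck payload: the requested or actual receive buffer size. */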
struct sdp_chrecvbuf {
	u32 size;
} __attribute__((__packed__));

/* Context used for synchronous zero copy bcopy (BZCOPY) */
struct bzcopy_state {
	unsigned char __user  *u_base;
	int                    u_len;
	int                    left;
	int                    page_cnt;
	int                    cur_page;
	int                    cur_offset;
	int                    busy;
	struct sdp_sock      *ssk;
	struct page         **pages;
};

enum rx_sa_flag {
	RX_SA_ABORTED    = 2,
};

enum tx_sa_flag {
	TX_SA_SENDSM     = 0x01,
	TX_SA_CROSS_SEND = 0x02,
	TX_SA_INTRRUPTED = 0x04,
	TX_SA_TIMEDOUT   = 0x08,
	TX_SA_ERROR      = 0x10,
};

struct rx_srcavail_state {
	/* Advertised buffer stuff */
	u32 mseq;
	u32 used;
	u32 reported;
	u32 len;
	u32 rkey;
	u64 vaddr;

	/* Dest buff info */
	struct ib_umem *umem;
	struct ib_pool_fmr *fmr;

	/* Utility */
	u8  busy;
	enum rx_sa_flag  flags;
};

struct tx_srcavail_state {
	/* Data below 'busy' will be reset */
	u8		busy;

	struct ib_umem *umem;
	struct ib_pool_fmr *fmr;

	u32		bytes_sent;
	u32		bytes_acked;

	enum tx_sa_flag	abort_flags;
	u8		posted;

	u32		mseq;
};

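/*
 * Send and receive work rings.  'credits' mirrors the number of receive
 * buffers the peer has advertised; see tx_credits() and tx_slots_free() below.
 */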
struct sdp_tx_ring {
#ifdef SDP_ZCOPY
	struct rx_srcavail_state *rdma_inflight;
#endif
	struct sdp_buf   	*buffer;
	atomic_t          	head;
	atomic_t          	tail;
	struct ib_cq 	 	*cq;

	atomic_t 	  	credits;
#define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits))

	struct callout		timer;
	u16 		  	poll_cnt;
};

struct sdp_rx_ring {
	struct sdp_buf   *buffer;
	atomic_t          head;
	atomic_t          tail;
	struct ib_cq 	 *cq;

	int		 destroyed;
	struct rwlock	 destroyed_lock;
};

struct sdp_device {
	struct ib_pd 		*pd;
	struct ib_mr 		*mr;
	struct ib_fmr_pool 	*fmr_pool;
};

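/*
 * State for adaptive RX interrupt moderation, tuned from the packet and byte
 * rates sampled over the last interval.
 */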
struct sdp_moderation {
	unsigned long last_moder_packets;
	unsigned long last_moder_tx_packets;
	unsigned long last_moder_bytes;
	unsigned long last_moder_jiffies;
	int last_moder_time;
	u16 rx_usecs;
	u16 rx_frames;
	u16 tx_usecs;
	u32 pkt_rate_low;
	u16 rx_usecs_low;
	u32 pkt_rate_high;
	u16 rx_usecs_high;
	u16 sample_interval;
	u16 adaptive_rx_coal;
	u32 msg_enable;

	int moder_cnt;
	int moder_time;
};

/* These are flags fields. */
#define	SDP_TIMEWAIT	0x0001		/* In ssk timewait state. */
#define	SDP_DROPPED	0x0002		/* Socket has been dropped. */
#define	SDP_SOCKREF	0x0004		/* Holding a sockref for close. */
#define	SDP_NODELAY	0x0008		/* Disable Nagle. */
#define	SDP_NEEDFIN	0x0010		/* Send a fin on the next tx. */
#define	SDP_DREQWAIT	0x0020		/* Waiting on DREQ. */
#define	SDP_DESTROY	0x0040		/* Being destroyed. */
#define	SDP_DISCON	0x0080		/* rdma_disconnect is owed. */

/* These are oobflags */
#define	SDP_HADOOB	0x0001		/* Had OOB data. */
#define	SDP_HAVEOOB	0x0002		/* Have OOB data. */

struct sdp_sock {
	LIST_ENTRY(sdp_sock) list;
	struct socket *socket;
	struct rdma_cm_id *id;
	struct ib_device *ib_device;
	struct sdp_device *sdp_dev;
	struct ib_qp *qp;
	struct ucred *cred;
	struct callout keep2msl;	/* 2msl and keepalive timer. */
	struct callout nagle_timer;	/* timeout waiting for ack */
	struct ib_ucontext context;
	in_port_t lport;
	in_addr_t laddr;
	in_port_t fport;
	in_addr_t faddr;
	int flags;
	int oobflags;		/* protected by rx lock. */
	int state;
	int softerror;
	int recv_bytes;		/* Bytes per recv. buf including header */
	int xmit_size_goal;
	char iobc;

	struct sdp_rx_ring rx_ring;
	struct sdp_tx_ring tx_ring;
	struct rwlock	lock;
	struct mbuf *rx_ctl_q;
	struct mbuf *rx_ctl_tail;

	int qp_active;	/* XXX Flag. */
	int max_sge;
	struct work_struct rx_comp_work;
#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt))
	atomic_t rcv_nxt;

	/* SDP specific */
	atomic_t mseq_ack;
#define mseq_ack(ssk) (atomic_read(&ssk->mseq_ack))
	unsigned max_bufs;	/* Initial buffers offered by other side */
	unsigned min_bufs;	/* Low water mark to wake senders */

	unsigned long nagle_last_unacked; /* mseq of latest unacked packet */

	atomic_t               remote_credits;
#define remote_credits(ssk) (atomic_read(&ssk->remote_credits))
	int 		  poll_cq;

	/* SDP slow start */
	int recv_request_head; 	/* mark the rx_head when the resize request
				   was received */
	int recv_request; 	/* XXX flag if request to resize was received */

	unsigned long tx_packets;
	unsigned long rx_packets;
	unsigned long tx_bytes;
	unsigned long rx_bytes;
	struct sdp_moderation auto_mod;
	struct task shutdown_task;
#ifdef SDP_ZCOPY
	struct tx_srcavail_state *tx_sa;
	struct rx_srcavail_state *rx_sa;
	spinlock_t tx_sa_lock;
	struct delayed_work srcavail_cancel_work;
	int srcavail_cancel_mseq;
	/* ZCOPY data: -1:use global; 0:disable zcopy; >0: zcopy threshold */
	int zcopy_thresh;
#endif
};

#define	sdp_sk(so)	((struct sdp_sock *)(so->so_pcb))

#define	SDP_RLOCK(ssk)		rw_rlock(&(ssk)->lock)
#define	SDP_WLOCK(ssk)		rw_wlock(&(ssk)->lock)
#define	SDP_RUNLOCK(ssk)	rw_runlock(&(ssk)->lock)
#define	SDP_WUNLOCK(ssk)	rw_wunlock(&(ssk)->lock)
#define	SDP_WLOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_WLOCKED)
#define	SDP_RLOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_RLOCKED)
#define	SDP_LOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_LOCKED)

static inline void tx_sa_reset(struct tx_srcavail_state *tx_sa)
{
	memset((void *)&tx_sa->busy, 0,
			sizeof(*tx_sa) - offsetof(typeof(*tx_sa), busy));
}

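/*
 * The rx ring's 'destroyed' flag is guarded by an rwlock: completion paths
 * take it shared via rx_ring_trylock() and back off once
 * rx_ring_destroy_lock() has marked the ring destroyed.
 */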
static inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring)
{
	rw_runlock(&rx_ring->destroyed_lock);
}

static inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring)
{
	rw_rlock(&rx_ring->destroyed_lock);
	if (rx_ring->destroyed) {
		rx_ring_unlock(rx_ring);
		return 0;
	}
	return 1;
}

static inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring)
{
	rw_wlock(&rx_ring->destroyed_lock);
	rx_ring->destroyed = 1;
	rw_wunlock(&rx_ring->destroyed_lock);
}

static inline void sdp_arm_rx_cq(struct sdp_sock *ssk)
{
	sdp_prf(ssk->socket, NULL, "Arming RX cq");
	sdp_dbg_data(ssk->socket, "Arming RX cq\n");

	ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP);
}

static inline void sdp_arm_tx_cq(struct sdp_sock *ssk)
{
	sdp_prf(ssk->socket, NULL, "Arming TX cq");
	sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n",
		tx_credits(ssk), tx_ring_posted(ssk));

	ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP);
}

/* Return the min of:
 * - tx credits
 * - free slots in tx_ring (not including SDP_MIN_TX_CREDITS)
 */
static inline int tx_slots_free(struct sdp_sock *ssk)
{
	int min_free;

	min_free = MIN(tx_credits(ssk),
			SDP_TX_SIZE - tx_ring_posted(ssk));
	if (min_free < SDP_MIN_TX_CREDITS)
		return 0;

	return min_free - SDP_MIN_TX_CREDITS;
}

/* utilities */
static inline char *mid2str(int mid)
{
#define ENUM2STR(e) [e] = #e
	static char *mid2str[] = {
		ENUM2STR(SDP_MID_HELLO),
		ENUM2STR(SDP_MID_HELLO_ACK),
		ENUM2STR(SDP_MID_ABORT),
		ENUM2STR(SDP_MID_DISCONN),
		ENUM2STR(SDP_MID_SENDSM),
		ENUM2STR(SDP_MID_RDMARDCOMPL),
		ENUM2STR(SDP_MID_SRCAVAIL_CANCEL),
		ENUM2STR(SDP_MID_CHRCVBUF),
		ENUM2STR(SDP_MID_CHRCVBUF_ACK),
		ENUM2STR(SDP_MID_DATA),
		ENUM2STR(SDP_MID_SRCAVAIL),
		ENUM2STR(SDP_MID_SINKAVAIL),
	};

	if (mid >= ARRAY_SIZE(mid2str))
		return NULL;

	return mid2str[mid];
}

static inline struct mbuf *
sdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait)
{
	struct sdp_bsdh *h;
	struct mbuf *mb;

	MGETHDR(mb, wait, MT_DATA);
	if (mb == NULL)
		return (NULL);
	mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh);
	h = mtod(mb, struct sdp_bsdh *);
	h->mid = mid;

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_data(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait);
}

static inline struct mbuf *
sdp_alloc_mb_disconnect(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait);
}

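/*
 * Analogue of the Linux skb_put(): grow the mbuf by 'len' bytes and return a
 * pointer to the newly appended region.  The caller is responsible for making
 * sure the space actually exists.
 */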
static inline void *
mb_put(struct mbuf *mb, int len)
{
	uint8_t *data;

	data = mb->m_data;
	data += mb->m_len;
	mb->m_len += len;
	return (void *)data;
}

static inline struct mbuf *
sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait)
{
	struct mbuf *mb;
	struct sdp_chrecvbuf *resp_size;

	mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait);
	if (mb == NULL)
		return (NULL);
	resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof *resp_size);
	resp_size->size = htonl(size);

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait)
{
	struct mbuf *mb;
	struct sdp_srcah *srcah;

	mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait);
	if (mb == NULL)
		return (NULL);
	srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah));
	srcah->len = htonl(len);
	srcah->rkey = htonl(rkey);
	srcah->vaddr = cpu_to_be64(vaddr);

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait);
}

static inline struct mbuf *
sdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait)
{
	struct mbuf *mb;
	struct sdp_rrch *rrch;

	mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait);
	if (mb == NULL)
		return (NULL);
	rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch));
	rrch->len = htonl(len);

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_sendsm(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait);
}

static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk)
{
	return SDP_TX_SIZE - tx_ring_posted(ssk);
}

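/*
 * A credit update is worth sending when the peer's last known credit count
 * (padded by 50%) has fallen below the number of receive buffers currently
 * posted, and we still hold a tx credit and a free tx slot to send it with.
 */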
static inline int credit_update_needed(struct sdp_sock *ssk)
{
	int c;

	c = remote_credits(ssk);
	if (likely(c > SDP_MIN_TX_CREDITS))
		c += c/2;
	return unlikely(c < rx_ring_posted(ssk)) &&
	    likely(tx_credits(ssk) > 0) &&
	    likely(sdp_tx_ring_slots_left(ssk));
}

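/* Statistics hooks; in this port they are compiled out and expand to nothing. */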
#define SDPSTATS_COUNTER_INC(stat)
#define SDPSTATS_COUNTER_ADD(stat, val)
#define SDPSTATS_COUNTER_MID_INC(stat, mid)
#define SDPSTATS_HIST_LINEAR(stat, size)
#define SDPSTATS_HIST(stat, size)

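/* Undo the DMA mapping of every segment in this buffer's mbuf chain. */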
static inline void
sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf,
    enum dma_data_direction dir)
{
	struct ib_device *dev;
	struct mbuf *mb;
	int i;

	dev = ssk->ib_device;
	for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++)
		ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir);
}

/* sdp_main.c */
void sdp_set_default_moderation(struct sdp_sock *ssk);
void sdp_start_keepalive_timer(struct socket *sk);
void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb);
void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk);
void sdp_abort(struct socket *sk);
struct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error);

/* sdp_cma.c */
int sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *);

/* sdp_tx.c */
int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device);
void sdp_tx_ring_destroy(struct sdp_sock *ssk);
int sdp_xmit_poll(struct sdp_sock *ssk, int force);
void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb);
void sdp_post_sends(struct sdp_sock *ssk, int wait);
void sdp_post_keepalive(struct sdp_sock *ssk);

/* sdp_rx.c */
void sdp_rx_ring_init(struct sdp_sock *ssk);
int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device);
void sdp_rx_ring_destroy(struct sdp_sock *ssk);
int sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size);
int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size);
void sdp_do_posts(struct sdp_sock *ssk);
void sdp_rx_comp_full(struct sdp_sock *ssk);

/* sdp_zcopy.c */
struct kiocb;
int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov);
int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah);
void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack);
void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack,
		u32 bytes_completed);
int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk);
int sdp_rdma_to_iovec(struct socket *sk, struct iovec *iov, struct mbuf *mb,
		unsigned long *used);
int sdp_post_rdma_rd_compl(struct sdp_sock *ssk,
		struct rx_srcavail_state *rx_sa);
int sdp_post_sendsm(struct socket *sk);
void srcavail_cancel_timeout(struct work_struct *work);
void sdp_abort_srcavail(struct socket *sk);
void sdp_abort_rdma_read(struct socket *sk);
int sdp_process_rx(struct sdp_sock *ssk);

#endif