#ifndef _SDP_H_
#define _SDP_H_

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_ofed.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/domain.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>

#include <linux/device.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>
#include <rdma/sdp_socket.h>
#include <rdma/ib_fmr_pool.h>

#ifdef SDP_DEBUG
#define	CONFIG_INFINIBAND_SDP_DEBUG
#endif

#include "sdp_dbg.h"

/*
 * The Linux compatibility headers included above define their own LIST_HEAD
 * macro; undo that and restore the sys/queue.h definition.
 */
#undef LIST_HEAD
#define LIST_HEAD(name, type)						\
struct name {								\
	struct type *lh_first;	/* first element */			\
}
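
/*
 * Illustrative sketch only (the list name below is hypothetical): with the
 * sys/queue.h semantics restored, a list of SDP sockets can be declared the
 * usual BSD way:
 *
 *	LIST_HEAD(sdp_list, sdp_sock) sdp_socks =
 *	    LIST_HEAD_INITIALIZER(sdp_socks);
 *
 * struct sdp_sock, declared later in this header, carries the matching
 * LIST_ENTRY(sdp_sock) link.
 */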

/*
 * Interval between successive polls in the TX routine when polling is used
 * instead of interrupts (for per-core TX rings); should be a power of 2.
 */
#define SDP_TX_POLL_MODER	16
#define SDP_TX_POLL_TIMEOUT	(HZ / 20)
#define SDP_NAGLE_TIMEOUT (HZ / 10)

#define SDP_SRCAVAIL_CANCEL_TIMEOUT (HZ * 5)
#define SDP_SRCAVAIL_ADV_TIMEOUT (1 * HZ)
#define SDP_SRCAVAIL_PAYLOAD_LEN 1

#define SDP_RESOLVE_TIMEOUT 1000
#define SDP_ROUTE_TIMEOUT 1000
#define SDP_RETRY_COUNT 5
#define SDP_KEEPALIVE_TIME (120 * 60 * HZ)
#define SDP_FIN_WAIT_TIMEOUT (60 * HZ) /* like TCP_FIN_TIMEOUT */

#define SDP_TX_SIZE 0x40
#define SDP_RX_SIZE 0x40

#define SDP_FMR_SIZE (MIN(0x1000, PAGE_SIZE) / sizeof(u64))
#define SDP_FMR_POOL_SIZE	1024
#define SDP_FMR_DIRTY_SIZE	(SDP_FMR_POOL_SIZE / 4)

#define SDP_MAX_RDMA_READ_LEN (PAGE_SIZE * (SDP_FMR_SIZE - 2))

/* mbuf inlined data length; the rest is received into fragments. */
#define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh))

/*
 * Limit on the TX payload length when the sink advertises bigger buffers
 * than the source can handle, and on the RX fragment size (bounded by
 * sge->length).
 */
#define	SDP_MAX_PACKET	(1 << 16)
#define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE)

#define SDP_MAX_RECV_SGES (SDP_MAX_PACKET / MCLBYTES)
#define SDP_MAX_SEND_SGES ((SDP_MAX_PACKET / MCLBYTES) + 2)

#define SDP_NUM_WC 4

#define SDP_DEF_ZCOPY_THRESH (64 * 1024)
#define SDP_MIN_ZCOPY_THRESH PAGE_SIZE
#define SDP_MAX_ZCOPY_THRESH 1048576

#define SDP_OP_RECV 0x800000000LL
#define SDP_OP_SEND 0x400000000LL
#define SDP_OP_RDMA 0x200000000LL
#define SDP_OP_NOP  0x100000000LL

/* How long (in jiffies) to block the sender waiting for TX completion. */
#define SDP_BZCOPY_POLL_TIMEOUT (HZ / 10)

#define SDP_AUTO_CONF	0xffff
#define AUTO_MOD_DELAY (HZ / 4)

struct sdp_mb_cb {
	__u32		seq;		/* Starting sequence number	*/
	struct bzcopy_state      *bz;
	struct rx_srcavail_state *rx_sa;
	struct tx_srcavail_state *tx_sa;
};

#define	M_PUSH	M_PROTO1	/* Do a 'push'. */
#define	M_URG	M_PROTO2	/* Mark as urgent (oob). */

#define SDP_SKB_CB(__mb)      ((struct sdp_mb_cb *)&((__mb)->cb[0]))
#define BZCOPY_STATE(mb)      (SDP_SKB_CB(mb)->bz)
#define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa)
#define TX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->tx_sa)
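
/*
 * Illustrative sketch (not a prescription): SDP keeps per-mbuf protocol
 * state in the mbuf's cb[] scratch area and reaches it through the
 * accessors above.  For example, a BZCOPY context can be attached when a
 * send is queued and recovered when its completion arrives:
 *
 *	BZCOPY_STATE(mb) = bz;		(stash the context on the mbuf)
 *	...
 *	bz = BZCOPY_STATE(mb);		(recover it at TX completion)
 */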

#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif

#define ring_head(ring)   (atomic_read(&(ring).head))
#define ring_tail(ring)   (atomic_read(&(ring).tail))
#define ring_posted(ring) (ring_head(ring) - ring_tail(ring))

#define rx_ring_posted(ssk) ring_posted((ssk)->rx_ring)
#ifdef SDP_ZCOPY
#define tx_ring_posted(ssk) (ring_posted((ssk)->tx_ring) + \
	((ssk)->tx_ring.rdma_inflight ? (ssk)->tx_ring.rdma_inflight->busy : 0))
#else
#define tx_ring_posted(ssk) ring_posted((ssk)->tx_ring)
#endif
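
/*
 * Sketch of the accounting model (illustrative): head and tail are
 * free-running counters, so head - tail is the number of work requests
 * currently posted to the hardware.  A typical check before posting
 * another receive might look like:
 *
 *	while (rx_ring_posted(ssk) < SDP_RX_SIZE)
 *		... post one more receive buffer ...
 */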

extern int sdp_zcopy_thresh;
extern int rcvbuf_initial_size;
extern struct workqueue_struct *rx_comp_wq;
extern struct ib_client sdp_client;

enum sdp_mid {
	SDP_MID_HELLO = 0x0,
	SDP_MID_HELLO_ACK = 0x1,
	SDP_MID_DISCONN = 0x2,
	SDP_MID_ABORT = 0x3,
	SDP_MID_SENDSM = 0x4,
	SDP_MID_RDMARDCOMPL = 0x6,
	SDP_MID_SRCAVAIL_CANCEL = 0x8,
	SDP_MID_CHRCVBUF = 0xB,
	SDP_MID_CHRCVBUF_ACK = 0xC,
	SDP_MID_SINKAVAIL = 0xFD,
	SDP_MID_SRCAVAIL = 0xFE,
	SDP_MID_DATA = 0xFF,
};

enum sdp_flags {
	SDP_OOB_PRES = 1 << 0,
	SDP_OOB_PEND = 1 << 1,
};

enum {
	SDP_MIN_TX_CREDITS = 2
};

enum {
	SDP_ERR_ERROR   = -4,
	SDP_ERR_FAULT   = -3,
	SDP_NEW_SEG     = -2,
	SDP_DO_WAIT_MEM = -1
};

struct sdp_bsdh {
	u8 mid;
	u8 flags;
	__u16 bufs;
	__u32 len;
	__u32 mseq;
	__u32 mseq_ack;
} __attribute__((__packed__));
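
/*
 * Illustrative sketch, not the authoritative code (see sdp_post_send() in
 * sdp_tx.c, declared at the bottom of this header): every SDP message
 * starts with this Base Sockets Direct Header, filled in network byte
 * order before the mbuf is posted.  'tx_mseq' below stands in for the
 * socket's send sequence counter:
 *
 *	struct sdp_bsdh *h = mtod(mb, struct sdp_bsdh *);
 *
 *	h->mid = SDP_MID_DATA;
 *	h->bufs = htons(rx_ring_posted(ssk));	(credits advertised)
 *	h->len = htonl(mb->m_pkthdr.len);
 *	h->mseq = htonl(tx_mseq);
 *	h->mseq_ack = htonl(mseq_ack(ssk));
 */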

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__u32 pad[3];
		__u32 addr;
	} ip4;
} __attribute__((__packed__));

/* TODO: too much? Can I avoid having the src/dst and port here? */
struct sdp_hh {
	struct sdp_bsdh bsdh;
	u8 majv_minv;
	u8 ipv_cap;
	u8 rsvd1;
	u8 max_adverts;
	__u32 desremrcvsz;
	__u32 localrcvsz;
	__u16 port;
	__u16 rsvd2;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
	u8 rsvd3[IB_CM_REQ_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 48];
} __attribute__((__packed__));

struct sdp_hah {
	struct sdp_bsdh bsdh;
	u8 majv_minv;
	u8 ipv_cap;
	u8 rsvd1;
	u8 ext_max_adverts;
	__u32 actrcvsz;
	u8 rsvd2[IB_CM_REP_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 8];
} __attribute__((__packed__));

struct sdp_rrch {
	__u32 len;
} __attribute__((__packed__));

struct sdp_srcah {
	__u32 len;
	__u32 rkey;
	__u64 vaddr;
} __attribute__((__packed__));

struct sdp_buf {
	struct mbuf	*mb;
	u64		mapping[SDP_MAX_SEND_SGES];
} __attribute__((__packed__));

struct sdp_chrecvbuf {
	u32 size;
} __attribute__((__packed__));
/* Context for synchronous (blocking) zero-copy sends (BZCOPY). */
struct bzcopy_state {
	unsigned char __user  *u_base;
	int                    u_len;
	int                    left;
	int                    page_cnt;
	int                    cur_page;
	int                    cur_offset;
	int                    busy;
	struct sdp_sock       *ssk;
	struct page          **pages;
};

enum rx_sa_flag {
	RX_SA_ABORTED    = 2,
};

enum tx_sa_flag {
	TX_SA_SENDSM     = 0x01,
	TX_SA_CROSS_SEND = 0x02,
	TX_SA_INTRRUPTED = 0x04,
	TX_SA_TIMEDOUT   = 0x08,
	TX_SA_ERROR      = 0x10,
};

struct rx_srcavail_state {
	/* The peer's advertised (SrcAvail) buffer */
	u32 mseq;
	u32 used;
	u32 reported;
	u32 len;
	u32 rkey;
	u64 vaddr;

	/* Destination buffer info */
	struct ib_umem *umem;
	struct ib_pool_fmr *fmr;

	/* Utility */
	u8  busy;
	enum rx_sa_flag  flags;
};

struct tx_srcavail_state {
	/* Everything from 'busy' to the end is cleared by tx_sa_reset(). */
	u8		busy;

	struct ib_umem *umem;
	struct ib_pool_fmr *fmr;

	u32		bytes_sent;
	u32		bytes_acked;

	enum tx_sa_flag	abort_flags;
	u8		posted;

	u32		mseq;
};

struct sdp_tx_ring {
#ifdef SDP_ZCOPY
	struct rx_srcavail_state *rdma_inflight;
#endif
	struct sdp_buf		*buffer;
	atomic_t		head;
	atomic_t		tail;
	struct ib_cq		*cq;

	atomic_t		credits;
#define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits))

	struct callout		timer;
	u16			poll_cnt;
};

struct sdp_rx_ring {
	struct sdp_buf	*buffer;
	atomic_t	head;
	atomic_t	tail;
	struct ib_cq	*cq;

	int		destroyed;
	struct rwlock	destroyed_lock;
};

struct sdp_device {
	struct ib_pd		*pd;
	struct ib_mr		*mr;
	struct ib_fmr_pool	*fmr_pool;
};

struct sdp_moderation {
	unsigned long last_moder_packets;
	unsigned long last_moder_tx_packets;
	unsigned long last_moder_bytes;
	unsigned long last_moder_jiffies;
	int last_moder_time;
	u16 rx_usecs;
	u16 rx_frames;
	u16 tx_usecs;
	u32 pkt_rate_low;
	u16 rx_usecs_low;
	u32 pkt_rate_high;
	u16 rx_usecs_high;
	u16 sample_interval;
	u16 adaptive_rx_coal;
	u32 msg_enable;

	int moder_cnt;
	int moder_time;
};
/* Values for the 'flags' field of struct sdp_sock. */
#define	SDP_TIMEWAIT	0x0001		/* In ssk timewait state. */
#define	SDP_DROPPED	0x0002		/* Socket has been dropped. */
#define	SDP_SOCKREF	0x0004		/* Holding a sockref for close. */
#define	SDP_NODELAY	0x0008		/* Disable Nagle. */
#define	SDP_NEEDFIN	0x0010		/* Send a FIN on the next tx. */
#define	SDP_DREQWAIT	0x0020		/* Waiting on DREQ. */
#define	SDP_DESTROY	0x0040		/* Being destroyed. */
#define	SDP_DISCON	0x0080		/* rdma_disconnect is owed. */

/* Values for the 'oobflags' field. */
#define	SDP_HADOOB	0x0001		/* Had OOB data. */
#define	SDP_HAVEOOB	0x0002		/* Have OOB data. */

struct sdp_sock {
	LIST_ENTRY(sdp_sock) list;
	struct socket *socket;
	struct rdma_cm_id *id;
	struct ib_device *ib_device;
	struct sdp_device *sdp_dev;
	struct ib_qp *qp;
	struct ucred *cred;
	struct callout keep2msl;	/* 2MSL and keepalive timer. */
	struct callout nagle_timer;	/* Timeout waiting for an ACK. */
	struct ib_ucontext context;
	in_port_t lport;
	in_addr_t laddr;
	in_port_t fport;
	in_addr_t faddr;
	int flags;
	int oobflags;		/* Protected by rx lock. */
	int state;
	int softerror;
	int recv_bytes;		/* Bytes per receive buffer, including header. */
	int xmit_size_goal;
	char iobc;

	struct sdp_rx_ring rx_ring;
	struct sdp_tx_ring tx_ring;
	struct rwlock	lock;
	struct mbuf *rx_ctl_q;
	struct mbuf *rx_ctl_tail;

	int qp_active;	/* XXX Flag. */
	int max_sge;
	struct work_struct rx_comp_work;
#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt))
	atomic_t rcv_nxt;

	/* SDP specific */
	atomic_t mseq_ack;
#define mseq_ack(ssk) (atomic_read(&ssk->mseq_ack))
	unsigned max_bufs;	/* Initial buffers offered by other side. */
	unsigned min_bufs;	/* Low water mark to wake senders. */

	unsigned long nagle_last_unacked; /* mseq of the latest unacked packet. */

	atomic_t               remote_credits;
#define remote_credits(ssk) (atomic_read(&ssk->remote_credits))
	int 		  poll_cq;

	/* SDP slow start */
	int recv_request_head;	/* Marks the rx head when the resize request
				   was received. */
	int recv_request;	/* XXX flag: a resize request was received. */

	unsigned long tx_packets;
	unsigned long rx_packets;
	unsigned long tx_bytes;
	unsigned long rx_bytes;
	struct sdp_moderation auto_mod;
	struct task shutdown_task;
#ifdef SDP_ZCOPY
	struct tx_srcavail_state *tx_sa;
	struct rx_srcavail_state *rx_sa;
	spinlock_t tx_sa_lock;
	struct delayed_work srcavail_cancel_work;
	int srcavail_cancel_mseq;
	/* ZCOPY data: -1: use global; 0: disable zcopy; >0: zcopy threshold. */
	int zcopy_thresh;
#endif
};

#define	sdp_sk(so)	((struct sdp_sock *)((so)->so_pcb))

#define	SDP_RLOCK(ssk)		rw_rlock(&(ssk)->lock)
#define	SDP_WLOCK(ssk)		rw_wlock(&(ssk)->lock)
#define	SDP_RUNLOCK(ssk)	rw_runlock(&(ssk)->lock)
#define	SDP_WUNLOCK(ssk)	rw_wunlock(&(ssk)->lock)
#define	SDP_WLOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_WLOCKED)
#define	SDP_RLOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_RLOCKED)
#define	SDP_LOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_LOCKED)
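
/*
 * Illustrative usage only: per-socket state is protected by the rwlock
 * above, taken for writing whenever connection state changes and for
 * reading on lookup-style paths, e.g.:
 *
 *	SDP_WLOCK(ssk);
 *	ssk->flags |= SDP_NEEDFIN;
 *	SDP_WUNLOCK(ssk);
 *
 * Code that merely requires the lock to be held asserts it with
 * SDP_WLOCK_ASSERT()/SDP_LOCK_ASSERT().
 */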

/* Reset a TX SrcAvail context: clear everything from 'busy' to the end. */
static inline void tx_sa_reset(struct tx_srcavail_state *tx_sa)
{
	memset((void *)&tx_sa->busy, 0,
			sizeof(*tx_sa) - offsetof(typeof(*tx_sa), busy));
}

static inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring)
{
	rw_runlock(&rx_ring->destroyed_lock);
}

/*
 * Pin the RX ring for use; returns 0 (without holding the lock) if the
 * ring has already been marked destroyed.
 */
static inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring)
{
	rw_rlock(&rx_ring->destroyed_lock);
	if (rx_ring->destroyed) {
		rx_ring_unlock(rx_ring);
		return 0;
	}
	return 1;
}
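
/*
 * Sketch of the intended pattern (illustrative; the real consumers are the
 * RX completion paths in sdp_rx.c): readers bracket CQ processing with the
 * trylock so a concurrent rx_ring_destroy_lock() can fence them out:
 *
 *	if (!rx_ring_trylock(&ssk->rx_ring))
 *		return;				(ring is being torn down)
 *	... drain completions from ssk->rx_ring.cq ...
 *	rx_ring_unlock(&ssk->rx_ring);
 */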

static inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring)
{
	rw_wlock(&rx_ring->destroyed_lock);
	rx_ring->destroyed = 1;
	rw_wunlock(&rx_ring->destroyed_lock);
}

static inline void sdp_arm_rx_cq(struct sdp_sock *ssk)
{
	sdp_prf(ssk->socket, NULL, "Arming RX cq");
	sdp_dbg_data(ssk->socket, "Arming RX cq\n");

	ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP);
}

static inline void sdp_arm_tx_cq(struct sdp_sock *ssk)
{
	sdp_prf(ssk->socket, NULL, "Arming TX cq");
	sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n",
		tx_credits(ssk), tx_ring_posted(ssk));

	ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP);
}

/*
 * Return the number of TX slots the caller may consume: the minimum of the
 * available TX credits and the free slots in the TX ring, less the
 * SDP_MIN_TX_CREDITS reserve.
 */
static inline int tx_slots_free(struct sdp_sock *ssk)
{
	int min_free;

	min_free = MIN(tx_credits(ssk),
			SDP_TX_SIZE - tx_ring_posted(ssk));
	if (min_free < SDP_MIN_TX_CREDITS)
		return 0;

	return min_free - SDP_MIN_TX_CREDITS;
}
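
/*
 * Worked example (illustrative numbers): with tx_credits(ssk) == 10 and 5
 * sends still posted, min_free = MIN(10, SDP_TX_SIZE - 5) = 10, so the
 * caller may use 10 - SDP_MIN_TX_CREDITS = 8 slots.  A sender that gets 0
 * back would typically reap completions first, e.g.:
 *
 *	if (tx_slots_free(ssk) == 0)
 *		sdp_xmit_poll(ssk, 1);		(declared later in this file)
 */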

/* utilities */
static inline char *mid2str(int mid)
{
#define ENUM2STR(e) [e] = #e
	static char *mid2str[] = {
		ENUM2STR(SDP_MID_HELLO),
		ENUM2STR(SDP_MID_HELLO_ACK),
		ENUM2STR(SDP_MID_ABORT),
		ENUM2STR(SDP_MID_DISCONN),
		ENUM2STR(SDP_MID_SENDSM),
		ENUM2STR(SDP_MID_RDMARDCOMPL),
		ENUM2STR(SDP_MID_SRCAVAIL_CANCEL),
		ENUM2STR(SDP_MID_CHRCVBUF),
		ENUM2STR(SDP_MID_CHRCVBUF_ACK),
		ENUM2STR(SDP_MID_DATA),
		ENUM2STR(SDP_MID_SRCAVAIL),
		ENUM2STR(SDP_MID_SINKAVAIL),
	};

	if (mid >= ARRAY_SIZE(mid2str))
		return NULL;

	return mid2str[mid];
}
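
/*
 * Example use (illustrative): mid2str() is meant for debug output, e.g.
 *
 *	sdp_dbg_data(ssk->socket, "RX %s\n", mid2str(h->mid));
 *
 * Unknown MIDs map to NULL, so callers that may see arbitrary values
 * should check the result before printing.
 */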

/*
 * Allocate an mbuf for an SDP message and fill in the base header's MID.
 * The size argument is not used here; callers append header extensions and
 * payload with mb_put().
 */
static inline struct mbuf *
sdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait)
{
	struct sdp_bsdh *h;
	struct mbuf *mb;

	MGETHDR(mb, wait, MT_DATA);
	if (mb == NULL)
		return (NULL);
	mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh);
	h = mtod(mb, struct sdp_bsdh *);
	h->mid = mid;

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_data(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait);
}

static inline struct mbuf *
sdp_alloc_mb_disconnect(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait);
}

/* Append len bytes of space to the mbuf and return a pointer to it. */
static inline void *
mb_put(struct mbuf *mb, int len)
{
	uint8_t *data;

	data = mb->m_data;
	data += mb->m_len;
	mb->m_len += len;
	return (void *)data;
}

static inline struct mbuf *
sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait)
{
	struct mbuf *mb;
	struct sdp_chrecvbuf *resp_size;

	mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait);
	if (mb == NULL)
		return (NULL);
	resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof(*resp_size));
	resp_size->size = htonl(size);

	return mb;
}
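
/*
 * Putting the helpers together (an illustrative sketch, not lifted from the
 * real callers): a control message is allocated, optionally extended via
 * mb_put(), and handed to sdp_post_send(), declared at the bottom of this
 * header.  For example, acknowledging a receive-buffer resize might look
 * like:
 *
 *	mb = sdp_alloc_mb_chrcvbuf_ack(sk, size, M_NOWAIT);
 *	if (mb != NULL)
 *		sdp_post_send(ssk, mb);
 */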

static inline struct mbuf *
sdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait)
{
	struct mbuf *mb;
	struct sdp_srcah *srcah;

	mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait);
	if (mb == NULL)
		return (NULL);
	srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah));
	srcah->len = htonl(len);
	srcah->rkey = htonl(rkey);
	srcah->vaddr = cpu_to_be64(vaddr);

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait);
}

static inline struct mbuf *
sdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait)
{
	struct mbuf *mb;
	struct sdp_rrch *rrch;

	mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait);
	if (mb == NULL)
		return (NULL);
	rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch));
	rrch->len = htonl(len);

	return mb;
}

static inline struct mbuf *
sdp_alloc_mb_sendsm(struct socket *sk, int wait)
{
	return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait);
}

static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk)
{
	return SDP_TX_SIZE - tx_ring_posted(ssk);
}

/*
 * True when the remote's last known credit count (plus 50% slack) is below
 * the number of receive buffers currently posted, and we still have TX
 * credits and ring slots with which to send an update.
 */
static inline int credit_update_needed(struct sdp_sock *ssk)
{
	int c;

	c = remote_credits(ssk);
	if (likely(c > SDP_MIN_TX_CREDITS))
		c += c/2;
	return unlikely(c < rx_ring_posted(ssk)) &&
	    likely(tx_credits(ssk) > 0) &&
	    likely(sdp_tx_ring_slots_left(ssk));
}
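
/*
 * Worked example (illustrative numbers): if remote_credits(ssk) == 6, the
 * slack-adjusted value is 6 + 6/2 = 9; with 16 receive buffers currently
 * posted (9 < 16), at least one TX credit, and a free TX ring slot, the
 * function returns true and the caller would send or piggyback a credit
 * update.
 */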

/* SDP statistics hooks are no-ops here. */
#define SDPSTATS_COUNTER_INC(stat)
#define SDPSTATS_COUNTER_ADD(stat, val)
#define SDPSTATS_COUNTER_MID_INC(stat, mid)
#define SDPSTATS_HIST_LINEAR(stat, size)
#define SDPSTATS_HIST(stat, size)

/* Unmap the DMA mappings of every mbuf chained to an sdp_buf. */
static inline void
sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf,
    enum dma_data_direction dir)
{
	struct ib_device *dev;
	struct mbuf *mb;
	int i;

	dev = ssk->ib_device;
	for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++)
		ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir);
}

/* sdp_main.c */
void sdp_set_default_moderation(struct sdp_sock *ssk);
void sdp_start_keepalive_timer(struct socket *sk);
void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb);
void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk);
void sdp_abort(struct socket *sk);
struct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error);

/* sdp_cma.c */
int sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *);

/* sdp_tx.c */
int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device);
void sdp_tx_ring_destroy(struct sdp_sock *ssk);
int sdp_xmit_poll(struct sdp_sock *ssk, int force);
void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb);
void sdp_post_sends(struct sdp_sock *ssk, int wait);
void sdp_post_keepalive(struct sdp_sock *ssk);

/* sdp_rx.c */
void sdp_rx_ring_init(struct sdp_sock *ssk);
int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device);
void sdp_rx_ring_destroy(struct sdp_sock *ssk);
int sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size);
int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size);
void sdp_do_posts(struct sdp_sock *ssk);
void sdp_rx_comp_full(struct sdp_sock *ssk);

/* sdp_zcopy.c */
struct kiocb;
int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov);
int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah);
void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack);
void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack,
		u32 bytes_completed);
int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk);
int sdp_rdma_to_iovec(struct socket *sk, struct iovec *iov, struct mbuf *mb,
		unsigned long *used);
int sdp_post_rdma_rd_compl(struct sdp_sock *ssk,
		struct rx_srcavail_state *rx_sa);
int sdp_post_sendsm(struct socket *sk);
void srcavail_cancel_timeout(struct work_struct *work);
void sdp_abort_srcavail(struct socket *sk);
void sdp_abort_rdma_read(struct socket *sk);
int sdp_process_rx(struct sdp_sock *ssk);

#endif /* _SDP_H_ */