1219820Sjeff#ifndef _SDP_H_ 2219820Sjeff#define _SDP_H_ 3219820Sjeff 4324685Shselasky#define LINUXKPI_PARAM_PREFIX ib_sdp_ 5324685Shselasky 6219820Sjeff#include "opt_ddb.h" 7219820Sjeff#include "opt_inet.h" 8219820Sjeff#include "opt_ofed.h" 9219820Sjeff 10219820Sjeff#include <sys/param.h> 11219820Sjeff#include <sys/systm.h> 12219820Sjeff#include <sys/malloc.h> 13219820Sjeff#include <sys/kernel.h> 14219820Sjeff#include <sys/sysctl.h> 15219820Sjeff#include <sys/mbuf.h> 16219820Sjeff#include <sys/lock.h> 17219820Sjeff#include <sys/rwlock.h> 18219820Sjeff#include <sys/socket.h> 19219820Sjeff#include <sys/socketvar.h> 20219820Sjeff#include <sys/protosw.h> 21219820Sjeff#include <sys/proc.h> 22219820Sjeff#include <sys/jail.h> 23219820Sjeff#include <sys/domain.h> 24219820Sjeff 25219820Sjeff#ifdef DDB 26219820Sjeff#include <ddb/ddb.h> 27219820Sjeff#endif 28219820Sjeff 29219820Sjeff#include <net/if.h> 30219820Sjeff#include <net/route.h> 31219820Sjeff#include <net/vnet.h> 32219820Sjeff 33219820Sjeff#include <netinet/in.h> 34219820Sjeff#include <netinet/in_systm.h> 35219820Sjeff#include <netinet/in_var.h> 36219820Sjeff#include <netinet/in_pcb.h> 37219820Sjeff#include <netinet/tcp.h> 38219820Sjeff#include <netinet/tcp_fsm.h> 39219820Sjeff#include <netinet/tcp_timer.h> 40219820Sjeff#include <netinet/tcp_var.h> 41219820Sjeff 42219820Sjeff#include <linux/device.h> 43219820Sjeff#include <linux/err.h> 44219820Sjeff#include <linux/sched.h> 45219820Sjeff#include <linux/workqueue.h> 46219820Sjeff#include <linux/wait.h> 47219820Sjeff#include <linux/module.h> 48219820Sjeff#include <linux/moduleparam.h> 49219820Sjeff#include <linux/pci.h> 50219820Sjeff 51219820Sjeff#include <rdma/ib_verbs.h> 52219820Sjeff#include <rdma/rdma_cm.h> 53219820Sjeff#include <rdma/ib_cm.h> 54219820Sjeff#include <rdma/sdp_socket.h> 55219820Sjeff#include <rdma/ib_fmr_pool.h> 56219820Sjeff 57219820Sjeff#ifdef SDP_DEBUG 58219820Sjeff#define CONFIG_INFINIBAND_SDP_DEBUG 59219820Sjeff#endif 60219820Sjeff 61219820Sjeff#include "sdp_dbg.h" 62219820Sjeff 63219820Sjeff#undef LIST_HEAD 64219820Sjeff/* From sys/queue.h */ 65219820Sjeff#define LIST_HEAD(name, type) \ 66219820Sjeffstruct name { \ 67219820Sjeff struct type *lh_first; /* first element */ \ 68219820Sjeff} 69219820Sjeff 70219820Sjeff/* Interval between sucessive polls in the Tx routine when polling is used 71219820Sjeff instead of interrupts (in per-core Tx rings) - should be power of 2 */ 72219820Sjeff#define SDP_TX_POLL_MODER 16 73219820Sjeff#define SDP_TX_POLL_TIMEOUT (HZ / 20) 74219820Sjeff#define SDP_NAGLE_TIMEOUT (HZ / 10) 75219820Sjeff 76219820Sjeff#define SDP_SRCAVAIL_CANCEL_TIMEOUT (HZ * 5) 77219820Sjeff#define SDP_SRCAVAIL_ADV_TIMEOUT (1 * HZ) 78219820Sjeff#define SDP_SRCAVAIL_PAYLOAD_LEN 1 79219820Sjeff 80219820Sjeff#define SDP_RESOLVE_TIMEOUT 1000 81219820Sjeff#define SDP_ROUTE_TIMEOUT 1000 82219820Sjeff#define SDP_RETRY_COUNT 5 83219820Sjeff#define SDP_KEEPALIVE_TIME (120 * 60 * HZ) 84219820Sjeff#define SDP_FIN_WAIT_TIMEOUT (60 * HZ) /* like TCP_FIN_TIMEOUT */ 85219820Sjeff 86219820Sjeff#define SDP_TX_SIZE 0x40 87219820Sjeff#define SDP_RX_SIZE 0x40 88219820Sjeff 89219820Sjeff#define SDP_FMR_SIZE (MIN(0x1000, PAGE_SIZE) / sizeof(u64)) 90219820Sjeff#define SDP_FMR_POOL_SIZE 1024 91219820Sjeff#define SDP_FMR_DIRTY_SIZE ( SDP_FMR_POOL_SIZE / 4 ) 92219820Sjeff 93219820Sjeff#define SDP_MAX_RDMA_READ_LEN (PAGE_SIZE * (SDP_FMR_SIZE - 2)) 94219820Sjeff 95219820Sjeff/* mb inlined data len - rest will be rx'ed into frags */ 96219820Sjeff#define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh)) 97219820Sjeff 98219820Sjeff/* limit tx payload len, if the sink supports bigger buffers than the source 99219820Sjeff * can handle. 100219820Sjeff * or rx fragment size (limited by sge->length size) */ 101219820Sjeff#define SDP_MAX_PACKET (1 << 16) 102219820Sjeff#define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE) 103219820Sjeff 104219820Sjeff#define SDP_MAX_RECV_SGES (SDP_MAX_PACKET / MCLBYTES) 105219820Sjeff#define SDP_MAX_SEND_SGES (SDP_MAX_PACKET / MCLBYTES) + 2 106219820Sjeff 107219820Sjeff#define SDP_NUM_WC 4 108219820Sjeff 109219820Sjeff#define SDP_DEF_ZCOPY_THRESH 64*1024 110219820Sjeff#define SDP_MIN_ZCOPY_THRESH PAGE_SIZE 111219820Sjeff#define SDP_MAX_ZCOPY_THRESH 1048576 112219820Sjeff 113219820Sjeff#define SDP_OP_RECV 0x800000000LL 114219820Sjeff#define SDP_OP_SEND 0x400000000LL 115219820Sjeff#define SDP_OP_RDMA 0x200000000LL 116219820Sjeff#define SDP_OP_NOP 0x100000000LL 117219820Sjeff 118219820Sjeff/* how long (in jiffies) to block sender till tx completion*/ 119219820Sjeff#define SDP_BZCOPY_POLL_TIMEOUT (HZ / 10) 120219820Sjeff 121219820Sjeff#define SDP_AUTO_CONF 0xffff 122219820Sjeff#define AUTO_MOD_DELAY (HZ / 4) 123219820Sjeff 124219820Sjeffstruct sdp_mb_cb { 125219820Sjeff __u32 seq; /* Starting sequence number */ 126219820Sjeff struct bzcopy_state *bz; 127219820Sjeff struct rx_srcavail_state *rx_sa; 128219820Sjeff struct tx_srcavail_state *tx_sa; 129219820Sjeff}; 130219820Sjeff 131219820Sjeff#define M_PUSH M_PROTO1 /* Do a 'push'. */ 132219820Sjeff#define M_URG M_PROTO2 /* Mark as urgent (oob). */ 133219820Sjeff 134219820Sjeff#define SDP_SKB_CB(__mb) ((struct sdp_mb_cb *)&((__mb)->cb[0])) 135219820Sjeff#define BZCOPY_STATE(mb) (SDP_SKB_CB(mb)->bz) 136219820Sjeff#define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa) 137219820Sjeff#define TX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->tx_sa) 138219820Sjeff 139219820Sjeff#ifndef MIN 140219820Sjeff#define MIN(a, b) (a < b ? a : b) 141219820Sjeff#endif 142219820Sjeff 143219820Sjeff#define ring_head(ring) (atomic_read(&(ring).head)) 144219820Sjeff#define ring_tail(ring) (atomic_read(&(ring).tail)) 145219820Sjeff#define ring_posted(ring) (ring_head(ring) - ring_tail(ring)) 146219820Sjeff 147219820Sjeff#define rx_ring_posted(ssk) ring_posted(ssk->rx_ring) 148219820Sjeff#ifdef SDP_ZCOPY 149219820Sjeff#define tx_ring_posted(ssk) (ring_posted(ssk->tx_ring) + \ 150219820Sjeff (ssk->tx_ring.rdma_inflight ? ssk->tx_ring.rdma_inflight->busy : 0)) 151219820Sjeff#else 152219820Sjeff#define tx_ring_posted(ssk) ring_posted(ssk->tx_ring) 153219820Sjeff#endif 154219820Sjeff 155219820Sjeffextern int sdp_zcopy_thresh; 156219820Sjeffextern int rcvbuf_initial_size; 157219820Sjeffextern struct workqueue_struct *rx_comp_wq; 158219820Sjeffextern struct ib_client sdp_client; 159219820Sjeff 160219820Sjeffenum sdp_mid { 161219820Sjeff SDP_MID_HELLO = 0x0, 162219820Sjeff SDP_MID_HELLO_ACK = 0x1, 163219820Sjeff SDP_MID_DISCONN = 0x2, 164219820Sjeff SDP_MID_ABORT = 0x3, 165219820Sjeff SDP_MID_SENDSM = 0x4, 166219820Sjeff SDP_MID_RDMARDCOMPL = 0x6, 167219820Sjeff SDP_MID_SRCAVAIL_CANCEL = 0x8, 168219820Sjeff SDP_MID_CHRCVBUF = 0xB, 169219820Sjeff SDP_MID_CHRCVBUF_ACK = 0xC, 170219820Sjeff SDP_MID_SINKAVAIL = 0xFD, 171219820Sjeff SDP_MID_SRCAVAIL = 0xFE, 172219820Sjeff SDP_MID_DATA = 0xFF, 173219820Sjeff}; 174219820Sjeff 175219820Sjeffenum sdp_flags { 176219820Sjeff SDP_OOB_PRES = 1 << 0, 177219820Sjeff SDP_OOB_PEND = 1 << 1, 178219820Sjeff}; 179219820Sjeff 180219820Sjeffenum { 181219820Sjeff SDP_MIN_TX_CREDITS = 2 182219820Sjeff}; 183219820Sjeff 184219820Sjeffenum { 185219820Sjeff SDP_ERR_ERROR = -4, 186219820Sjeff SDP_ERR_FAULT = -3, 187219820Sjeff SDP_NEW_SEG = -2, 188219820Sjeff SDP_DO_WAIT_MEM = -1 189219820Sjeff}; 190219820Sjeff 191219820Sjeffstruct sdp_bsdh { 192219820Sjeff u8 mid; 193219820Sjeff u8 flags; 194219820Sjeff __u16 bufs; 195219820Sjeff __u32 len; 196219820Sjeff __u32 mseq; 197219820Sjeff __u32 mseq_ack; 198219820Sjeff} __attribute__((__packed__)); 199219820Sjeff 200219820Sjeffunion cma_ip_addr { 201219820Sjeff struct in6_addr ip6; 202219820Sjeff struct { 203219820Sjeff __u32 pad[3]; 204219820Sjeff __u32 addr; 205219820Sjeff } ip4; 206219820Sjeff} __attribute__((__packed__)); 207219820Sjeff 208219820Sjeff/* TODO: too much? Can I avoid having the src/dst and port here? */ 209219820Sjeffstruct sdp_hh { 210219820Sjeff struct sdp_bsdh bsdh; 211219820Sjeff u8 majv_minv; 212219820Sjeff u8 ipv_cap; 213219820Sjeff u8 rsvd1; 214219820Sjeff u8 max_adverts; 215219820Sjeff __u32 desremrcvsz; 216219820Sjeff __u32 localrcvsz; 217219820Sjeff __u16 port; 218219820Sjeff __u16 rsvd2; 219219820Sjeff union cma_ip_addr src_addr; 220219820Sjeff union cma_ip_addr dst_addr; 221219820Sjeff u8 rsvd3[IB_CM_REQ_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 48]; 222219820Sjeff} __attribute__((__packed__)); 223219820Sjeff 224219820Sjeffstruct sdp_hah { 225219820Sjeff struct sdp_bsdh bsdh; 226219820Sjeff u8 majv_minv; 227219820Sjeff u8 ipv_cap; 228219820Sjeff u8 rsvd1; 229219820Sjeff u8 ext_max_adverts; 230219820Sjeff __u32 actrcvsz; 231219820Sjeff u8 rsvd2[IB_CM_REP_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 8]; 232219820Sjeff} __attribute__((__packed__)); 233219820Sjeff 234219820Sjeffstruct sdp_rrch { 235219820Sjeff __u32 len; 236219820Sjeff} __attribute__((__packed__)); 237219820Sjeff 238219820Sjeffstruct sdp_srcah { 239219820Sjeff __u32 len; 240219820Sjeff __u32 rkey; 241219820Sjeff __u64 vaddr; 242219820Sjeff} __attribute__((__packed__)); 243219820Sjeff 244219820Sjeffstruct sdp_buf { 245219820Sjeff struct mbuf *mb; 246219820Sjeff u64 mapping[SDP_MAX_SEND_SGES]; 247219820Sjeff} __attribute__((__packed__)); 248219820Sjeff 249219820Sjeffstruct sdp_chrecvbuf { 250219820Sjeff u32 size; 251219820Sjeff} __attribute__((__packed__)); 252219820Sjeff 253219820Sjeff/* Context used for synchronous zero copy bcopy (BZCOPY) */ 254219820Sjeffstruct bzcopy_state { 255219820Sjeff unsigned char __user *u_base; 256219820Sjeff int u_len; 257219820Sjeff int left; 258219820Sjeff int page_cnt; 259219820Sjeff int cur_page; 260219820Sjeff int cur_offset; 261219820Sjeff int busy; 262219820Sjeff struct sdp_sock *ssk; 263219820Sjeff struct page **pages; 264219820Sjeff}; 265219820Sjeff 266219820Sjeffenum rx_sa_flag { 267219820Sjeff RX_SA_ABORTED = 2, 268219820Sjeff}; 269219820Sjeff 270219820Sjeffenum tx_sa_flag { 271219820Sjeff TX_SA_SENDSM = 0x01, 272219820Sjeff TX_SA_CROSS_SEND = 0x02, 273219820Sjeff TX_SA_INTRRUPTED = 0x04, 274219820Sjeff TX_SA_TIMEDOUT = 0x08, 275219820Sjeff TX_SA_ERROR = 0x10, 276219820Sjeff}; 277219820Sjeff 278219820Sjeffstruct rx_srcavail_state { 279219820Sjeff /* Advertised buffer stuff */ 280219820Sjeff u32 mseq; 281219820Sjeff u32 used; 282219820Sjeff u32 reported; 283219820Sjeff u32 len; 284219820Sjeff u32 rkey; 285219820Sjeff u64 vaddr; 286219820Sjeff 287219820Sjeff /* Dest buff info */ 288219820Sjeff struct ib_umem *umem; 289219820Sjeff struct ib_pool_fmr *fmr; 290219820Sjeff 291219820Sjeff /* Utility */ 292219820Sjeff u8 busy; 293219820Sjeff enum rx_sa_flag flags; 294219820Sjeff}; 295219820Sjeff 296219820Sjeffstruct tx_srcavail_state { 297219820Sjeff /* Data below 'busy' will be reset */ 298219820Sjeff u8 busy; 299219820Sjeff 300219820Sjeff struct ib_umem *umem; 301219820Sjeff struct ib_pool_fmr *fmr; 302219820Sjeff 303219820Sjeff u32 bytes_sent; 304219820Sjeff u32 bytes_acked; 305219820Sjeff 306219820Sjeff enum tx_sa_flag abort_flags; 307219820Sjeff u8 posted; 308219820Sjeff 309219820Sjeff u32 mseq; 310219820Sjeff}; 311219820Sjeff 312219820Sjeffstruct sdp_tx_ring { 313219820Sjeff#ifdef SDP_ZCOPY 314219820Sjeff struct rx_srcavail_state *rdma_inflight; 315219820Sjeff#endif 316219820Sjeff struct sdp_buf *buffer; 317219820Sjeff atomic_t head; 318219820Sjeff atomic_t tail; 319219820Sjeff struct ib_cq *cq; 320219820Sjeff 321219820Sjeff atomic_t credits; 322219820Sjeff#define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits)) 323219820Sjeff 324219820Sjeff struct callout timer; 325219820Sjeff u16 poll_cnt; 326219820Sjeff}; 327219820Sjeff 328219820Sjeffstruct sdp_rx_ring { 329219820Sjeff struct sdp_buf *buffer; 330219820Sjeff atomic_t head; 331219820Sjeff atomic_t tail; 332219820Sjeff struct ib_cq *cq; 333219820Sjeff 334219820Sjeff int destroyed; 335219820Sjeff struct rwlock destroyed_lock; 336219820Sjeff}; 337219820Sjeff 338219820Sjeffstruct sdp_device { 339219820Sjeff struct ib_pd *pd; 340219820Sjeff struct ib_mr *mr; 341219820Sjeff struct ib_fmr_pool *fmr_pool; 342219820Sjeff}; 343219820Sjeff 344219820Sjeffstruct sdp_moderation { 345219820Sjeff unsigned long last_moder_packets; 346219820Sjeff unsigned long last_moder_tx_packets; 347219820Sjeff unsigned long last_moder_bytes; 348219820Sjeff unsigned long last_moder_jiffies; 349219820Sjeff int last_moder_time; 350219820Sjeff u16 rx_usecs; 351219820Sjeff u16 rx_frames; 352219820Sjeff u16 tx_usecs; 353219820Sjeff u32 pkt_rate_low; 354219820Sjeff u16 rx_usecs_low; 355219820Sjeff u32 pkt_rate_high; 356219820Sjeff u16 rx_usecs_high; 357219820Sjeff u16 sample_interval; 358219820Sjeff u16 adaptive_rx_coal; 359219820Sjeff u32 msg_enable; 360219820Sjeff 361219820Sjeff int moder_cnt; 362219820Sjeff int moder_time; 363219820Sjeff}; 364219820Sjeff 365219820Sjeff/* These are flags fields. */ 366219820Sjeff#define SDP_TIMEWAIT 0x0001 /* In ssk timewait state. */ 367219820Sjeff#define SDP_DROPPED 0x0002 /* Socket has been dropped. */ 368219820Sjeff#define SDP_SOCKREF 0x0004 /* Holding a sockref for close. */ 369219820Sjeff#define SDP_NODELAY 0x0008 /* Disble nagle. */ 370219820Sjeff#define SDP_NEEDFIN 0x0010 /* Send a fin on the next tx. */ 371219820Sjeff#define SDP_DREQWAIT 0x0020 /* Waiting on DREQ. */ 372219820Sjeff#define SDP_DESTROY 0x0040 /* Being destroyed. */ 373219820Sjeff#define SDP_DISCON 0x0080 /* rdma_disconnect is owed. */ 374219820Sjeff 375219820Sjeff/* These are oobflags */ 376219820Sjeff#define SDP_HADOOB 0x0001 /* Had OOB data. */ 377219820Sjeff#define SDP_HAVEOOB 0x0002 /* Have OOB data. */ 378219820Sjeff 379219820Sjeffstruct sdp_sock { 380219820Sjeff LIST_ENTRY(sdp_sock) list; 381219820Sjeff struct socket *socket; 382219820Sjeff struct rdma_cm_id *id; 383219820Sjeff struct ib_device *ib_device; 384219820Sjeff struct sdp_device *sdp_dev; 385219820Sjeff struct ib_qp *qp; 386219820Sjeff struct ucred *cred; 387219820Sjeff struct callout keep2msl; /* 2msl and keepalive timer. */ 388219820Sjeff struct callout nagle_timer; /* timeout waiting for ack */ 389219820Sjeff struct ib_ucontext context; 390219820Sjeff in_port_t lport; 391219820Sjeff in_addr_t laddr; 392219820Sjeff in_port_t fport; 393219820Sjeff in_addr_t faddr; 394219820Sjeff int flags; 395219820Sjeff int oobflags; /* protected by rx lock. */ 396219820Sjeff int state; 397219820Sjeff int softerror; 398219820Sjeff int recv_bytes; /* Bytes per recv. buf including header */ 399219820Sjeff int xmit_size_goal; 400219820Sjeff char iobc; 401219820Sjeff 402219820Sjeff struct sdp_rx_ring rx_ring; 403219820Sjeff struct sdp_tx_ring tx_ring; 404219820Sjeff struct rwlock lock; 405219820Sjeff struct mbuf *rx_ctl_q; 406219820Sjeff struct mbuf *rx_ctl_tail; 407219820Sjeff 408219820Sjeff int qp_active; /* XXX Flag. */ 409219820Sjeff int max_sge; 410219820Sjeff struct work_struct rx_comp_work; 411219820Sjeff#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt)) 412219820Sjeff atomic_t rcv_nxt; 413219820Sjeff 414219820Sjeff /* SDP specific */ 415219820Sjeff atomic_t mseq_ack; 416219820Sjeff#define mseq_ack(ssk) (atomic_read(&ssk->mseq_ack)) 417219820Sjeff unsigned max_bufs; /* Initial buffers offered by other side */ 418219820Sjeff unsigned min_bufs; /* Low water mark to wake senders */ 419219820Sjeff 420219820Sjeff unsigned long nagle_last_unacked; /* mseq of lastest unacked packet */ 421219820Sjeff 422219820Sjeff atomic_t remote_credits; 423219820Sjeff#define remote_credits(ssk) (atomic_read(&ssk->remote_credits)) 424219820Sjeff int poll_cq; 425219820Sjeff 426219820Sjeff /* SDP slow start */ 427219820Sjeff int recv_request_head; /* mark the rx_head when the resize request 428219820Sjeff was recieved */ 429219820Sjeff int recv_request; /* XXX flag if request to resize was recieved */ 430219820Sjeff 431219820Sjeff unsigned long tx_packets; 432219820Sjeff unsigned long rx_packets; 433219820Sjeff unsigned long tx_bytes; 434219820Sjeff unsigned long rx_bytes; 435219820Sjeff struct sdp_moderation auto_mod; 436219820Sjeff struct task shutdown_task; 437219820Sjeff#ifdef SDP_ZCOPY 438219820Sjeff struct tx_srcavail_state *tx_sa; 439219820Sjeff struct rx_srcavail_state *rx_sa; 440219820Sjeff spinlock_t tx_sa_lock; 441219820Sjeff struct delayed_work srcavail_cancel_work; 442219820Sjeff int srcavail_cancel_mseq; 443219820Sjeff /* ZCOPY data: -1:use global; 0:disable zcopy; >0: zcopy threshold */ 444219820Sjeff int zcopy_thresh; 445219820Sjeff#endif 446219820Sjeff}; 447219820Sjeff 448219820Sjeff#define sdp_sk(so) ((struct sdp_sock *)(so->so_pcb)) 449219820Sjeff 450219820Sjeff#define SDP_RLOCK(ssk) rw_rlock(&(ssk)->lock) 451219820Sjeff#define SDP_WLOCK(ssk) rw_wlock(&(ssk)->lock) 452219820Sjeff#define SDP_RUNLOCK(ssk) rw_runlock(&(ssk)->lock) 453219820Sjeff#define SDP_WUNLOCK(ssk) rw_wunlock(&(ssk)->lock) 454219820Sjeff#define SDP_WLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_WLOCKED) 455219820Sjeff#define SDP_RLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_RLOCKED) 456219820Sjeff#define SDP_LOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_LOCKED) 457219820Sjeff 458219820Sjeffstatic inline void tx_sa_reset(struct tx_srcavail_state *tx_sa) 459219820Sjeff{ 460219820Sjeff memset((void *)&tx_sa->busy, 0, 461219820Sjeff sizeof(*tx_sa) - offsetof(typeof(*tx_sa), busy)); 462219820Sjeff} 463219820Sjeff 464219820Sjeffstatic inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring) 465219820Sjeff{ 466219820Sjeff rw_runlock(&rx_ring->destroyed_lock); 467219820Sjeff} 468219820Sjeff 469219820Sjeffstatic inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring) 470219820Sjeff{ 471219820Sjeff rw_rlock(&rx_ring->destroyed_lock); 472219820Sjeff if (rx_ring->destroyed) { 473219820Sjeff rx_ring_unlock(rx_ring); 474219820Sjeff return 0; 475219820Sjeff } 476219820Sjeff return 1; 477219820Sjeff} 478219820Sjeff 479219820Sjeffstatic inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring) 480219820Sjeff{ 481219820Sjeff rw_wlock(&rx_ring->destroyed_lock); 482219820Sjeff rx_ring->destroyed = 1; 483219820Sjeff rw_wunlock(&rx_ring->destroyed_lock); 484219820Sjeff} 485219820Sjeff 486219820Sjeffstatic inline void sdp_arm_rx_cq(struct sdp_sock *ssk) 487219820Sjeff{ 488219820Sjeff sdp_prf(ssk->socket, NULL, "Arming RX cq"); 489219820Sjeff sdp_dbg_data(ssk->socket, "Arming RX cq\n"); 490219820Sjeff 491219820Sjeff ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP); 492219820Sjeff} 493219820Sjeff 494219820Sjeffstatic inline void sdp_arm_tx_cq(struct sdp_sock *ssk) 495219820Sjeff{ 496219820Sjeff sdp_prf(ssk->socket, NULL, "Arming TX cq"); 497219820Sjeff sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n", 498219820Sjeff tx_credits(ssk), tx_ring_posted(ssk)); 499219820Sjeff 500219820Sjeff ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP); 501219820Sjeff} 502219820Sjeff 503219820Sjeff/* return the min of: 504219820Sjeff * - tx credits 505219820Sjeff * - free slots in tx_ring (not including SDP_MIN_TX_CREDITS 506219820Sjeff */ 507219820Sjeffstatic inline int tx_slots_free(struct sdp_sock *ssk) 508219820Sjeff{ 509219820Sjeff int min_free; 510219820Sjeff 511219820Sjeff min_free = MIN(tx_credits(ssk), 512219820Sjeff SDP_TX_SIZE - tx_ring_posted(ssk)); 513219820Sjeff if (min_free < SDP_MIN_TX_CREDITS) 514219820Sjeff return 0; 515219820Sjeff 516219820Sjeff return min_free - SDP_MIN_TX_CREDITS; 517219820Sjeff}; 518219820Sjeff 519219820Sjeff/* utilities */ 520219820Sjeffstatic inline char *mid2str(int mid) 521219820Sjeff{ 522219820Sjeff#define ENUM2STR(e) [e] = #e 523219820Sjeff static char *mid2str[] = { 524219820Sjeff ENUM2STR(SDP_MID_HELLO), 525219820Sjeff ENUM2STR(SDP_MID_HELLO_ACK), 526219820Sjeff ENUM2STR(SDP_MID_ABORT), 527219820Sjeff ENUM2STR(SDP_MID_DISCONN), 528219820Sjeff ENUM2STR(SDP_MID_SENDSM), 529219820Sjeff ENUM2STR(SDP_MID_RDMARDCOMPL), 530219820Sjeff ENUM2STR(SDP_MID_SRCAVAIL_CANCEL), 531219820Sjeff ENUM2STR(SDP_MID_CHRCVBUF), 532219820Sjeff ENUM2STR(SDP_MID_CHRCVBUF_ACK), 533219820Sjeff ENUM2STR(SDP_MID_DATA), 534219820Sjeff ENUM2STR(SDP_MID_SRCAVAIL), 535219820Sjeff ENUM2STR(SDP_MID_SINKAVAIL), 536219820Sjeff }; 537219820Sjeff 538219820Sjeff if (mid >= ARRAY_SIZE(mid2str)) 539219820Sjeff return NULL; 540219820Sjeff 541219820Sjeff return mid2str[mid]; 542219820Sjeff} 543219820Sjeff 544219820Sjeffstatic inline struct mbuf * 545219820Sjeffsdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait) 546219820Sjeff{ 547219820Sjeff struct sdp_bsdh *h; 548219820Sjeff struct mbuf *mb; 549219820Sjeff 550219820Sjeff MGETHDR(mb, wait, MT_DATA); 551219820Sjeff if (mb == NULL) 552219820Sjeff return (NULL); 553219820Sjeff mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh); 554219820Sjeff h = mtod(mb, struct sdp_bsdh *); 555219820Sjeff h->mid = mid; 556219820Sjeff 557219820Sjeff return mb; 558219820Sjeff} 559219820Sjeffstatic inline struct mbuf * 560219820Sjeffsdp_alloc_mb_data(struct socket *sk, int wait) 561219820Sjeff{ 562219820Sjeff return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait); 563219820Sjeff} 564219820Sjeff 565219820Sjeffstatic inline struct mbuf * 566219820Sjeffsdp_alloc_mb_disconnect(struct socket *sk, int wait) 567219820Sjeff{ 568219820Sjeff return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait); 569219820Sjeff} 570219820Sjeff 571219820Sjeffstatic inline void * 572219820Sjeffmb_put(struct mbuf *mb, int len) 573219820Sjeff{ 574219820Sjeff uint8_t *data; 575219820Sjeff 576219820Sjeff data = mb->m_data; 577219820Sjeff data += mb->m_len; 578219820Sjeff mb->m_len += len; 579219820Sjeff return (void *)data; 580219820Sjeff} 581219820Sjeff 582219820Sjeffstatic inline struct mbuf * 583219820Sjeffsdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait) 584219820Sjeff{ 585219820Sjeff struct mbuf *mb; 586219820Sjeff struct sdp_chrecvbuf *resp_size; 587219820Sjeff 588219820Sjeff mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait); 589219820Sjeff if (mb == NULL) 590219820Sjeff return (NULL); 591219820Sjeff resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof *resp_size); 592219820Sjeff resp_size->size = htonl(size); 593219820Sjeff 594219820Sjeff return mb; 595219820Sjeff} 596219820Sjeff 597219820Sjeffstatic inline struct mbuf * 598219820Sjeffsdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait) 599219820Sjeff{ 600219820Sjeff struct mbuf *mb; 601219820Sjeff struct sdp_srcah *srcah; 602219820Sjeff 603219820Sjeff mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait); 604219820Sjeff if (mb == NULL) 605219820Sjeff return (NULL); 606219820Sjeff srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah)); 607219820Sjeff srcah->len = htonl(len); 608219820Sjeff srcah->rkey = htonl(rkey); 609219820Sjeff srcah->vaddr = cpu_to_be64(vaddr); 610219820Sjeff 611219820Sjeff return mb; 612219820Sjeff} 613219820Sjeff 614219820Sjeffstatic inline struct mbuf * 615219820Sjeffsdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait) 616219820Sjeff{ 617219820Sjeff return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait); 618219820Sjeff} 619219820Sjeff 620219820Sjeffstatic inline struct mbuf * 621219820Sjeffsdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait) 622219820Sjeff{ 623219820Sjeff struct mbuf *mb; 624219820Sjeff struct sdp_rrch *rrch; 625219820Sjeff 626219820Sjeff mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait); 627219820Sjeff if (mb == NULL) 628219820Sjeff return (NULL); 629219820Sjeff rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch)); 630219820Sjeff rrch->len = htonl(len); 631219820Sjeff 632219820Sjeff return mb; 633219820Sjeff} 634219820Sjeff 635219820Sjeffstatic inline struct mbuf * 636219820Sjeffsdp_alloc_mb_sendsm(struct socket *sk, int wait) 637219820Sjeff{ 638219820Sjeff return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait); 639219820Sjeff} 640219820Sjeffstatic inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk) 641219820Sjeff{ 642219820Sjeff return SDP_TX_SIZE - tx_ring_posted(ssk); 643219820Sjeff} 644219820Sjeff 645219820Sjeffstatic inline int credit_update_needed(struct sdp_sock *ssk) 646219820Sjeff{ 647219820Sjeff int c; 648219820Sjeff 649219820Sjeff c = remote_credits(ssk); 650219820Sjeff if (likely(c > SDP_MIN_TX_CREDITS)) 651219820Sjeff c += c/2; 652219820Sjeff return unlikely(c < rx_ring_posted(ssk)) && 653219820Sjeff likely(tx_credits(ssk) > 0) && 654219820Sjeff likely(sdp_tx_ring_slots_left(ssk)); 655219820Sjeff} 656219820Sjeff 657219820Sjeff 658219820Sjeff#define SDPSTATS_COUNTER_INC(stat) 659219820Sjeff#define SDPSTATS_COUNTER_ADD(stat, val) 660219820Sjeff#define SDPSTATS_COUNTER_MID_INC(stat, mid) 661219820Sjeff#define SDPSTATS_HIST_LINEAR(stat, size) 662219820Sjeff#define SDPSTATS_HIST(stat, size) 663219820Sjeff 664219820Sjeffstatic inline void 665219820Sjeffsdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf, 666219820Sjeff enum dma_data_direction dir) 667219820Sjeff{ 668219820Sjeff struct ib_device *dev; 669219820Sjeff struct mbuf *mb; 670219820Sjeff int i; 671219820Sjeff 672219820Sjeff dev = ssk->ib_device; 673219820Sjeff for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++) 674219820Sjeff ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir); 675219820Sjeff} 676219820Sjeff 677219820Sjeff/* sdp_main.c */ 678219820Sjeffvoid sdp_set_default_moderation(struct sdp_sock *ssk); 679219820Sjeffvoid sdp_start_keepalive_timer(struct socket *sk); 680219820Sjeffvoid sdp_urg(struct sdp_sock *ssk, struct mbuf *mb); 681219820Sjeffvoid sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk); 682219820Sjeffvoid sdp_abort(struct socket *sk); 683219820Sjeffstruct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error); 684219820Sjeff 685219820Sjeff 686219820Sjeff/* sdp_cma.c */ 687219820Sjeffint sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *); 688219820Sjeff 689219820Sjeff/* sdp_tx.c */ 690219820Sjeffint sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device); 691219820Sjeffvoid sdp_tx_ring_destroy(struct sdp_sock *ssk); 692219820Sjeffint sdp_xmit_poll(struct sdp_sock *ssk, int force); 693219820Sjeffvoid sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb); 694219820Sjeffvoid sdp_post_sends(struct sdp_sock *ssk, int wait); 695219820Sjeffvoid sdp_post_keepalive(struct sdp_sock *ssk); 696219820Sjeff 697219820Sjeff/* sdp_rx.c */ 698219820Sjeffvoid sdp_rx_ring_init(struct sdp_sock *ssk); 699219820Sjeffint sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device); 700219820Sjeffvoid sdp_rx_ring_destroy(struct sdp_sock *ssk); 701219820Sjeffint sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size); 702219820Sjeffint sdp_init_buffers(struct sdp_sock *ssk, u32 new_size); 703219820Sjeffvoid sdp_do_posts(struct sdp_sock *ssk); 704219820Sjeffvoid sdp_rx_comp_full(struct sdp_sock *ssk); 705219820Sjeff 706219820Sjeff/* sdp_zcopy.c */ 707271127Shselaskystruct kiocb; 708219820Sjeffint sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov); 709219820Sjeffint sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah); 710219820Sjeffvoid sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack); 711219820Sjeffvoid sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack, 712219820Sjeff u32 bytes_completed); 713219820Sjeffint sdp_handle_rdma_read_cqe(struct sdp_sock *ssk); 714219820Sjeffint sdp_rdma_to_iovec(struct socket *sk, struct iovec *iov, struct mbuf *mb, 715219820Sjeff unsigned long *used); 716219820Sjeffint sdp_post_rdma_rd_compl(struct sdp_sock *ssk, 717219820Sjeff struct rx_srcavail_state *rx_sa); 718219820Sjeffint sdp_post_sendsm(struct socket *sk); 719219820Sjeffvoid srcavail_cancel_timeout(struct work_struct *work); 720219820Sjeffvoid sdp_abort_srcavail(struct socket *sk); 721219820Sjeffvoid sdp_abort_rdma_read(struct socket *sk); 722219820Sjeffint sdp_process_rx(struct sdp_sock *ssk); 723219820Sjeff 724219820Sjeff#endif 725