/* rdsv3.h revision 12895:805f5f4df09e */
1/* 2 * This file contains definitions imported from the OFED rds header rds.h. 3 * Oracle elects to have and use the contents of rds.h under and 4 * governed by the OpenIB.org BSD license. 5 */ 6 7/* 8 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 9 */ 10 11#ifndef _RDSV3_RDSV3_H 12#define _RDSV3_RDSV3_H 13 14/* 15 * The name of this file is rds.h in ofed. 16 */ 17 18#ifdef __cplusplus 19extern "C" { 20#endif 21 22#include <sys/sunndi.h> 23#include <netinet/in.h> 24#include <sys/synch.h> 25#include <sys/stropts.h> 26#include <sys/socket.h> 27#include <sys/socketvar.h> 28#include <inet/ip.h> 29#include <sys/avl.h> 30#include <sys/param.h> 31#include <sys/time.h> 32#include <sys/rds.h> 33 34#include <sys/ib/ibtl/ibti.h> 35#include <sys/ib/clients/of/rdma/ib_verbs.h> 36#include <sys/ib/clients/of/rdma/ib_addr.h> 37#include <sys/ib/clients/of/rdma/rdma_cm.h> 38#include <sys/ib/clients/rdsv3/rdsv3_impl.h> 39#include <sys/ib/clients/rdsv3/info.h> 40 41#define NIPQUAD(addr) \ 42 (unsigned char)((ntohl(addr) >> 24) & 0xFF), \ 43 (unsigned char)((ntohl(addr) >> 16) & 0xFF), \ 44 (unsigned char)((ntohl(addr) >> 8) & 0xFF), \ 45 (unsigned char)(ntohl(addr) & 0xFF) 46 47/* 48 * RDS Network protocol version 49 */ 50#define RDS_PROTOCOL_3_0 0x0300 51#define RDS_PROTOCOL_3_1 0x0301 52#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 53#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) 54#define RDS_PROTOCOL_MINOR(v) ((v) & 255) 55#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) 56 57/* 58 * XXX randomly chosen, but at least seems to be unused: 59 * # 18464-18768 Unassigned 60 * We should do better. We want a reserved port to discourage unpriv'ed 61 * userspace from listening. 62 * 63 * port 18633 was the version that had ack frames on the wire. 64 */ 65#define RDSV3_PORT 18634 66 67#define RDSV3_REAPER_WAIT_SECS (5*60) 68#define RDSV3_REAPER_WAIT_JIFFIES SEC_TO_TICK(RDSV3_REAPER_WAIT_SECS) 69 70/* 71 * This is the sad making. 
Some kernels have a bug in the per_cpu() api which 72 * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section 73 * in the module is not cacheline-aligned. As much as we'd like to tell users 74 * with older kernels to stuff it, that's not reasonable. We'll roll our own 75 * until this doesn't have to build against older kernels. 76 */ 77#define RDSV3_DEFINE_PER_CPU(type, var) type var[NR_CPUS] 78#define RDSV3_DECLARE_PER_CPU(type, var) extern type var[NR_CPUS] 79#define rdsv3_per_cpu(var, cpu) var[cpu] 80 81static inline ulong_t 82ceil(ulong_t x, ulong_t y) 83{ 84 return ((x + y - 1) / y); 85} 86 87#define RDSV3_FRAG_SHIFT 12 88#define RDSV3_FRAG_SIZE ((unsigned int)(1 << RDSV3_FRAG_SHIFT)) 89 90#define RDSV3_CONG_MAP_BYTES (65536 / 8) 91#define RDSV3_CONG_MAP_LONGS (RDSV3_CONG_MAP_BYTES / sizeof (unsigned long)) 92#define RDSV3_CONG_MAP_PAGES (RDSV3_CONG_MAP_BYTES / PAGE_SIZE) 93#define RDSV3_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 94 95struct rdsv3_cong_map { 96 struct avl_node m_rb_node; 97 uint32_be_t m_addr; 98 rdsv3_wait_queue_t m_waitq; 99 struct list m_conn_list; 100 unsigned long m_page_addrs[RDSV3_CONG_MAP_PAGES]; 101}; 102 103/* 104 * This is how we will track the connection state: 105 * A connection is always in one of the following 106 * states. Updates to the state are atomic and imply 107 * a memory barrier. 
108 */ 109enum { 110 RDSV3_CONN_DOWN = 0, 111 RDSV3_CONN_CONNECTING, 112 RDSV3_CONN_DISCONNECTING, 113 RDSV3_CONN_UP, 114 RDSV3_CONN_ERROR, 115}; 116 117/* Bits for c_flags */ 118#define RDSV3_LL_SEND_FULL 0 119#define RDSV3_RECONNECT_PENDING 1 120 121struct rdsv3_connection { 122 struct avl_node c_hash_node; 123 struct rdsv3_ip_bucket *c_bucketp; 124 uint32_be_t c_laddr; 125 uint32_be_t c_faddr; 126 unsigned int c_loopback:1; 127 struct rdsv3_connection *c_passive; 128 129 struct rdsv3_cong_map *c_lcong; 130 struct rdsv3_cong_map *c_fcong; 131 132 struct mutex c_send_lock; /* protect send ring */ 133 atomic_t c_send_generation; 134 atomic_t c_senders; 135 136 struct rdsv3_message *c_xmit_rm; 137 unsigned long c_xmit_sg; 138 unsigned int c_xmit_hdr_off; 139 unsigned int c_xmit_data_off; 140 unsigned int c_xmit_rdma_sent; 141 142 kmutex_t c_lock; /* protect msg queues */ 143 uint64_t c_next_tx_seq; 144 struct list c_send_queue; 145 struct list c_retrans; 146 147 uint64_t c_next_rx_seq; 148 149 struct rdsv3_transport *c_trans; 150 void *c_transport_data; 151 152 atomic_t c_state; 153 unsigned long c_flags; 154 unsigned long c_reconnect_jiffies; 155 clock_t c_last_connect_jiffies; 156 157 struct rdsv3_delayed_work_s c_send_w; 158 struct rdsv3_delayed_work_s c_recv_w; 159 struct rdsv3_delayed_work_s c_conn_w; 160 struct rdsv3_delayed_work_s c_reap_w; 161 struct rdsv3_work_s c_down_w; 162 struct mutex c_cm_lock; /* protect conn state & cm */ 163 164 struct list_node c_map_item; 165 unsigned long c_map_queued; 166 unsigned long c_map_offset; 167 unsigned long c_map_bytes; 168 169 unsigned int c_unacked_packets; 170 unsigned int c_unacked_bytes; 171 172 /* Protocol version */ 173 unsigned int c_version; 174}; 175 176#define RDSV3_FLAG_CONG_BITMAP 0x01 177#define RDSV3_FLAG_ACK_REQUIRED 0x02 178#define RDSV3_FLAG_RETRANSMITTED 0x04 179#define RDSV3_MAX_ADV_CREDIT 127 180 181/* 182 * Maximum space available for extension headers. 
183 */ 184#define RDSV3_HEADER_EXT_SPACE 16 185 186struct rdsv3_header { 187 uint64_be_t h_sequence; 188 uint64_be_t h_ack; 189 uint32_be_t h_len; 190 uint16_be_t h_sport; 191 uint16_be_t h_dport; 192 uint8_t h_flags; 193 uint8_t h_credit; 194 uint8_t h_padding[4]; 195 uint16_be_t h_csum; 196 197 uint8_t h_exthdr[RDSV3_HEADER_EXT_SPACE]; 198}; 199 200/* Reserved - indicates end of extensions */ 201#define RDSV3_EXTHDR_NONE 0 202 203/* 204 * This extension header is included in the very 205 * first message that is sent on a new connection, 206 * and identifies the protocol level. This will help 207 * rolling updates if a future change requires breaking 208 * the protocol. 209 */ 210#define RDSV3_EXTHDR_VERSION 1 211struct rdsv3_ext_header_version { 212 uint32_be_t h_version; 213}; 214 215/* 216 * This extension header is included in the RDS message 217 * chasing an RDMA operation. 218 */ 219#define RDSV3_EXTHDR_RDMA 2 220struct rdsv3_ext_header_rdma { 221 uint32_be_t h_rdma_rkey; 222}; 223 224/* 225 * This extension header tells the peer about the 226 * destination <R_Key,offset> of the requested RDMA 227 * operation. 228 */ 229#define RDSV3_EXTHDR_RDMA_DEST 3 230struct rdsv3_ext_header_rdma_dest { 231 uint32_be_t h_rdma_rkey; 232 uint32_be_t h_rdma_offset; 233}; 234 235#define __RDSV3_EXTHDR_MAX 16 /* for now */ 236 237struct rdsv3_incoming { 238 atomic_t i_refcount; 239 struct list_node i_item; 240 struct rdsv3_connection *i_conn; 241 struct rdsv3_header i_hdr; 242 unsigned long i_rx_jiffies; 243 uint32_be_t i_saddr; 244 245 rds_rdma_cookie_t i_rdma_cookie; 246}; 247 248/* 249 * m_sock_item and m_conn_item are on lists that are serialized under 250 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 251 * the message will not be put back on the retransmit list after being sent. 252 * messages that are canceled while being sent rely on this. 
253 * 254 * m_inc is used by loopback so that it can pass an incoming message straight 255 * back up into the rx path. It embeds a wire header which is also used by 256 * the send path, which is kind of awkward. 257 * 258 * m_sock_item indicates the message's presence on a socket's send or receive 259 * queue. m_rs will point to that socket. 260 * 261 * m_daddr is used by cancellation to prune messages to a given destination. 262 * 263 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock 264 * nesting. As paths iterate over messages on a sock, or conn, they must 265 * also lock the conn, or sock, to remove the message from those lists too. 266 * Testing the flag to determine if the message is still on the lists lets 267 * us avoid testing the list_head directly. That means each path can use 268 * the message's list_head to keep it on a local list while juggling locks 269 * without confusing the other path. 270 * 271 * m_ack_seq is an optional field set by transports who need a different 272 * sequence number range to invalidate. They can use this in a callback 273 * that they pass to rdsv3_send_drop_acked() to see if each message has been 274 * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't 275 * had ack_seq set yet. 276 */ 277#define RDSV3_MSG_ON_SOCK 1 278#define RDSV3_MSG_ON_CONN 2 279#define RDSV3_MSG_HAS_ACK_SEQ 3 280#define RDSV3_MSG_ACK_REQUIRED 4 281#define RDSV3_MSG_RETRANSMITTED 5 282#define RDSV3_MSG_MAPPED 6 283#define RDSV3_MSG_PAGEVEC 7 284 285struct rdsv3_message { 286 atomic_t m_refcount; 287 struct list_node m_sock_item; 288 struct list_node m_conn_item; 289 struct rdsv3_incoming m_inc; 290 uint64_t m_ack_seq; 291 uint32_be_t m_daddr; 292 unsigned long m_flags; 293 294 /* 295 * Never access m_rs without holding m_rs_lock. 
296 * Lock nesting is 297 * rm->m_rs_lock 298 * -> rs->rs_lock 299 */ 300 kmutex_t m_rs_lock; 301 rdsv3_wait_queue_t m_flush_wait; 302 303 struct rdsv3_sock *m_rs; 304 struct rdsv3_rdma_op *m_rdma_op; 305 rds_rdma_cookie_t m_rdma_cookie; 306 struct rdsv3_mr *m_rdma_mr; 307 unsigned int m_nents; 308 unsigned int m_count; 309 struct rdsv3_scatterlist m_sg[1]; 310}; 311 312/* 313 * The RDS notifier is used (optionally) to tell the application about 314 * completed RDMA operations. Rather than keeping the whole rds message 315 * around on the queue, we allocate a small notifier that is put on the 316 * socket's notifier_list. Notifications are delivered to the application 317 * through control messages. 318 */ 319struct rdsv3_notifier { 320 list_node_t n_list; 321 uint64_t n_user_token; 322 int n_status; 323}; 324 325/* 326 * struct rdsv3_transport - transport specific behavioural hooks 327 * 328 * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send 329 * part of a message. The caller serializes on the send_sem so this 330 * doesn't need to be reentrant for a given conn. The header must be 331 * sent before the data payload. .xmit must be prepared to send a 332 * message with no data payload. .xmit should return the number of 333 * bytes that were sent down the connection, including header bytes. 334 * Returning 0 tells the caller that it doesn't need to perform any 335 * additional work now. This is usually the case when the transport has 336 * filled the sending queue for its connection and will handle 337 * triggering the rds thread to continue the send when space becomes 338 * available. Returning -EAGAIN tells the caller to retry the send 339 * immediately. Returning -ENOMEM tells the caller to retry the send at 340 * some point in the future. 341 * 342 * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once 343 * it returns the connection can not call rdsv3_recv_incoming(). 
344 * This will only be called once after conn_connect returns 345 * non-zero success and will The caller serializes this with 346 * the send and connecting paths (xmit_* and conn_*). The 347 * transport is responsible for other serialization, including 348 * rdsv3_recv_incoming(). This is called in process context but 349 * should try hard not to block. 350 * 351 * @xmit_cong_map: This asks the transport to send the local bitmap down the 352 * given connection. XXX get a better story about the bitmap 353 * flag and header. 354 */ 355 356#define RDS_TRANS_IB 0 357#define RDS_TRANS_IWARP 1 358#define RDS_TRANS_TCP 2 359#define RDS_TRANS_COUNT 3 360 361struct rdsv3_transport { 362 char t_name[TRANSNAMSIZ]; 363 struct list_node t_item; 364 unsigned int t_type; 365 unsigned int t_prefer_loopback:1; 366 367 int (*laddr_check)(uint32_be_t addr); 368 int (*conn_alloc)(struct rdsv3_connection *conn, int gfp); 369 void (*conn_free)(void *data); 370 int (*conn_connect)(struct rdsv3_connection *conn); 371 void (*conn_shutdown)(struct rdsv3_connection *conn); 372 void (*xmit_prepare)(struct rdsv3_connection *conn); 373 void (*xmit_complete)(struct rdsv3_connection *conn); 374 int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm, 375 unsigned int hdr_off, unsigned int sg, unsigned int off); 376 int (*xmit_cong_map)(struct rdsv3_connection *conn, 377 struct rdsv3_cong_map *map, unsigned long offset); 378 int (*xmit_rdma)(struct rdsv3_connection *conn, 379 struct rdsv3_rdma_op *op); 380 int (*recv)(struct rdsv3_connection *conn); 381 int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio, 382 size_t size); 383 void (*inc_free)(struct rdsv3_incoming *inc); 384 385 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 386 struct rdma_cm_event *event); 387 int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); 388 void (*cm_connect_complete)(struct rdsv3_connection *conn, 389 struct rdma_cm_event *event); 390 391 unsigned int (*stats_info_copy)(struct 
rdsv3_info_iterator *iter, 392 unsigned int avail); 393 void (*exit)(void); 394 void *(*get_mr)(struct rds_iovec *sg, unsigned long nr_sg, 395 struct rdsv3_sock *rs, uint32_t *key_ret); 396 void (*sync_mr)(void *trans_private, int direction); 397 void (*free_mr)(void *trans_private, int invalidate); 398 void (*flush_mrs)(void); 399}; 400 401struct rdsv3_sock { 402 struct rsock *rs_sk; 403 uint64_t rs_user_addr; 404 uint64_t rs_user_bytes; 405 406 /* 407 * bound_addr used for both incoming and outgoing, no INADDR_ANY 408 * support. 409 */ 410 struct avl_node rs_bound_node; 411 uint32_be_t rs_bound_addr; 412 uint32_be_t rs_conn_addr; 413 uint16_be_t rs_bound_port; 414 uint16_be_t rs_conn_port; 415 416 /* 417 * This is only used to communicate the transport between bind and 418 * initiating connections. All other trans use is referenced through 419 * the connection. 420 */ 421 struct rdsv3_transport *rs_transport; 422 423 /* 424 * rdsv3_sendmsg caches the conn it used the last time around. 425 * This helps avoid costly lookups. 426 */ 427 struct rdsv3_connection *rs_conn; 428 kmutex_t rs_conn_lock; 429 430 /* flag indicating we were congested or not */ 431 int rs_congested; 432 /* seen congestion (ENOBUFS) when sending? */ 433 int rs_seen_congestion; 434 kmutex_t rs_congested_lock; 435 kcondvar_t rs_congested_cv; 436 437 /* rs_lock protects all these adjacent members before the newline */ 438 kmutex_t rs_lock; 439 struct list rs_send_queue; 440 uint32_t rs_snd_bytes; 441 int rs_rcv_bytes; 442 /* currently used for failed RDMAs */ 443 struct list rs_notify_queue; 444 445 /* 446 * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask 447 * to decide whether the application should be woken up. 448 * If not set, we use rs_cong_track to find out whether a cong map 449 * update arrived. 
450 */ 451 uint64_t rs_cong_mask; 452 uint64_t rs_cong_notify; 453 struct list_node rs_cong_list; 454 unsigned long rs_cong_track; 455 456 /* 457 * rs_recv_lock protects the receive queue, and is 458 * used to serialize with rdsv3_release. 459 */ 460 krwlock_t rs_recv_lock; 461 struct list rs_recv_queue; 462 463 /* just for stats reporting */ 464 struct list_node rs_item; 465 466 /* these have their own lock */ 467 kmutex_t rs_rdma_lock; 468 struct avl_tree rs_rdma_keys; 469 470 /* Socket options - in case there will be more */ 471 unsigned char rs_recverr, 472 rs_cong_monitor; 473 474 cred_t *rs_cred; 475 zoneid_t rs_zoneid; 476}; 477 478static inline struct rdsv3_sock * 479rdsv3_sk_to_rs(const struct rsock *sk) 480{ 481 return ((struct rdsv3_sock *)sk->sk_protinfo); 482} 483 484static inline struct rsock * 485rdsv3_rs_to_sk(const struct rdsv3_sock *rs) 486{ 487 return ((struct rsock *)rs->rs_sk); 488} 489 490/* 491 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value 492 * to account for overhead. We don't account for overhead, we just apply 493 * the number of payload bytes to the specified value. 
494 */ 495static inline int 496rdsv3_sk_sndbuf(struct rdsv3_sock *rs) 497{ 498 /* XXX */ 499 return (rdsv3_rs_to_sk(rs)->sk_sndbuf); 500} 501 502static inline int 503rdsv3_sk_rcvbuf(struct rdsv3_sock *rs) 504{ 505 /* XXX */ 506 return (rdsv3_rs_to_sk(rs)->sk_rcvbuf); 507} 508 509struct rdsv3_statistics { 510 uint64_t s_conn_reset; 511 uint64_t s_recv_drop_bad_checksum; 512 uint64_t s_recv_drop_old_seq; 513 uint64_t s_recv_drop_no_sock; 514 uint64_t s_recv_drop_dead_sock; 515 uint64_t s_recv_deliver_raced; 516 uint64_t s_recv_delivered; 517 uint64_t s_recv_queued; 518 uint64_t s_recv_immediate_retry; 519 uint64_t s_recv_delayed_retry; 520 uint64_t s_recv_ack_required; 521 uint64_t s_recv_rdma_bytes; 522 uint64_t s_recv_ping; 523 uint64_t s_send_queue_empty; 524 uint64_t s_send_queue_full; 525 uint64_t s_send_sem_contention; 526 uint64_t s_send_sem_queue_raced; 527 uint64_t s_send_immediate_retry; 528 uint64_t s_send_delayed_retry; 529 uint64_t s_send_drop_acked; 530 uint64_t s_send_ack_required; 531 uint64_t s_send_queued; 532 uint64_t s_send_rdma; 533 uint64_t s_send_rdma_bytes; 534 uint64_t s_send_pong; 535 uint64_t s_page_remainder_hit; 536 uint64_t s_page_remainder_miss; 537 uint64_t s_copy_to_user; 538 uint64_t s_copy_from_user; 539 uint64_t s_cong_update_queued; 540 uint64_t s_cong_update_received; 541 uint64_t s_cong_send_error; 542 uint64_t s_cong_send_blocked; 543}; 544 545/* af_rds.c */ 546void rdsv3_sock_addref(struct rdsv3_sock *rs); 547void rdsv3_sock_put(struct rdsv3_sock *rs); 548void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs); 549void __rdsv3_wake_sk_sleep(struct rsock *sk); 550 551/* bind.c */ 552int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 553 socklen_t len, cred_t *cr); 554void rdsv3_remove_bound(struct rdsv3_sock *rs); 555struct rdsv3_sock *rdsv3_find_bound(struct rdsv3_connection *conn, 556 uint16_be_t port); 557struct rdsv3_ip_bucket *rdsv3_find_ip_bucket(ipaddr_t, zoneid_t); 558 559/* conn.c */ 560int 
rdsv3_conn_init(void); 561void rdsv3_conn_exit(void); 562struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr, 563 struct rdsv3_transport *trans, int gfp); 564struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr, 565 uint32_be_t faddr, 566 struct rdsv3_transport *trans, int gfp); 567void rdsv3_conn_shutdown(struct rdsv3_connection *conn); 568void rdsv3_conn_destroy(struct rdsv3_connection *conn); 569void rdsv3_conn_reset(struct rdsv3_connection *conn); 570void rdsv3_conn_drop(struct rdsv3_connection *conn); 571void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len, 572 struct rdsv3_info_iterator *iter, 573 struct rdsv3_info_lengths *lens, 574 int (*visitor)(struct rdsv3_connection *, void *), 575 size_t item_len); 576 577static inline int 578rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new) 579{ 580 return (atomic_cmpxchg(&conn->c_state, old, new) == old); 581} 582 583static inline int 584rdsv3_conn_state(struct rdsv3_connection *conn) 585{ 586 return (atomic_get(&conn->c_state)); 587} 588 589static inline int 590rdsv3_conn_up(struct rdsv3_connection *conn) 591{ 592 return (atomic_get(&conn->c_state) == RDSV3_CONN_UP); 593} 594 595static inline int 596rdsv3_conn_connecting(struct rdsv3_connection *conn) 597{ 598 return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING); 599} 600 601/* recv.c */ 602void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn, 603 uint32_be_t saddr); 604void rdsv3_inc_addref(struct rdsv3_incoming *inc); 605void rdsv3_inc_put(struct rdsv3_incoming *inc); 606void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr, 607 uint32_be_t daddr, 608 struct rdsv3_incoming *inc, int gfp); 609int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio, 610 struct msghdr *msg, size_t size, int msg_flags); 611void rdsv3_clear_recv_queue(struct rdsv3_sock *rs); 612int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg); 613void 
rdsv3_inc_info_copy(struct rdsv3_incoming *inc, 614 struct rdsv3_info_iterator *iter, 615 uint32_be_t saddr, uint32_be_t daddr, int flip); 616 617/* page.c */ 618int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat, 619 unsigned long bytes, int gfp); 620 621/* send.c */ 622int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg, 623 size_t payload_len); 624void rdsv3_send_reset(struct rdsv3_connection *conn); 625int rdsv3_send_xmit(struct rdsv3_connection *conn); 626struct sockaddr_in; 627void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest); 628typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack); 629void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack, 630 is_acked_func is_acked); 631int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq); 632void rdsv3_send_remove_from_sock(struct list *messages, int status); 633int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport); 634struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *, 635 struct rdsv3_rdma_op *); 636 637/* rdma.c */ 638void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force); 639 640/* cong.c */ 641void rdsv3_cong_init(void); 642int rdsv3_cong_get_maps(struct rdsv3_connection *conn); 643void rdsv3_cong_add_conn(struct rdsv3_connection *conn); 644void rdsv3_cong_remove_conn(struct rdsv3_connection *conn); 645void rdsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port); 646void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port); 647int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock, 648 struct rdsv3_sock *rs); 649void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map); 650void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t); 651int rdsv3_cong_updated_since(unsigned long *recent); 652void rdsv3_cong_add_socket(struct rdsv3_sock *); 653void rdsv3_cong_remove_socket(struct rdsv3_sock *); 654void 
rdsv3_cong_exit(void); 655struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn); 656 657/* stats.c */ 658RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats); 659#define rdsv3_stats_inc_which(which, member) do { \ 660 rdsv3_per_cpu(which, get_cpu()).member++; \ 661 put_cpu(); \ 662} while (0) 663#define rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member) 664#define rdsv3_stats_add_which(which, member, count) do { \ 665 rdsv3_per_cpu(which, get_cpu()).member += count; \ 666 put_cpu(); \ 667} while (0) 668#define rdsv3_stats_add(member, count) \ 669 rdsv3_stats_add_which(rdsv3_stats, member, count) 670int rdsv3_stats_init(void); 671void rdsv3_stats_exit(void); 672void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter, 673 uint64_t *values, char **names, size_t nr); 674 675 676/* sysctl.c */ 677int rdsv3_sysctl_init(void); 678void rdsv3_sysctl_exit(void); 679extern unsigned long rdsv3_sysctl_sndbuf_min; 680extern unsigned long rdsv3_sysctl_sndbuf_default; 681extern unsigned long rdsv3_sysctl_sndbuf_max; 682extern unsigned long rdsv3_sysctl_reconnect_min_jiffies; 683extern unsigned long rdsv3_sysctl_reconnect_max_jiffies; 684extern unsigned int rdsv3_sysctl_max_unacked_packets; 685extern unsigned int rdsv3_sysctl_max_unacked_bytes; 686extern unsigned int rdsv3_sysctl_ping_enable; 687extern unsigned long rdsv3_sysctl_trace_flags; 688extern unsigned int rdsv3_sysctl_trace_level; 689 690/* threads.c */ 691int rdsv3_threads_init(); 692void rdsv3_threads_exit(void); 693extern struct rdsv3_workqueue_struct_s *rdsv3_wq; 694void rdsv3_queue_reconnect(struct rdsv3_connection *conn); 695void rdsv3_connect_worker(struct rdsv3_work_s *); 696void rdsv3_shutdown_worker(struct rdsv3_work_s *); 697void rdsv3_send_worker(struct rdsv3_work_s *); 698void rdsv3_recv_worker(struct rdsv3_work_s *); 699void rdsv3_reaper_worker(struct rdsv3_work_s *); 700void rdsv3_connect_complete(struct rdsv3_connection *conn); 701 702/* transport.c */ 
703int rdsv3_trans_register(struct rdsv3_transport *trans); 704void rdsv3_trans_unregister(struct rdsv3_transport *trans); 705struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr); 706unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter, 707 unsigned int avail); 708void rdsv3_trans_exit(void); 709 710/* message.c */ 711struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp); 712struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop, 713 size_t total_len); 714struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs, 715 unsigned int total_len); 716void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 717 uint16_be_t dport, uint64_t seq); 718int rdsv3_message_add_extension(struct rdsv3_header *hdr, 719 unsigned int type, const void *data, unsigned int len); 720int rdsv3_message_next_extension(struct rdsv3_header *hdr, 721 unsigned int *pos, void *buf, unsigned int *buflen); 722int rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 723 unsigned int version); 724int rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 725 unsigned int *version); 726int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, 727 uint32_t r_key, uint32_t offset); 728int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 729 uio_t *uio, size_t size); 730void rdsv3_message_inc_free(struct rdsv3_incoming *inc); 731void rdsv3_message_addref(struct rdsv3_message *rm); 732void rdsv3_message_put(struct rdsv3_message *rm); 733void rdsv3_message_wait(struct rdsv3_message *rm); 734void rdsv3_message_unmapped(struct rdsv3_message *rm); 735 736static inline void 737rdsv3_message_make_checksum(struct rdsv3_header *hdr) 738{ 739 hdr->h_csum = 0; 740 hdr->h_csum = 741 rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2); 742} 743 744static inline int 745rdsv3_message_verify_checksum(const struct rdsv3_header *hdr) 746{ 747 return (!hdr->h_csum || 748 
rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0); 749} 750 751/* rdsv3_sc.c */ 752extern boolean_t rdsv3_if_lookup_by_name(char *if_name); 753extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip); 754extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr); 755 756#ifdef __cplusplus 757} 758#endif 759 760#endif /* _RDSV3_RDSV3_H */ 761