1/*
2 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3 * Copyright (c) 2004, 2011-2012 Intel Corporation.  All rights reserved.
4 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc.  All rights reserved.
5 * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#ifndef INFINIBAND_VERBS_H
37#define INFINIBAND_VERBS_H
38
39#include <stdint.h>
40#include <pthread.h>
41#include <stddef.h>
42#include <errno.h>
43#include <string.h>
44#include <infiniband/types.h>
45
46#ifdef __cplusplus
47#  define BEGIN_C_DECLS extern "C" {
48#  define END_C_DECLS   }
49#else /* !__cplusplus */
50#  define BEGIN_C_DECLS
51#  define END_C_DECLS
52#endif /* __cplusplus */
53
54#if __GNUC__ >= 3
55#  define __attribute_const __attribute__((const))
56#else
57#  define __attribute_const
58#endif
59
60BEGIN_C_DECLS
61
62union ibv_gid {
63	uint8_t			raw[16];
64	struct {
65		__be64	subnet_prefix;
66		__be64	interface_id;
67	} global;
68};
69
70#ifndef container_of
71/**
72  * container_of - cast a member of a structure out to the containing structure
73  * @ptr:        the pointer to the member.
74  * @type:       the type of the container struct this is embedded in.
75  * @member:     the name of the member within the struct.
76  *
77 */
78#define container_of(ptr, type, member) \
79	((type *) ((uint8_t *)(ptr) - offsetof(type, member)))
80#endif
81
82#define vext_field_avail(type, fld, sz) (offsetof(type, fld) < (sz))
83
84static void *__VERBS_ABI_IS_EXTENDED = ((uint8_t *) NULL) - 1;
85
86enum ibv_node_type {
87	IBV_NODE_UNKNOWN	= -1,
88	IBV_NODE_CA 		= 1,
89	IBV_NODE_SWITCH,
90	IBV_NODE_ROUTER,
91	IBV_NODE_RNIC,
92	IBV_NODE_USNIC,
93	IBV_NODE_USNIC_UDP,
94};
95
96enum ibv_transport_type {
97	IBV_TRANSPORT_UNKNOWN	= -1,
98	IBV_TRANSPORT_IB	= 0,
99	IBV_TRANSPORT_IWARP,
100	IBV_TRANSPORT_USNIC,
101	IBV_TRANSPORT_USNIC_UDP,
102};
103
104enum ibv_device_cap_flags {
105	IBV_DEVICE_RESIZE_MAX_WR	= 1,
106	IBV_DEVICE_BAD_PKEY_CNTR	= 1 <<  1,
107	IBV_DEVICE_BAD_QKEY_CNTR	= 1 <<  2,
108	IBV_DEVICE_RAW_MULTI		= 1 <<  3,
109	IBV_DEVICE_AUTO_PATH_MIG	= 1 <<  4,
110	IBV_DEVICE_CHANGE_PHY_PORT	= 1 <<  5,
111	IBV_DEVICE_UD_AV_PORT_ENFORCE	= 1 <<  6,
112	IBV_DEVICE_CURR_QP_STATE_MOD	= 1 <<  7,
113	IBV_DEVICE_SHUTDOWN_PORT	= 1 <<  8,
114	IBV_DEVICE_INIT_TYPE		= 1 <<  9,
115	IBV_DEVICE_PORT_ACTIVE_EVENT	= 1 << 10,
116	IBV_DEVICE_SYS_IMAGE_GUID	= 1 << 11,
117	IBV_DEVICE_RC_RNR_NAK_GEN	= 1 << 12,
118	IBV_DEVICE_SRQ_RESIZE		= 1 << 13,
119	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14,
120	IBV_DEVICE_MEM_WINDOW           = 1 << 17,
121	IBV_DEVICE_UD_IP_CSUM		= 1 << 18,
122	IBV_DEVICE_XRC			= 1 << 20,
123	IBV_DEVICE_MEM_MGT_EXTENSIONS	= 1 << 21,
124	IBV_DEVICE_MEM_WINDOW_TYPE_2A	= 1 << 23,
125	IBV_DEVICE_MEM_WINDOW_TYPE_2B	= 1 << 24,
126	IBV_DEVICE_RC_IP_CSUM		= 1 << 25,
127	IBV_DEVICE_RAW_IP_CSUM		= 1 << 26,
128	IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29
129};
130
131/*
132 * Can't extended above ibv_device_cap_flags enum as in some systems/compilers
133 * enum range is limited to 4 bytes.
134 */
135#define IBV_DEVICE_RAW_SCATTER_FCS (1ULL << 34)
136
137enum ibv_atomic_cap {
138	IBV_ATOMIC_NONE,
139	IBV_ATOMIC_HCA,
140	IBV_ATOMIC_GLOB
141};
142
143struct ibv_device_attr {
144	char			fw_ver[64];
145	__be64			node_guid;
146	__be64			sys_image_guid;
147	uint64_t		max_mr_size;
148	uint64_t		page_size_cap;
149	uint32_t		vendor_id;
150	uint32_t		vendor_part_id;
151	uint32_t		hw_ver;
152	int			max_qp;
153	int			max_qp_wr;
154	int			device_cap_flags;
155	int			max_sge;
156	int			max_sge_rd;
157	int			max_cq;
158	int			max_cqe;
159	int			max_mr;
160	int			max_pd;
161	int			max_qp_rd_atom;
162	int			max_ee_rd_atom;
163	int			max_res_rd_atom;
164	int			max_qp_init_rd_atom;
165	int			max_ee_init_rd_atom;
166	enum ibv_atomic_cap	atomic_cap;
167	int			max_ee;
168	int			max_rdd;
169	int			max_mw;
170	int			max_raw_ipv6_qp;
171	int			max_raw_ethy_qp;
172	int			max_mcast_grp;
173	int			max_mcast_qp_attach;
174	int			max_total_mcast_qp_attach;
175	int			max_ah;
176	int			max_fmr;
177	int			max_map_per_fmr;
178	int			max_srq;
179	int			max_srq_wr;
180	int			max_srq_sge;
181	uint16_t		max_pkeys;
182	uint8_t			local_ca_ack_delay;
183	uint8_t			phys_port_cnt;
184};
185
186/* An extensible input struct for possible future extensions of the
187 * ibv_query_device_ex verb. */
188struct ibv_query_device_ex_input {
189	uint32_t		comp_mask;
190};
191
192enum ibv_odp_transport_cap_bits {
193	IBV_ODP_SUPPORT_SEND     = 1 << 0,
194	IBV_ODP_SUPPORT_RECV     = 1 << 1,
195	IBV_ODP_SUPPORT_WRITE    = 1 << 2,
196	IBV_ODP_SUPPORT_READ     = 1 << 3,
197	IBV_ODP_SUPPORT_ATOMIC   = 1 << 4,
198};
199
200struct ibv_odp_caps {
201	uint64_t general_caps;
202	struct {
203		uint32_t rc_odp_caps;
204		uint32_t uc_odp_caps;
205		uint32_t ud_odp_caps;
206	} per_transport_caps;
207};
208
209enum ibv_odp_general_caps {
210	IBV_ODP_SUPPORT = 1 << 0,
211};
212
213struct ibv_tso_caps {
214	uint32_t max_tso;
215	uint32_t supported_qpts;
216};
217
218/* RX Hash function flags */
219enum ibv_rx_hash_function_flags {
220	IBV_RX_HASH_FUNC_TOEPLITZ	= 1 << 0,
221};
222
223/*
224 * RX Hash fields enable to set which incoming packet's field should
225 * participates in RX Hash. Each flag represent certain packet's field,
226 * when the flag is set the field that is represented by the flag will
227 * participate in RX Hash calculation.
228 * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
229 * and *TCP and *UDP flags can't be enabled together on the same QP.
230*/
231enum ibv_rx_hash_fields {
232	IBV_RX_HASH_SRC_IPV4	= 1 << 0,
233	IBV_RX_HASH_DST_IPV4	= 1 << 1,
234	IBV_RX_HASH_SRC_IPV6	= 1 << 2,
235	IBV_RX_HASH_DST_IPV6	= 1 << 3,
236	IBV_RX_HASH_SRC_PORT_TCP	= 1 << 4,
237	IBV_RX_HASH_DST_PORT_TCP	= 1 << 5,
238	IBV_RX_HASH_SRC_PORT_UDP	= 1 << 6,
239	IBV_RX_HASH_DST_PORT_UDP	= 1 << 7
240};
241
242struct ibv_rss_caps {
243	uint32_t supported_qpts;
244	uint32_t max_rwq_indirection_tables;
245	uint32_t max_rwq_indirection_table_size;
246	uint64_t rx_hash_fields_mask; /* enum ibv_rx_hash_fields */
247	uint8_t  rx_hash_function; /* enum ibv_rx_hash_function_flags */
248};
249
250struct ibv_packet_pacing_caps {
251	uint32_t qp_rate_limit_min;
252	uint32_t qp_rate_limit_max; /* In kbps */
253	uint32_t supported_qpts;
254};
255
256enum ibv_raw_packet_caps {
257	IBV_RAW_PACKET_CAP_CVLAN_STRIPPING	= 1 << 0,
258	IBV_RAW_PACKET_CAP_SCATTER_FCS		= 1 << 1,
259	IBV_RAW_PACKET_CAP_IP_CSUM		= 1 << 2,
260};
261
262struct ibv_device_attr_ex {
263	struct ibv_device_attr	orig_attr;
264	uint32_t		comp_mask;
265	struct ibv_odp_caps	odp_caps;
266	uint64_t		completion_timestamp_mask;
267	uint64_t		hca_core_clock;
268	uint64_t		device_cap_flags_ex;
269	struct ibv_tso_caps	tso_caps;
270	struct ibv_rss_caps     rss_caps;
271	uint32_t		max_wq_type_rq;
272	struct ibv_packet_pacing_caps packet_pacing_caps;
273	uint32_t		raw_packet_caps; /* Use ibv_raw_packet_caps */
274};
275
276enum ibv_mtu {
277	IBV_MTU_256  = 1,
278	IBV_MTU_512  = 2,
279	IBV_MTU_1024 = 3,
280	IBV_MTU_2048 = 4,
281	IBV_MTU_4096 = 5
282};
283
284enum ibv_port_state {
285	IBV_PORT_NOP		= 0,
286	IBV_PORT_DOWN		= 1,
287	IBV_PORT_INIT		= 2,
288	IBV_PORT_ARMED		= 3,
289	IBV_PORT_ACTIVE		= 4,
290	IBV_PORT_ACTIVE_DEFER	= 5
291};
292
293enum {
294	IBV_LINK_LAYER_UNSPECIFIED,
295	IBV_LINK_LAYER_INFINIBAND,
296	IBV_LINK_LAYER_ETHERNET,
297};
298
299enum ibv_port_cap_flags {
300	IBV_PORT_SM				= 1 <<  1,
301	IBV_PORT_NOTICE_SUP			= 1 <<  2,
302	IBV_PORT_TRAP_SUP			= 1 <<  3,
303	IBV_PORT_OPT_IPD_SUP			= 1 <<  4,
304	IBV_PORT_AUTO_MIGR_SUP			= 1 <<  5,
305	IBV_PORT_SL_MAP_SUP			= 1 <<  6,
306	IBV_PORT_MKEY_NVRAM			= 1 <<  7,
307	IBV_PORT_PKEY_NVRAM			= 1 <<  8,
308	IBV_PORT_LED_INFO_SUP			= 1 <<  9,
309	IBV_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11,
310	IBV_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12,
311	IBV_PORT_EXTENDED_SPEEDS_SUP		= 1 << 14,
312	IBV_PORT_CM_SUP				= 1 << 16,
313	IBV_PORT_SNMP_TUNNEL_SUP		= 1 << 17,
314	IBV_PORT_REINIT_SUP			= 1 << 18,
315	IBV_PORT_DEVICE_MGMT_SUP		= 1 << 19,
316	IBV_PORT_VENDOR_CLASS_SUP		= 1 << 20,
317	IBV_PORT_DR_NOTICE_SUP			= 1 << 21,
318	IBV_PORT_CAP_MASK_NOTICE_SUP		= 1 << 22,
319	IBV_PORT_BOOT_MGMT_SUP			= 1 << 23,
320	IBV_PORT_LINK_LATENCY_SUP		= 1 << 24,
321	IBV_PORT_CLIENT_REG_SUP			= 1 << 25,
322	IBV_PORT_IP_BASED_GIDS			= 1 << 26
323};
324
325struct ibv_port_attr {
326	enum ibv_port_state	state;
327	enum ibv_mtu		max_mtu;
328	enum ibv_mtu		active_mtu;
329	int			gid_tbl_len;
330	uint32_t		port_cap_flags;
331	uint32_t		max_msg_sz;
332	uint32_t		bad_pkey_cntr;
333	uint32_t		qkey_viol_cntr;
334	uint16_t		pkey_tbl_len;
335	uint16_t		lid;
336	uint16_t		sm_lid;
337	uint8_t			lmc;
338	uint8_t			max_vl_num;
339	uint8_t			sm_sl;
340	uint8_t			subnet_timeout;
341	uint8_t			init_type_reply;
342	uint8_t			active_width;
343	uint8_t			active_speed;
344	uint8_t			phys_state;
345	uint8_t			link_layer;
346	uint8_t			reserved;
347};
348
349enum ibv_event_type {
350	IBV_EVENT_CQ_ERR,
351	IBV_EVENT_QP_FATAL,
352	IBV_EVENT_QP_REQ_ERR,
353	IBV_EVENT_QP_ACCESS_ERR,
354	IBV_EVENT_COMM_EST,
355	IBV_EVENT_SQ_DRAINED,
356	IBV_EVENT_PATH_MIG,
357	IBV_EVENT_PATH_MIG_ERR,
358	IBV_EVENT_DEVICE_FATAL,
359	IBV_EVENT_PORT_ACTIVE,
360	IBV_EVENT_PORT_ERR,
361	IBV_EVENT_LID_CHANGE,
362	IBV_EVENT_PKEY_CHANGE,
363	IBV_EVENT_SM_CHANGE,
364	IBV_EVENT_SRQ_ERR,
365	IBV_EVENT_SRQ_LIMIT_REACHED,
366	IBV_EVENT_QP_LAST_WQE_REACHED,
367	IBV_EVENT_CLIENT_REREGISTER,
368	IBV_EVENT_GID_CHANGE,
369	IBV_EVENT_WQ_FATAL,
370};
371
372struct ibv_async_event {
373	union {
374		struct ibv_cq  *cq;
375		struct ibv_qp  *qp;
376		struct ibv_srq *srq;
377		struct ibv_wq  *wq;
378		int		port_num;
379	} element;
380	enum ibv_event_type	event_type;
381};
382
383enum ibv_wc_status {
384	IBV_WC_SUCCESS,
385	IBV_WC_LOC_LEN_ERR,
386	IBV_WC_LOC_QP_OP_ERR,
387	IBV_WC_LOC_EEC_OP_ERR,
388	IBV_WC_LOC_PROT_ERR,
389	IBV_WC_WR_FLUSH_ERR,
390	IBV_WC_MW_BIND_ERR,
391	IBV_WC_BAD_RESP_ERR,
392	IBV_WC_LOC_ACCESS_ERR,
393	IBV_WC_REM_INV_REQ_ERR,
394	IBV_WC_REM_ACCESS_ERR,
395	IBV_WC_REM_OP_ERR,
396	IBV_WC_RETRY_EXC_ERR,
397	IBV_WC_RNR_RETRY_EXC_ERR,
398	IBV_WC_LOC_RDD_VIOL_ERR,
399	IBV_WC_REM_INV_RD_REQ_ERR,
400	IBV_WC_REM_ABORT_ERR,
401	IBV_WC_INV_EECN_ERR,
402	IBV_WC_INV_EEC_STATE_ERR,
403	IBV_WC_FATAL_ERR,
404	IBV_WC_RESP_TIMEOUT_ERR,
405	IBV_WC_GENERAL_ERR
406};
407const char *ibv_wc_status_str(enum ibv_wc_status status);
408
409enum ibv_wc_opcode {
410	IBV_WC_SEND,
411	IBV_WC_RDMA_WRITE,
412	IBV_WC_RDMA_READ,
413	IBV_WC_COMP_SWAP,
414	IBV_WC_FETCH_ADD,
415	IBV_WC_BIND_MW,
416	IBV_WC_LOCAL_INV,
417	IBV_WC_TSO,
418/*
419 * Set value of IBV_WC_RECV so consumers can test if a completion is a
420 * receive by testing (opcode & IBV_WC_RECV).
421 */
422	IBV_WC_RECV			= 1 << 7,
423	IBV_WC_RECV_RDMA_WITH_IMM
424};
425
426enum {
427	IBV_WC_IP_CSUM_OK_SHIFT	= 2
428};
429
430enum ibv_create_cq_wc_flags {
431	IBV_WC_EX_WITH_BYTE_LEN		= 1 << 0,
432	IBV_WC_EX_WITH_IMM		= 1 << 1,
433	IBV_WC_EX_WITH_QP_NUM		= 1 << 2,
434	IBV_WC_EX_WITH_SRC_QP		= 1 << 3,
435	IBV_WC_EX_WITH_SLID		= 1 << 4,
436	IBV_WC_EX_WITH_SL		= 1 << 5,
437	IBV_WC_EX_WITH_DLID_PATH_BITS	= 1 << 6,
438	IBV_WC_EX_WITH_COMPLETION_TIMESTAMP	= 1 << 7,
439	IBV_WC_EX_WITH_CVLAN		= 1 << 8,
440	IBV_WC_EX_WITH_FLOW_TAG		= 1 << 9,
441};
442
443enum {
444	IBV_WC_STANDARD_FLAGS = IBV_WC_EX_WITH_BYTE_LEN		|
445				 IBV_WC_EX_WITH_IMM		|
446				 IBV_WC_EX_WITH_QP_NUM		|
447				 IBV_WC_EX_WITH_SRC_QP		|
448				 IBV_WC_EX_WITH_SLID		|
449				 IBV_WC_EX_WITH_SL		|
450				 IBV_WC_EX_WITH_DLID_PATH_BITS
451};
452
453enum {
454	IBV_CREATE_CQ_SUP_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
455				IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
456				IBV_WC_EX_WITH_CVLAN |
457				IBV_WC_EX_WITH_FLOW_TAG
458};
459
460enum ibv_wc_flags {
461	IBV_WC_GRH		= 1 << 0,
462	IBV_WC_WITH_IMM		= 1 << 1,
463	IBV_WC_IP_CSUM_OK	= 1 << IBV_WC_IP_CSUM_OK_SHIFT,
464	IBV_WC_WITH_INV         = 1 << 3
465};
466
467struct ibv_wc {
468	uint64_t		wr_id;
469	enum ibv_wc_status	status;
470	enum ibv_wc_opcode	opcode;
471	uint32_t		vendor_err;
472	uint32_t		byte_len;
473	/* When (wc_flags & IBV_WC_WITH_IMM): Immediate data in network byte order.
474	 * When (wc_flags & IBV_WC_WITH_INV): Stores the invalidated rkey.
475	 */
476	union {
477		__be32		imm_data;
478		uint32_t	invalidated_rkey;
479	};
480	uint32_t		qp_num;
481	uint32_t		src_qp;
482	int			wc_flags;
483	uint16_t		pkey_index;
484	uint16_t		slid;
485	uint8_t			sl;
486	uint8_t			dlid_path_bits;
487};
488
489enum ibv_access_flags {
490	IBV_ACCESS_LOCAL_WRITE		= 1,
491	IBV_ACCESS_REMOTE_WRITE		= (1<<1),
492	IBV_ACCESS_REMOTE_READ		= (1<<2),
493	IBV_ACCESS_REMOTE_ATOMIC	= (1<<3),
494	IBV_ACCESS_MW_BIND		= (1<<4),
495	IBV_ACCESS_ZERO_BASED		= (1<<5),
496	IBV_ACCESS_ON_DEMAND		= (1<<6),
497};
498
499struct ibv_mw_bind_info {
500	struct ibv_mr	*mr;
501	uint64_t	 addr;
502	uint64_t	 length;
503	int		 mw_access_flags; /* use ibv_access_flags */
504};
505
506struct ibv_pd {
507	struct ibv_context     *context;
508	uint32_t		handle;
509};
510
511enum ibv_xrcd_init_attr_mask {
512	IBV_XRCD_INIT_ATTR_FD	    = 1 << 0,
513	IBV_XRCD_INIT_ATTR_OFLAGS   = 1 << 1,
514	IBV_XRCD_INIT_ATTR_RESERVED = 1 << 2
515};
516
517struct ibv_xrcd_init_attr {
518	uint32_t comp_mask;
519	int	 fd;
520	int	 oflags;
521};
522
523struct ibv_xrcd {
524	struct ibv_context     *context;
525};
526
527enum ibv_rereg_mr_flags {
528	IBV_REREG_MR_CHANGE_TRANSLATION	= (1 << 0),
529	IBV_REREG_MR_CHANGE_PD		= (1 << 1),
530	IBV_REREG_MR_CHANGE_ACCESS	= (1 << 2),
531	IBV_REREG_MR_KEEP_VALID		= (1 << 3),
532	IBV_REREG_MR_FLAGS_SUPPORTED	= ((IBV_REREG_MR_KEEP_VALID << 1) - 1)
533};
534
535struct ibv_mr {
536	struct ibv_context     *context;
537	struct ibv_pd	       *pd;
538	void		       *addr;
539	size_t			length;
540	uint32_t		handle;
541	uint32_t		lkey;
542	uint32_t		rkey;
543};
544
545enum ibv_mw_type {
546	IBV_MW_TYPE_1			= 1,
547	IBV_MW_TYPE_2			= 2
548};
549
550struct ibv_mw {
551	struct ibv_context     *context;
552	struct ibv_pd	       *pd;
553	uint32_t		rkey;
554	uint32_t		handle;
555	enum ibv_mw_type	type;
556};
557
558struct ibv_global_route {
559	union ibv_gid		dgid;
560	uint32_t		flow_label;
561	uint8_t			sgid_index;
562	uint8_t			hop_limit;
563	uint8_t			traffic_class;
564};
565
566struct ibv_grh {
567	__be32			version_tclass_flow;
568	__be16			paylen;
569	uint8_t			next_hdr;
570	uint8_t			hop_limit;
571	union ibv_gid		sgid;
572	union ibv_gid		dgid;
573};
574
575enum ibv_rate {
576	IBV_RATE_MAX      = 0,
577	IBV_RATE_2_5_GBPS = 2,
578	IBV_RATE_5_GBPS   = 5,
579	IBV_RATE_10_GBPS  = 3,
580	IBV_RATE_20_GBPS  = 6,
581	IBV_RATE_30_GBPS  = 4,
582	IBV_RATE_40_GBPS  = 7,
583	IBV_RATE_60_GBPS  = 8,
584	IBV_RATE_80_GBPS  = 9,
585	IBV_RATE_120_GBPS = 10,
586	IBV_RATE_14_GBPS  = 11,
587	IBV_RATE_56_GBPS  = 12,
588	IBV_RATE_112_GBPS = 13,
589	IBV_RATE_168_GBPS = 14,
590	IBV_RATE_25_GBPS  = 15,
591	IBV_RATE_100_GBPS = 16,
592	IBV_RATE_200_GBPS = 17,
593	IBV_RATE_300_GBPS = 18,
594	IBV_RATE_28_GBPS  = 19,
595	IBV_RATE_50_GBPS  = 20,
596	IBV_RATE_400_GBPS = 21,
597	IBV_RATE_600_GBPS = 22,
598};
599
600/**
601 * ibv_rate_to_mult - Convert the IB rate enum to a multiple of the
602 * base rate of 2.5 Gbit/sec.  For example, IBV_RATE_5_GBPS will be
603 * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
604 * @rate: rate to convert.
605 */
606int  __attribute_const ibv_rate_to_mult(enum ibv_rate rate);
607
608/**
609 * mult_to_ibv_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate enum.
610 * @mult: multiple to convert.
611 */
612enum ibv_rate __attribute_const mult_to_ibv_rate(int mult);
613
614/**
615 * ibv_rate_to_mbps - Convert the IB rate enum to Mbit/sec.
616 * For example, IBV_RATE_5_GBPS will return the value 5000.
617 * @rate: rate to convert.
618 */
619int __attribute_const ibv_rate_to_mbps(enum ibv_rate rate);
620
621/**
622 * mbps_to_ibv_rate - Convert a Mbit/sec value to an IB rate enum.
623 * @mbps: value to convert.
624 */
625enum ibv_rate __attribute_const mbps_to_ibv_rate(int mbps) __attribute_const;
626
627struct ibv_ah_attr {
628	struct ibv_global_route	grh;
629	uint16_t		dlid;
630	uint8_t			sl;
631	uint8_t			src_path_bits;
632	uint8_t			static_rate;
633	uint8_t			is_global;
634	uint8_t			port_num;
635};
636
637enum ibv_srq_attr_mask {
638	IBV_SRQ_MAX_WR	= 1 << 0,
639	IBV_SRQ_LIMIT	= 1 << 1
640};
641
642struct ibv_srq_attr {
643	uint32_t		max_wr;
644	uint32_t		max_sge;
645	uint32_t		srq_limit;
646};
647
648struct ibv_srq_init_attr {
649	void		       *srq_context;
650	struct ibv_srq_attr	attr;
651};
652
653enum ibv_srq_type {
654	IBV_SRQT_BASIC,
655	IBV_SRQT_XRC
656};
657
658enum ibv_srq_init_attr_mask {
659	IBV_SRQ_INIT_ATTR_TYPE		= 1 << 0,
660	IBV_SRQ_INIT_ATTR_PD		= 1 << 1,
661	IBV_SRQ_INIT_ATTR_XRCD		= 1 << 2,
662	IBV_SRQ_INIT_ATTR_CQ		= 1 << 3,
663	IBV_SRQ_INIT_ATTR_RESERVED	= 1 << 4
664};
665
666struct ibv_srq_init_attr_ex {
667	void		       *srq_context;
668	struct ibv_srq_attr	attr;
669
670	uint32_t		comp_mask;
671	enum ibv_srq_type	srq_type;
672	struct ibv_pd	       *pd;
673	struct ibv_xrcd	       *xrcd;
674	struct ibv_cq	       *cq;
675};
676
677enum ibv_wq_type {
678	IBV_WQT_RQ
679};
680
681enum ibv_wq_init_attr_mask {
682	IBV_WQ_INIT_ATTR_FLAGS		= 1 << 0,
683	IBV_WQ_INIT_ATTR_RESERVED	= 1 << 1,
684};
685
686enum ibv_wq_flags {
687	IBV_WQ_FLAGS_CVLAN_STRIPPING		= 1 << 0,
688	IBV_WQ_FLAGS_SCATTER_FCS		= 1 << 1,
689	IBV_WQ_FLAGS_RESERVED			= 1 << 2,
690};
691
692struct ibv_wq_init_attr {
693	void		       *wq_context;
694	enum ibv_wq_type	wq_type;
695	uint32_t		max_wr;
696	uint32_t		max_sge;
697	struct	ibv_pd	       *pd;
698	struct	ibv_cq	       *cq;
699	uint32_t		comp_mask; /* Use ibv_wq_init_attr_mask */
700	uint32_t		create_flags; /* use ibv_wq_flags */
701};
702
703enum ibv_wq_state {
704	IBV_WQS_RESET,
705	IBV_WQS_RDY,
706	IBV_WQS_ERR,
707	IBV_WQS_UNKNOWN
708};
709
710enum ibv_wq_attr_mask {
711	IBV_WQ_ATTR_STATE	= 1 << 0,
712	IBV_WQ_ATTR_CURR_STATE	= 1 << 1,
713	IBV_WQ_ATTR_FLAGS	= 1 << 2,
714	IBV_WQ_ATTR_RESERVED	= 1 << 3,
715};
716
717struct ibv_wq_attr {
718	/* enum ibv_wq_attr_mask */
719	uint32_t		attr_mask;
720	/* Move the WQ to this state */
721	enum	ibv_wq_state	wq_state;
722	/* Assume this is the current WQ state */
723	enum	ibv_wq_state	curr_wq_state;
724	uint32_t		flags; /* Use ibv_wq_flags */
725	uint32_t		flags_mask; /* Use ibv_wq_flags */
726};
727
728/*
729 * Receive Work Queue Indirection Table.
730 * It's used in order to distribute incoming packets between different
731 * Receive Work Queues. Associating Receive WQs with different CPU cores
732 * allows to workload the traffic between different CPU cores.
733 * The Indirection Table can contain only WQs of type IBV_WQT_RQ.
734*/
735struct ibv_rwq_ind_table {
736	struct ibv_context *context;
737	int ind_tbl_handle;
738	int ind_tbl_num;
739	uint32_t comp_mask;
740};
741
742enum ibv_ind_table_init_attr_mask {
743	IBV_CREATE_IND_TABLE_RESERVED = (1 << 0)
744};
745
746/*
747 * Receive Work Queue Indirection Table attributes
748 */
749struct ibv_rwq_ind_table_init_attr {
750	uint32_t log_ind_tbl_size;
751	/* Each entry is a pointer to a Receive Work Queue */
752	struct ibv_wq **ind_tbl;
753	uint32_t comp_mask;
754};
755
756enum ibv_qp_type {
757	IBV_QPT_RC = 2,
758	IBV_QPT_UC,
759	IBV_QPT_UD,
760	IBV_QPT_RAW_PACKET = 8,
761	IBV_QPT_XRC_SEND = 9,
762	IBV_QPT_XRC_RECV
763};
764
765struct ibv_qp_cap {
766	uint32_t		max_send_wr;
767	uint32_t		max_recv_wr;
768	uint32_t		max_send_sge;
769	uint32_t		max_recv_sge;
770	uint32_t		max_inline_data;
771};
772
773struct ibv_qp_init_attr {
774	void		       *qp_context;
775	struct ibv_cq	       *send_cq;
776	struct ibv_cq	       *recv_cq;
777	struct ibv_srq	       *srq;
778	struct ibv_qp_cap	cap;
779	enum ibv_qp_type	qp_type;
780	int			sq_sig_all;
781};
782
783enum ibv_qp_init_attr_mask {
784	IBV_QP_INIT_ATTR_PD		= 1 << 0,
785	IBV_QP_INIT_ATTR_XRCD		= 1 << 1,
786	IBV_QP_INIT_ATTR_CREATE_FLAGS	= 1 << 2,
787	IBV_QP_INIT_ATTR_MAX_TSO_HEADER = 1 << 3,
788	IBV_QP_INIT_ATTR_IND_TABLE	= 1 << 4,
789	IBV_QP_INIT_ATTR_RX_HASH	= 1 << 5,
790	IBV_QP_INIT_ATTR_RESERVED	= 1 << 6
791};
792
793enum ibv_qp_create_flags {
794	IBV_QP_CREATE_BLOCK_SELF_MCAST_LB	= 1 << 1,
795	IBV_QP_CREATE_SCATTER_FCS		= 1 << 8,
796	IBV_QP_CREATE_CVLAN_STRIPPING		= 1 << 9,
797};
798
799struct ibv_rx_hash_conf {
800	/* enum ibv_rx_hash_function_flags */
801	uint8_t	rx_hash_function;
802	uint8_t	rx_hash_key_len;
803	uint8_t	*rx_hash_key;
804	/* enum ibv_rx_hash_fields */
805	uint64_t	rx_hash_fields_mask;
806};
807
808struct ibv_qp_init_attr_ex {
809	void		       *qp_context;
810	struct ibv_cq	       *send_cq;
811	struct ibv_cq	       *recv_cq;
812	struct ibv_srq	       *srq;
813	struct ibv_qp_cap	cap;
814	enum ibv_qp_type	qp_type;
815	int			sq_sig_all;
816
817	uint32_t		comp_mask;
818	struct ibv_pd	       *pd;
819	struct ibv_xrcd	       *xrcd;
820	uint32_t                create_flags;
821	uint16_t		max_tso_header;
822	struct ibv_rwq_ind_table       *rwq_ind_tbl;
823	struct ibv_rx_hash_conf	rx_hash_conf;
824};
825
826enum ibv_qp_open_attr_mask {
827	IBV_QP_OPEN_ATTR_NUM		= 1 << 0,
828	IBV_QP_OPEN_ATTR_XRCD	        = 1 << 1,
829	IBV_QP_OPEN_ATTR_CONTEXT	= 1 << 2,
830	IBV_QP_OPEN_ATTR_TYPE		= 1 << 3,
831	IBV_QP_OPEN_ATTR_RESERVED	= 1 << 4
832};
833
834struct ibv_qp_open_attr {
835	uint32_t		comp_mask;
836	uint32_t		qp_num;
837	struct ibv_xrcd        *xrcd;
838	void		       *qp_context;
839	enum ibv_qp_type	qp_type;
840};
841
842enum ibv_qp_attr_mask {
843	IBV_QP_STATE			= 1 << 	0,
844	IBV_QP_CUR_STATE		= 1 << 	1,
845	IBV_QP_EN_SQD_ASYNC_NOTIFY	= 1 << 	2,
846	IBV_QP_ACCESS_FLAGS		= 1 << 	3,
847	IBV_QP_PKEY_INDEX		= 1 << 	4,
848	IBV_QP_PORT			= 1 << 	5,
849	IBV_QP_QKEY			= 1 << 	6,
850	IBV_QP_AV			= 1 << 	7,
851	IBV_QP_PATH_MTU			= 1 << 	8,
852	IBV_QP_TIMEOUT			= 1 << 	9,
853	IBV_QP_RETRY_CNT		= 1 << 10,
854	IBV_QP_RNR_RETRY		= 1 << 11,
855	IBV_QP_RQ_PSN			= 1 << 12,
856	IBV_QP_MAX_QP_RD_ATOMIC		= 1 << 13,
857	IBV_QP_ALT_PATH			= 1 << 14,
858	IBV_QP_MIN_RNR_TIMER		= 1 << 15,
859	IBV_QP_SQ_PSN			= 1 << 16,
860	IBV_QP_MAX_DEST_RD_ATOMIC	= 1 << 17,
861	IBV_QP_PATH_MIG_STATE		= 1 << 18,
862	IBV_QP_CAP			= 1 << 19,
863	IBV_QP_DEST_QPN			= 1 << 20,
864	IBV_QP_RATE_LIMIT		= 1 << 25,
865};
866
867enum ibv_qp_state {
868	IBV_QPS_RESET,
869	IBV_QPS_INIT,
870	IBV_QPS_RTR,
871	IBV_QPS_RTS,
872	IBV_QPS_SQD,
873	IBV_QPS_SQE,
874	IBV_QPS_ERR,
875	IBV_QPS_UNKNOWN
876};
877
878enum ibv_mig_state {
879	IBV_MIG_MIGRATED,
880	IBV_MIG_REARM,
881	IBV_MIG_ARMED
882};
883
884struct ibv_qp_attr {
885	enum ibv_qp_state	qp_state;
886	enum ibv_qp_state	cur_qp_state;
887	enum ibv_mtu		path_mtu;
888	enum ibv_mig_state	path_mig_state;
889	uint32_t		qkey;
890	uint32_t		rq_psn;
891	uint32_t		sq_psn;
892	uint32_t		dest_qp_num;
893	int			qp_access_flags;
894	struct ibv_qp_cap	cap;
895	struct ibv_ah_attr	ah_attr;
896	struct ibv_ah_attr	alt_ah_attr;
897	uint16_t		pkey_index;
898	uint16_t		alt_pkey_index;
899	uint8_t			en_sqd_async_notify;
900	uint8_t			sq_draining;
901	uint8_t			max_rd_atomic;
902	uint8_t			max_dest_rd_atomic;
903	uint8_t			min_rnr_timer;
904	uint8_t			port_num;
905	uint8_t			timeout;
906	uint8_t			retry_cnt;
907	uint8_t			rnr_retry;
908	uint8_t			alt_port_num;
909	uint8_t			alt_timeout;
910	uint32_t		rate_limit;
911};
912
913enum ibv_wr_opcode {
914	IBV_WR_RDMA_WRITE,
915	IBV_WR_RDMA_WRITE_WITH_IMM,
916	IBV_WR_SEND,
917	IBV_WR_SEND_WITH_IMM,
918	IBV_WR_RDMA_READ,
919	IBV_WR_ATOMIC_CMP_AND_SWP,
920	IBV_WR_ATOMIC_FETCH_AND_ADD,
921	IBV_WR_LOCAL_INV,
922	IBV_WR_BIND_MW,
923	IBV_WR_SEND_WITH_INV,
924	IBV_WR_TSO,
925};
926
927enum ibv_send_flags {
928	IBV_SEND_FENCE		= 1 << 0,
929	IBV_SEND_SIGNALED	= 1 << 1,
930	IBV_SEND_SOLICITED	= 1 << 2,
931	IBV_SEND_INLINE		= 1 << 3,
932	IBV_SEND_IP_CSUM	= 1 << 4
933};
934
935struct ibv_sge {
936	uint64_t		addr;
937	uint32_t		length;
938	uint32_t		lkey;
939};
940
941struct ibv_send_wr {
942	uint64_t		wr_id;
943	struct ibv_send_wr     *next;
944	struct ibv_sge	       *sg_list;
945	int			num_sge;
946	enum ibv_wr_opcode	opcode;
947	int			send_flags;
948	__be32			imm_data;
949	union {
950		struct {
951			uint64_t	remote_addr;
952			uint32_t	rkey;
953		} rdma;
954		struct {
955			uint64_t	remote_addr;
956			uint64_t	compare_add;
957			uint64_t	swap;
958			uint32_t	rkey;
959		} atomic;
960		struct {
961			struct ibv_ah  *ah;
962			uint32_t	remote_qpn;
963			uint32_t	remote_qkey;
964		} ud;
965	} wr;
966	union {
967		struct {
968			uint32_t    remote_srqn;
969		} xrc;
970	} qp_type;
971	union {
972		struct {
973			struct ibv_mw	*mw;
974			uint32_t		rkey;
975			struct ibv_mw_bind_info	bind_info;
976		} bind_mw;
977		struct {
978			void		       *hdr;
979			uint16_t		hdr_sz;
980			uint16_t		mss;
981		} tso;
982	};
983};
984
985struct ibv_recv_wr {
986	uint64_t		wr_id;
987	struct ibv_recv_wr     *next;
988	struct ibv_sge	       *sg_list;
989	int			num_sge;
990};
991
992struct ibv_mw_bind {
993	uint64_t		wr_id;
994	int			send_flags;
995	struct ibv_mw_bind_info bind_info;
996};
997
998struct ibv_srq {
999	struct ibv_context     *context;
1000	void		       *srq_context;
1001	struct ibv_pd	       *pd;
1002	uint32_t		handle;
1003
1004	pthread_mutex_t		mutex;
1005	pthread_cond_t		cond;
1006	uint32_t		events_completed;
1007};
1008
1009/*
1010 * Work Queue. QP can be created without internal WQs "packaged" inside it,
1011 * this QP can be configured to use "external" WQ object as its
1012 * receive/send queue.
1013 * WQ associated (many to one) with Completion Queue it owns WQ properties
1014 * (PD, WQ size etc).
1015 * WQ of type IBV_WQT_RQ:
1016 * - Contains receive WQEs, in this case its PD serves as scatter as well.
1017 * - Exposes post receive function to be used to post a list of work
1018 *   requests (WRs) to its receive queue.
1019 */
1020struct ibv_wq {
1021	struct ibv_context     *context;
1022	void		       *wq_context;
1023	struct	ibv_pd	       *pd;
1024	struct	ibv_cq	       *cq;
1025	uint32_t		wq_num;
1026	uint32_t		handle;
1027	enum ibv_wq_state       state;
1028	enum ibv_wq_type	wq_type;
1029	int (*post_recv)(struct ibv_wq *current,
1030			 struct ibv_recv_wr *recv_wr,
1031			 struct ibv_recv_wr **bad_recv_wr);
1032	pthread_mutex_t		mutex;
1033	pthread_cond_t		cond;
1034	uint32_t		events_completed;
1035	uint32_t		comp_mask;
1036};
1037
1038struct ibv_qp {
1039	struct ibv_context     *context;
1040	void		       *qp_context;
1041	struct ibv_pd	       *pd;
1042	struct ibv_cq	       *send_cq;
1043	struct ibv_cq	       *recv_cq;
1044	struct ibv_srq	       *srq;
1045	uint32_t		handle;
1046	uint32_t		qp_num;
1047	enum ibv_qp_state       state;
1048	enum ibv_qp_type	qp_type;
1049
1050	pthread_mutex_t		mutex;
1051	pthread_cond_t		cond;
1052	uint32_t		events_completed;
1053};
1054
1055struct ibv_comp_channel {
1056	struct ibv_context     *context;
1057	int			fd;
1058	int			refcnt;
1059};
1060
1061struct ibv_cq {
1062	struct ibv_context     *context;
1063	struct ibv_comp_channel *channel;
1064	void		       *cq_context;
1065	uint32_t		handle;
1066	int			cqe;
1067
1068	pthread_mutex_t		mutex;
1069	pthread_cond_t		cond;
1070	uint32_t		comp_events_completed;
1071	uint32_t		async_events_completed;
1072};
1073
1074struct ibv_poll_cq_attr {
1075	uint32_t comp_mask;
1076};
1077
1078struct ibv_cq_ex {
1079	struct ibv_context     *context;
1080	struct ibv_comp_channel *channel;
1081	void		       *cq_context;
1082	uint32_t		handle;
1083	int			cqe;
1084
1085	pthread_mutex_t		mutex;
1086	pthread_cond_t		cond;
1087	uint32_t		comp_events_completed;
1088	uint32_t		async_events_completed;
1089
1090	uint32_t		comp_mask;
1091	enum ibv_wc_status status;
1092	uint64_t wr_id;
1093	int (*start_poll)(struct ibv_cq_ex *current,
1094			     struct ibv_poll_cq_attr *attr);
1095	int (*next_poll)(struct ibv_cq_ex *current);
1096	void (*end_poll)(struct ibv_cq_ex *current);
1097	enum ibv_wc_opcode (*read_opcode)(struct ibv_cq_ex *current);
1098	uint32_t (*read_vendor_err)(struct ibv_cq_ex *current);
1099	uint32_t (*read_byte_len)(struct ibv_cq_ex *current);
1100	uint32_t (*read_imm_data)(struct ibv_cq_ex *current);
1101	uint32_t (*read_qp_num)(struct ibv_cq_ex *current);
1102	uint32_t (*read_src_qp)(struct ibv_cq_ex *current);
1103	int (*read_wc_flags)(struct ibv_cq_ex *current);
1104	uint32_t (*read_slid)(struct ibv_cq_ex *current);
1105	uint8_t (*read_sl)(struct ibv_cq_ex *current);
1106	uint8_t (*read_dlid_path_bits)(struct ibv_cq_ex *current);
1107	uint64_t (*read_completion_ts)(struct ibv_cq_ex *current);
1108	uint16_t (*read_cvlan)(struct ibv_cq_ex *current);
1109	uint32_t (*read_flow_tag)(struct ibv_cq_ex *current);
1110};
1111
1112static inline struct ibv_cq *ibv_cq_ex_to_cq(struct ibv_cq_ex *cq)
1113{
1114	return (struct ibv_cq *)cq;
1115}
1116
1117static inline int ibv_start_poll(struct ibv_cq_ex *cq,
1118				    struct ibv_poll_cq_attr *attr)
1119{
1120	return cq->start_poll(cq, attr);
1121}
1122
1123static inline int ibv_next_poll(struct ibv_cq_ex *cq)
1124{
1125	return cq->next_poll(cq);
1126}
1127
1128static inline void ibv_end_poll(struct ibv_cq_ex *cq)
1129{
1130	cq->end_poll(cq);
1131}
1132
1133static inline enum ibv_wc_opcode ibv_wc_read_opcode(struct ibv_cq_ex *cq)
1134{
1135	return cq->read_opcode(cq);
1136}
1137
1138static inline uint32_t ibv_wc_read_vendor_err(struct ibv_cq_ex *cq)
1139{
1140	return cq->read_vendor_err(cq);
1141}
1142
1143static inline uint32_t ibv_wc_read_byte_len(struct ibv_cq_ex *cq)
1144{
1145	return cq->read_byte_len(cq);
1146}
1147
1148static inline uint32_t ibv_wc_read_imm_data(struct ibv_cq_ex *cq)
1149{
1150	return cq->read_imm_data(cq);
1151}
1152
1153static inline uint32_t ibv_wc_read_qp_num(struct ibv_cq_ex *cq)
1154{
1155	return cq->read_qp_num(cq);
1156}
1157
1158static inline uint32_t ibv_wc_read_src_qp(struct ibv_cq_ex *cq)
1159{
1160	return cq->read_src_qp(cq);
1161}
1162
1163static inline int ibv_wc_read_wc_flags(struct ibv_cq_ex *cq)
1164{
1165	return cq->read_wc_flags(cq);
1166}
1167
1168static inline uint32_t ibv_wc_read_slid(struct ibv_cq_ex *cq)
1169{
1170	return cq->read_slid(cq);
1171}
1172
1173static inline uint8_t ibv_wc_read_sl(struct ibv_cq_ex *cq)
1174{
1175	return cq->read_sl(cq);
1176}
1177
1178static inline uint8_t ibv_wc_read_dlid_path_bits(struct ibv_cq_ex *cq)
1179{
1180	return cq->read_dlid_path_bits(cq);
1181}
1182
1183static inline uint64_t ibv_wc_read_completion_ts(struct ibv_cq_ex *cq)
1184{
1185	return cq->read_completion_ts(cq);
1186}
1187
1188static inline uint16_t ibv_wc_read_cvlan(struct ibv_cq_ex *cq)
1189{
1190	return cq->read_cvlan(cq);
1191}
1192
1193static inline uint32_t ibv_wc_read_flow_tag(struct ibv_cq_ex *cq)
1194{
1195	return cq->read_flow_tag(cq);
1196}
1197
1198static inline int ibv_post_wq_recv(struct ibv_wq *wq,
1199				   struct ibv_recv_wr *recv_wr,
1200				   struct ibv_recv_wr **bad_recv_wr)
1201{
1202	return wq->post_recv(wq, recv_wr, bad_recv_wr);
1203}
1204
1205struct ibv_ah {
1206	struct ibv_context     *context;
1207	struct ibv_pd	       *pd;
1208	uint32_t		handle;
1209};
1210
1211enum ibv_flow_flags {
1212	IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1 << 0,
1213	IBV_FLOW_ATTR_FLAGS_DONT_TRAP = 1 << 1,
1214};
1215
1216enum ibv_flow_attr_type {
1217	/* steering according to rule specifications */
1218	IBV_FLOW_ATTR_NORMAL		= 0x0,
1219	/* default unicast and multicast rule -
1220	 * receive all Eth traffic which isn't steered to any QP
1221	 */
1222	IBV_FLOW_ATTR_ALL_DEFAULT	= 0x1,
1223	/* default multicast rule -
1224	 * receive all Eth multicast traffic which isn't steered to any QP
1225	 */
1226	IBV_FLOW_ATTR_MC_DEFAULT	= 0x2,
1227	/* sniffer rule - receive all port traffic */
1228	IBV_FLOW_ATTR_SNIFFER		= 0x3,
1229};
1230
1231enum ibv_flow_spec_type {
1232	IBV_FLOW_SPEC_ETH		= 0x20,
1233	IBV_FLOW_SPEC_IPV4		= 0x30,
1234	IBV_FLOW_SPEC_IPV6		= 0x31,
1235	IBV_FLOW_SPEC_IPV4_EXT		= 0x32,
1236	IBV_FLOW_SPEC_TCP		= 0x40,
1237	IBV_FLOW_SPEC_UDP		= 0x41,
1238	IBV_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
1239	IBV_FLOW_SPEC_INNER		= 0x100,
1240	IBV_FLOW_SPEC_ACTION_TAG	= 0x1000,
1241	IBV_FLOW_SPEC_ACTION_DROP	= 0x1001,
1242};
1243
1244struct ibv_flow_eth_filter {
1245	uint8_t		dst_mac[6];
1246	uint8_t		src_mac[6];
1247	uint16_t	ether_type;
1248	/*
1249	 * same layout as 802.1q: prio 3, cfi 1, vlan id 12
1250	 */
1251	uint16_t	vlan_tag;
1252};
1253
1254struct ibv_flow_spec_eth {
1255	enum ibv_flow_spec_type  type;
1256	uint16_t  size;
1257	struct ibv_flow_eth_filter val;
1258	struct ibv_flow_eth_filter mask;
1259};
1260
1261struct ibv_flow_ipv4_filter {
1262	uint32_t src_ip;
1263	uint32_t dst_ip;
1264};
1265
1266struct ibv_flow_spec_ipv4 {
1267	enum ibv_flow_spec_type  type;
1268	uint16_t  size;
1269	struct ibv_flow_ipv4_filter val;
1270	struct ibv_flow_ipv4_filter mask;
1271};
1272
1273struct ibv_flow_ipv4_ext_filter {
1274	uint32_t src_ip;
1275	uint32_t dst_ip;
1276	uint8_t  proto;
1277	uint8_t  tos;
1278	uint8_t  ttl;
1279	uint8_t  flags;
1280};
1281
1282struct ibv_flow_spec_ipv4_ext {
1283	enum ibv_flow_spec_type  type;
1284	uint16_t  size;
1285	struct ibv_flow_ipv4_ext_filter val;
1286	struct ibv_flow_ipv4_ext_filter mask;
1287};
1288
1289struct ibv_flow_ipv6_filter {
1290	uint8_t  src_ip[16];
1291	uint8_t  dst_ip[16];
1292	uint32_t flow_label;
1293	uint8_t  next_hdr;
1294	uint8_t  traffic_class;
1295	uint8_t  hop_limit;
1296};
1297
1298struct ibv_flow_spec_ipv6 {
1299	enum ibv_flow_spec_type  type;
1300	uint16_t  size;
1301	struct ibv_flow_ipv6_filter val;
1302	struct ibv_flow_ipv6_filter mask;
1303};
1304
1305struct ibv_flow_tcp_udp_filter {
1306	uint16_t dst_port;
1307	uint16_t src_port;
1308};
1309
1310struct ibv_flow_spec_tcp_udp {
1311	enum ibv_flow_spec_type  type;
1312	uint16_t  size;
1313	struct ibv_flow_tcp_udp_filter val;
1314	struct ibv_flow_tcp_udp_filter mask;
1315};
1316
1317struct ibv_flow_tunnel_filter {
1318	uint32_t tunnel_id;
1319};
1320
1321struct ibv_flow_spec_tunnel {
1322	enum ibv_flow_spec_type  type;
1323	uint16_t  size;
1324	struct ibv_flow_tunnel_filter val;
1325	struct ibv_flow_tunnel_filter mask;
1326};
1327
1328struct ibv_flow_spec_action_tag {
1329	enum ibv_flow_spec_type  type;
1330	uint16_t  size;
1331	uint32_t  tag_id;
1332};
1333
1334struct ibv_flow_spec_action_drop {
1335	enum ibv_flow_spec_type  type;
1336	uint16_t  size;
1337};
1338
1339struct ibv_flow_spec {
1340	union {
1341		struct {
1342			enum ibv_flow_spec_type	type;
1343			uint16_t		size;
1344		} hdr;
1345		struct ibv_flow_spec_eth eth;
1346		struct ibv_flow_spec_ipv4 ipv4;
1347		struct ibv_flow_spec_tcp_udp tcp_udp;
1348		struct ibv_flow_spec_ipv4_ext ipv4_ext;
1349		struct ibv_flow_spec_ipv6 ipv6;
1350		struct ibv_flow_spec_tunnel tunnel;
1351		struct ibv_flow_spec_action_tag flow_tag;
1352		struct ibv_flow_spec_action_drop drop;
1353	};
1354};
1355
1356struct ibv_flow_attr {
1357	uint32_t comp_mask;
1358	enum ibv_flow_attr_type type;
1359	uint16_t size;
1360	uint16_t priority;
1361	uint8_t num_of_specs;
1362	uint8_t port;
1363	uint32_t flags;
1364	/* Following are the optional layers according to user request
1365	 * struct ibv_flow_spec_xxx [L2]
1366	 * struct ibv_flow_spec_yyy [L3/L4]
1367	 */
1368};
1369
1370struct ibv_flow {
1371	uint32_t	   comp_mask;
1372	struct ibv_context *context;
1373	uint32_t	   handle;
1374};
1375
1376struct ibv_device;
1377struct ibv_context;
1378
1379/* Obsolete, never used, do not touch */
1380struct _ibv_device_ops {
1381	struct ibv_context *	(*_dummy1)(struct ibv_device *device, int cmd_fd);
1382	void			(*_dummy2)(struct ibv_context *context);
1383};
1384
1385enum {
1386	IBV_SYSFS_NAME_MAX	= 64,
1387	IBV_SYSFS_PATH_MAX	= 256
1388};
1389
1390struct ibv_device {
1391	struct _ibv_device_ops	_ops;
1392	enum ibv_node_type	node_type;
1393	enum ibv_transport_type	transport_type;
1394	/* Name of underlying kernel IB device, eg "mthca0" */
1395	char			name[IBV_SYSFS_NAME_MAX];
1396	/* Name of uverbs device, eg "uverbs0" */
1397	char			dev_name[IBV_SYSFS_NAME_MAX];
1398	/* Path to infiniband_verbs class device in sysfs */
1399	char			dev_path[IBV_SYSFS_PATH_MAX];
1400	/* Path to infiniband class device in sysfs */
1401	char			ibdev_path[IBV_SYSFS_PATH_MAX];
1402};
1403
1404struct ibv_context_ops {
1405	int			(*query_device)(struct ibv_context *context,
1406					      struct ibv_device_attr *device_attr);
1407	int			(*query_port)(struct ibv_context *context, uint8_t port_num,
1408					      struct ibv_port_attr *port_attr);
1409	struct ibv_pd *		(*alloc_pd)(struct ibv_context *context);
1410	int			(*dealloc_pd)(struct ibv_pd *pd);
1411	struct ibv_mr *		(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
1412					  int access);
1413	int			(*rereg_mr)(struct ibv_mr *mr,
1414					    int flags,
1415					    struct ibv_pd *pd, void *addr,
1416					    size_t length,
1417					    int access);
1418	int			(*dereg_mr)(struct ibv_mr *mr);
1419	struct ibv_mw *		(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
1420	int			(*bind_mw)(struct ibv_qp *qp, struct ibv_mw *mw,
1421					   struct ibv_mw_bind *mw_bind);
1422	int			(*dealloc_mw)(struct ibv_mw *mw);
1423	struct ibv_cq *		(*create_cq)(struct ibv_context *context, int cqe,
1424					     struct ibv_comp_channel *channel,
1425					     int comp_vector);
1426	int			(*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc);
1427	int			(*req_notify_cq)(struct ibv_cq *cq, int solicited_only);
1428	void			(*cq_event)(struct ibv_cq *cq);
1429	int			(*resize_cq)(struct ibv_cq *cq, int cqe);
1430	int			(*destroy_cq)(struct ibv_cq *cq);
1431	struct ibv_srq *	(*create_srq)(struct ibv_pd *pd,
1432					      struct ibv_srq_init_attr *srq_init_attr);
1433	int			(*modify_srq)(struct ibv_srq *srq,
1434					      struct ibv_srq_attr *srq_attr,
1435					      int srq_attr_mask);
1436	int			(*query_srq)(struct ibv_srq *srq,
1437					     struct ibv_srq_attr *srq_attr);
1438	int			(*destroy_srq)(struct ibv_srq *srq);
1439	int			(*post_srq_recv)(struct ibv_srq *srq,
1440						 struct ibv_recv_wr *recv_wr,
1441						 struct ibv_recv_wr **bad_recv_wr);
1442	struct ibv_qp *		(*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
1443	int			(*query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1444					    int attr_mask,
1445					    struct ibv_qp_init_attr *init_attr);
1446	int			(*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1447					     int attr_mask);
1448	int			(*destroy_qp)(struct ibv_qp *qp);
1449	int			(*post_send)(struct ibv_qp *qp, struct ibv_send_wr *wr,
1450					     struct ibv_send_wr **bad_wr);
1451	int			(*post_recv)(struct ibv_qp *qp, struct ibv_recv_wr *wr,
1452					     struct ibv_recv_wr **bad_wr);
1453	struct ibv_ah *		(*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr);
1454	int			(*destroy_ah)(struct ibv_ah *ah);
1455	int			(*attach_mcast)(struct ibv_qp *qp, const union ibv_gid *gid,
1456						uint16_t lid);
1457	int			(*detach_mcast)(struct ibv_qp *qp, const union ibv_gid *gid,
1458						uint16_t lid);
1459	void			(*async_event)(struct ibv_async_event *event);
1460};
1461
1462struct ibv_context {
1463	struct ibv_device      *device;
1464	struct ibv_context_ops	ops;
1465	int			cmd_fd;
1466	int			async_fd;
1467	int			num_comp_vectors;
1468	pthread_mutex_t		mutex;
1469	void		       *abi_compat;
1470};
1471
1472enum ibv_cq_init_attr_mask {
1473	IBV_CQ_INIT_ATTR_MASK_FLAGS	= 1 << 0,
1474	IBV_CQ_INIT_ATTR_MASK_RESERVED	= 1 << 1
1475};
1476
1477enum ibv_create_cq_attr_flags {
1478	IBV_CREATE_CQ_ATTR_SINGLE_THREADED = 1 << 0,
1479	IBV_CREATE_CQ_ATTR_RESERVED = 1 << 1,
1480};
1481
1482struct ibv_cq_init_attr_ex {
1483	/* Minimum number of entries required for CQ */
1484	uint32_t			cqe;
1485	/* Consumer-supplied context returned for completion events */
1486	void			*cq_context;
1487	/* Completion channel where completion events will be queued.
1488	 * May be NULL if completion events will not be used.
1489	 */
1490	struct ibv_comp_channel *channel;
1491	/* Completion vector used to signal completion events.
1492	 *  Must be < context->num_comp_vectors.
1493	 */
1494	uint32_t			comp_vector;
1495	 /* Or'ed bit of enum ibv_create_cq_wc_flags. */
1496	uint64_t		wc_flags;
1497	/* compatibility mask (extended verb). Or'd flags of
1498	 * enum ibv_cq_init_attr_mask
1499	 */
1500	uint32_t		comp_mask;
1501	/* create cq attr flags - one or more flags from
1502	 * enum ibv_create_cq_attr_flags
1503	 */
1504	uint32_t		flags;
1505};
1506
1507enum ibv_values_mask {
1508	IBV_VALUES_MASK_RAW_CLOCK	= 1 << 0,
1509	IBV_VALUES_MASK_RESERVED	= 1 << 1
1510};
1511
1512struct ibv_values_ex {
1513	uint32_t	comp_mask;
1514	struct timespec raw_clock;
1515};
1516
1517enum verbs_context_mask {
1518	VERBS_CONTEXT_XRCD	= 1 << 0,
1519	VERBS_CONTEXT_SRQ	= 1 << 1,
1520	VERBS_CONTEXT_QP	= 1 << 2,
1521	VERBS_CONTEXT_CREATE_FLOW = 1 << 3,
1522	VERBS_CONTEXT_DESTROY_FLOW = 1 << 4,
1523	VERBS_CONTEXT_RESERVED	= 1 << 5
1524};
1525
1526struct verbs_context {
1527	/*  "grows up" - new fields go here */
1528	int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table);
1529	struct ibv_rwq_ind_table *(*create_rwq_ind_table)(struct ibv_context *context,
1530							  struct ibv_rwq_ind_table_init_attr *init_attr);
1531	int (*destroy_wq)(struct ibv_wq *wq);
1532	int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr);
1533	struct ibv_wq * (*create_wq)(struct ibv_context *context,
1534				     struct ibv_wq_init_attr *wq_init_attr);
1535	int (*query_rt_values)(struct ibv_context *context,
1536			       struct ibv_values_ex *values);
1537	struct ibv_cq_ex *(*create_cq_ex)(struct ibv_context *context,
1538					  struct ibv_cq_init_attr_ex *init_attr);
1539	struct verbs_ex_private *priv;
1540	int (*query_device_ex)(struct ibv_context *context,
1541			       const struct ibv_query_device_ex_input *input,
1542			       struct ibv_device_attr_ex *attr,
1543			       size_t attr_size);
1544	int (*ibv_destroy_flow) (struct ibv_flow *flow);
1545	void (*ABI_placeholder2) (void); /* DO NOT COPY THIS GARBAGE */
1546	struct ibv_flow * (*ibv_create_flow) (struct ibv_qp *qp,
1547					      struct ibv_flow_attr *flow_attr);
1548	void (*ABI_placeholder1) (void); /* DO NOT COPY THIS GARBAGE */
1549	struct ibv_qp *(*open_qp)(struct ibv_context *context,
1550			struct ibv_qp_open_attr *attr);
1551	struct ibv_qp *(*create_qp_ex)(struct ibv_context *context,
1552			struct ibv_qp_init_attr_ex *qp_init_attr_ex);
1553	int (*get_srq_num)(struct ibv_srq *srq, uint32_t *srq_num);
1554	struct ibv_srq *	(*create_srq_ex)(struct ibv_context *context,
1555						 struct ibv_srq_init_attr_ex *srq_init_attr_ex);
1556	struct ibv_xrcd *	(*open_xrcd)(struct ibv_context *context,
1557					     struct ibv_xrcd_init_attr *xrcd_init_attr);
1558	int			(*close_xrcd)(struct ibv_xrcd *xrcd);
1559	uint64_t has_comp_mask;
1560	size_t   sz;			/* Must be immediately before struct ibv_context */
1561	struct ibv_context context;	/* Must be last field in the struct */
1562};
1563
1564static inline struct verbs_context *verbs_get_ctx(struct ibv_context *ctx)
1565{
1566	return (ctx->abi_compat != __VERBS_ABI_IS_EXTENDED) ?
1567		NULL : container_of(ctx, struct verbs_context, context);
1568}
1569
1570#define verbs_get_ctx_op(ctx, op) ({ \
1571	struct verbs_context *__vctx = verbs_get_ctx(ctx); \
1572	(!__vctx || (__vctx->sz < sizeof(*__vctx) - offsetof(struct verbs_context, op)) || \
1573	 !__vctx->op) ? NULL : __vctx; })
1574
1575#define verbs_set_ctx_op(_vctx, op, ptr) ({ \
1576	struct verbs_context *vctx = _vctx; \
1577	if (vctx && (vctx->sz >= sizeof(*vctx) - offsetof(struct verbs_context, op))) \
1578		vctx->op = ptr; })
1579
1580/**
1581 * ibv_get_device_list - Get list of IB devices currently available
1582 * @num_devices: optional.  if non-NULL, set to the number of devices
1583 * returned in the array.
1584 *
1585 * Return a NULL-terminated array of IB devices.  The array can be
1586 * released with ibv_free_device_list().
1587 */
1588struct ibv_device **ibv_get_device_list(int *num_devices);
1589
1590/**
1591 * ibv_free_device_list - Free list from ibv_get_device_list()
1592 *
1593 * Free an array of devices returned from ibv_get_device_list().  Once
1594 * the array is freed, pointers to devices that were not opened with
1595 * ibv_open_device() are no longer valid.  Client code must open all
1596 * devices it intends to use before calling ibv_free_device_list().
1597 */
1598void ibv_free_device_list(struct ibv_device **list);
1599
1600/**
1601 * ibv_get_device_name - Return kernel device name
1602 */
1603const char *ibv_get_device_name(struct ibv_device *device);
1604
1605/**
1606 * ibv_get_device_guid - Return device's node GUID
1607 */
1608__be64 ibv_get_device_guid(struct ibv_device *device);
1609
1610/**
1611 * ibv_open_device - Initialize device for use
1612 */
1613struct ibv_context *ibv_open_device(struct ibv_device *device);
1614
1615/**
1616 * ibv_close_device - Release device
1617 */
1618int ibv_close_device(struct ibv_context *context);
1619
1620/**
1621 * ibv_get_async_event - Get next async event
1622 * @event: Pointer to use to return async event
1623 *
1624 * All async events returned by ibv_get_async_event() must eventually
1625 * be acknowledged with ibv_ack_async_event().
1626 */
1627int ibv_get_async_event(struct ibv_context *context,
1628			struct ibv_async_event *event);
1629
1630/**
1631 * ibv_ack_async_event - Acknowledge an async event
1632 * @event: Event to be acknowledged.
1633 *
1634 * All async events which are returned by ibv_get_async_event() must
1635 * be acknowledged.  To avoid races, destroying an object (CQ, SRQ or
1636 * QP) will wait for all affiliated events to be acknowledged, so
1637 * there should be a one-to-one correspondence between acks and
1638 * successful gets.
1639 */
1640void ibv_ack_async_event(struct ibv_async_event *event);
1641
1642/**
1643 * ibv_query_device - Get device properties
1644 */
1645int ibv_query_device(struct ibv_context *context,
1646		     struct ibv_device_attr *device_attr);
1647
1648/**
1649 * ibv_query_port - Get port properties
1650 */
1651int ibv_query_port(struct ibv_context *context, uint8_t port_num,
1652		   struct ibv_port_attr *port_attr);
1653
1654static inline int ___ibv_query_port(struct ibv_context *context,
1655				    uint8_t port_num,
1656				    struct ibv_port_attr *port_attr)
1657{
1658	/* For compatibility when running with old libibverbs */
1659	port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED;
1660	port_attr->reserved   = 0;
1661
1662	return ibv_query_port(context, port_num, port_attr);
1663}
1664
1665#define ibv_query_port(context, port_num, port_attr) \
1666	___ibv_query_port(context, port_num, port_attr)
1667
1668/**
1669 * ibv_query_gid - Get a GID table entry
1670 */
1671int ibv_query_gid(struct ibv_context *context, uint8_t port_num,
1672		  int index, union ibv_gid *gid);
1673
1674/**
1675 * ibv_query_pkey - Get a P_Key table entry
1676 */
1677int ibv_query_pkey(struct ibv_context *context, uint8_t port_num,
1678		   int index, __be16 *pkey);
1679
1680/**
1681 * ibv_alloc_pd - Allocate a protection domain
1682 */
1683struct ibv_pd *ibv_alloc_pd(struct ibv_context *context);
1684
1685/**
1686 * ibv_dealloc_pd - Free a protection domain
1687 */
1688int ibv_dealloc_pd(struct ibv_pd *pd);
1689
1690static inline struct ibv_flow *ibv_create_flow(struct ibv_qp *qp,
1691					       struct ibv_flow_attr *flow)
1692{
1693	struct verbs_context *vctx = verbs_get_ctx_op(qp->context,
1694						      ibv_create_flow);
1695	if (!vctx || !vctx->ibv_create_flow) {
1696		errno = ENOSYS;
1697		return NULL;
1698	}
1699
1700	return vctx->ibv_create_flow(qp, flow);
1701}
1702
1703static inline int ibv_destroy_flow(struct ibv_flow *flow_id)
1704{
1705	struct verbs_context *vctx = verbs_get_ctx_op(flow_id->context,
1706						      ibv_destroy_flow);
1707	if (!vctx || !vctx->ibv_destroy_flow)
1708		return -ENOSYS;
1709	return vctx->ibv_destroy_flow(flow_id);
1710}
1711
1712/**
1713 * ibv_open_xrcd - Open an extended connection domain
1714 */
1715static inline struct ibv_xrcd *
1716ibv_open_xrcd(struct ibv_context *context, struct ibv_xrcd_init_attr *xrcd_init_attr)
1717{
1718	struct verbs_context *vctx = verbs_get_ctx_op(context, open_xrcd);
1719	if (!vctx) {
1720		errno = ENOSYS;
1721		return NULL;
1722	}
1723	return vctx->open_xrcd(context, xrcd_init_attr);
1724}
1725
1726/**
1727 * ibv_close_xrcd - Close an extended connection domain
1728 */
1729static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
1730{
1731	struct verbs_context *vctx = verbs_get_ctx(xrcd->context);
1732	return vctx->close_xrcd(xrcd);
1733}
1734
1735/**
1736 * ibv_reg_mr - Register a memory region
1737 */
1738struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
1739			  size_t length, int access);
1740
1741
1742enum ibv_rereg_mr_err_code {
1743	/* Old MR is valid, invalid input */
1744	IBV_REREG_MR_ERR_INPUT = -1,
1745	/* Old MR is valid, failed via don't fork on new address range */
1746	IBV_REREG_MR_ERR_DONT_FORK_NEW = -2,
1747	/* New MR is valid, failed via do fork on old address range */
1748	IBV_REREG_MR_ERR_DO_FORK_OLD = -3,
1749	/* MR shouldn't be used, command error */
1750	IBV_REREG_MR_ERR_CMD = -4,
1751	/* MR shouldn't be used, command error, invalid fork state on new address range */
1752	IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW = -5,
1753};
1754
1755/**
1756 * ibv_rereg_mr - Re-Register a memory region
1757 */
1758int ibv_rereg_mr(struct ibv_mr *mr, int flags,
1759		 struct ibv_pd *pd, void *addr,
1760		 size_t length, int access);
1761/**
1762 * ibv_dereg_mr - Deregister a memory region
1763 */
1764int ibv_dereg_mr(struct ibv_mr *mr);
1765
1766/**
1767 * ibv_alloc_mw - Allocate a memory window
1768 */
1769static inline struct ibv_mw *ibv_alloc_mw(struct ibv_pd *pd,
1770					  enum ibv_mw_type type)
1771{
1772	struct ibv_mw *mw;
1773
1774	if (!pd->context->ops.alloc_mw) {
1775		errno = ENOSYS;
1776		return NULL;
1777	}
1778
1779	mw = pd->context->ops.alloc_mw(pd, type);
1780	return mw;
1781}
1782
1783/**
1784 * ibv_dealloc_mw - Free a memory window
1785 */
1786static inline int ibv_dealloc_mw(struct ibv_mw *mw)
1787{
1788	return mw->context->ops.dealloc_mw(mw);
1789}
1790
1791/**
1792 * ibv_inc_rkey - Increase the 8 lsb in the given rkey
1793 */
1794static inline uint32_t ibv_inc_rkey(uint32_t rkey)
1795{
1796	const uint32_t mask = 0x000000ff;
1797	uint8_t newtag = (uint8_t)((rkey + 1) & mask);
1798
1799	return (rkey & ~mask) | newtag;
1800}
1801
1802/**
1803 * ibv_bind_mw - Bind a memory window to a region
1804 */
1805static inline int ibv_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
1806			      struct ibv_mw_bind *mw_bind)
1807{
1808	if (mw->type != IBV_MW_TYPE_1)
1809		return EINVAL;
1810
1811	return mw->context->ops.bind_mw(qp, mw, mw_bind);
1812}
1813
1814/**
1815 * ibv_create_comp_channel - Create a completion event channel
1816 */
1817struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context);
1818
1819/**
1820 * ibv_destroy_comp_channel - Destroy a completion event channel
1821 */
1822int ibv_destroy_comp_channel(struct ibv_comp_channel *channel);
1823
1824/**
1825 * ibv_create_cq - Create a completion queue
1826 * @context - Context CQ will be attached to
1827 * @cqe - Minimum number of entries required for CQ
1828 * @cq_context - Consumer-supplied context returned for completion events
1829 * @channel - Completion channel where completion events will be queued.
1830 *     May be NULL if completion events will not be used.
1831 * @comp_vector - Completion vector used to signal completion events.
1832 *     Must be >= 0 and < context->num_comp_vectors.
1833 */
1834struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
1835			     void *cq_context,
1836			     struct ibv_comp_channel *channel,
1837			     int comp_vector);
1838
1839/**
1840 * ibv_create_cq_ex - Create a completion queue
1841 * @context - Context CQ will be attached to
1842 * @cq_attr - Attributes to create the CQ with
1843 */
1844static inline
1845struct ibv_cq_ex *ibv_create_cq_ex(struct ibv_context *context,
1846				   struct ibv_cq_init_attr_ex *cq_attr)
1847{
1848	struct verbs_context *vctx = verbs_get_ctx_op(context, create_cq_ex);
1849
1850	if (!vctx) {
1851		errno = ENOSYS;
1852		return NULL;
1853	}
1854
1855	if (cq_attr->comp_mask & ~(IBV_CQ_INIT_ATTR_MASK_RESERVED - 1)) {
1856		errno = EINVAL;
1857		return NULL;
1858	}
1859
1860	return vctx->create_cq_ex(context, cq_attr);
1861}
1862
1863/**
1864 * ibv_resize_cq - Modifies the capacity of the CQ.
1865 * @cq: The CQ to resize.
1866 * @cqe: The minimum size of the CQ.
1867 *
1868 * Users can examine the cq structure to determine the actual CQ size.
1869 */
1870int ibv_resize_cq(struct ibv_cq *cq, int cqe);
1871
1872/**
1873 * ibv_destroy_cq - Destroy a completion queue
1874 */
1875int ibv_destroy_cq(struct ibv_cq *cq);
1876
1877/**
1878 * ibv_get_cq_event - Read next CQ event
1879 * @channel: Channel to get next event from.
1880 * @cq: Used to return pointer to CQ.
1881 * @cq_context: Used to return consumer-supplied CQ context.
1882 *
1883 * All completion events returned by ibv_get_cq_event() must
1884 * eventually be acknowledged with ibv_ack_cq_events().
1885 */
1886int ibv_get_cq_event(struct ibv_comp_channel *channel,
1887		     struct ibv_cq **cq, void **cq_context);
1888
1889/**
1890 * ibv_ack_cq_events - Acknowledge CQ completion events
1891 * @cq: CQ to acknowledge events for
1892 * @nevents: Number of events to acknowledge.
1893 *
1894 * All completion events which are returned by ibv_get_cq_event() must
1895 * be acknowledged.  To avoid races, ibv_destroy_cq() will wait for
1896 * all completion events to be acknowledged, so there should be a
1897 * one-to-one correspondence between acks and successful gets.  An
1898 * application may accumulate multiple completion events and
1899 * acknowledge them in a single call to ibv_ack_cq_events() by passing
1900 * the number of events to ack in @nevents.
1901 */
1902void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents);
1903
1904/**
1905 * ibv_poll_cq - Poll a CQ for work completions
1906 * @cq:the CQ being polled
1907 * @num_entries:maximum number of completions to return
1908 * @wc:array of at least @num_entries of &struct ibv_wc where completions
1909 *   will be returned
1910 *
1911 * Poll a CQ for (possibly multiple) completions.  If the return value
1912 * is < 0, an error occurred.  If the return value is >= 0, it is the
1913 * number of completions returned.  If the return value is
1914 * non-negative and strictly less than num_entries, then the CQ was
1915 * emptied.
1916 */
1917static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc)
1918{
1919	return cq->context->ops.poll_cq(cq, num_entries, wc);
1920}
1921
1922/**
1923 * ibv_req_notify_cq - Request completion notification on a CQ.  An
1924 *   event will be added to the completion channel associated with the
1925 *   CQ when an entry is added to the CQ.
1926 * @cq: The completion queue to request notification for.
1927 * @solicited_only: If non-zero, an event will be generated only for
1928 *   the next solicited CQ entry.  If zero, any CQ entry, solicited or
1929 *   not, will generate an event.
1930 */
1931static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only)
1932{
1933	return cq->context->ops.req_notify_cq(cq, solicited_only);
1934}
1935
1936/**
1937 * ibv_create_srq - Creates a SRQ associated with the specified protection
1938 *   domain.
1939 * @pd: The protection domain associated with the SRQ.
1940 * @srq_init_attr: A list of initial attributes required to create the SRQ.
1941 *
1942 * srq_attr->max_wr and srq_attr->max_sge are read the determine the
1943 * requested size of the SRQ, and set to the actual values allocated
1944 * on return.  If ibv_create_srq() succeeds, then max_wr and max_sge
1945 * will always be at least as large as the requested values.
1946 */
1947struct ibv_srq *ibv_create_srq(struct ibv_pd *pd,
1948			       struct ibv_srq_init_attr *srq_init_attr);
1949
1950static inline struct ibv_srq *
1951ibv_create_srq_ex(struct ibv_context *context,
1952		  struct ibv_srq_init_attr_ex *srq_init_attr_ex)
1953{
1954	struct verbs_context *vctx;
1955	uint32_t mask = srq_init_attr_ex->comp_mask;
1956
1957	if (!(mask & ~(IBV_SRQ_INIT_ATTR_PD | IBV_SRQ_INIT_ATTR_TYPE)) &&
1958	    (mask & IBV_SRQ_INIT_ATTR_PD) &&
1959	    (!(mask & IBV_SRQ_INIT_ATTR_TYPE) ||
1960	     (srq_init_attr_ex->srq_type == IBV_SRQT_BASIC)))
1961		return ibv_create_srq(srq_init_attr_ex->pd,
1962				      (struct ibv_srq_init_attr *)srq_init_attr_ex);
1963
1964	vctx = verbs_get_ctx_op(context, create_srq_ex);
1965	if (!vctx) {
1966		errno = ENOSYS;
1967		return NULL;
1968	}
1969	return vctx->create_srq_ex(context, srq_init_attr_ex);
1970}
1971
1972/**
1973 * ibv_modify_srq - Modifies the attributes for the specified SRQ.
1974 * @srq: The SRQ to modify.
1975 * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
1976 *   the current values of selected SRQ attributes are returned.
1977 * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
1978 *   are being modified.
1979 *
1980 * The mask may contain IBV_SRQ_MAX_WR to resize the SRQ and/or
1981 * IBV_SRQ_LIMIT to set the SRQ's limit and request notification when
1982 * the number of receives queued drops below the limit.
1983 */
1984int ibv_modify_srq(struct ibv_srq *srq,
1985		   struct ibv_srq_attr *srq_attr,
1986		   int srq_attr_mask);
1987
1988/**
1989 * ibv_query_srq - Returns the attribute list and current values for the
1990 *   specified SRQ.
1991 * @srq: The SRQ to query.
1992 * @srq_attr: The attributes of the specified SRQ.
1993 */
1994int ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
1995
1996static inline int ibv_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num)
1997{
1998	struct verbs_context *vctx = verbs_get_ctx_op(srq->context, get_srq_num);
1999
2000	if (!vctx)
2001		return ENOSYS;
2002
2003	return vctx->get_srq_num(srq, srq_num);
2004}
2005
2006/**
2007 * ibv_destroy_srq - Destroys the specified SRQ.
2008 * @srq: The SRQ to destroy.
2009 */
2010int ibv_destroy_srq(struct ibv_srq *srq);
2011
2012/**
2013 * ibv_post_srq_recv - Posts a list of work requests to the specified SRQ.
2014 * @srq: The SRQ to post the work request on.
2015 * @recv_wr: A list of work requests to post on the receive queue.
2016 * @bad_recv_wr: On an immediate failure, this parameter will reference
2017 *   the work request that failed to be posted on the QP.
2018 */
2019static inline int ibv_post_srq_recv(struct ibv_srq *srq,
2020				    struct ibv_recv_wr *recv_wr,
2021				    struct ibv_recv_wr **bad_recv_wr)
2022{
2023	return srq->context->ops.post_srq_recv(srq, recv_wr, bad_recv_wr);
2024}
2025
2026/**
2027 * ibv_create_qp - Create a queue pair.
2028 */
2029struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
2030			     struct ibv_qp_init_attr *qp_init_attr);
2031
2032static inline struct ibv_qp *
2033ibv_create_qp_ex(struct ibv_context *context, struct ibv_qp_init_attr_ex *qp_init_attr_ex)
2034{
2035	struct verbs_context *vctx;
2036	uint32_t mask = qp_init_attr_ex->comp_mask;
2037
2038	if (mask == IBV_QP_INIT_ATTR_PD)
2039		return ibv_create_qp(qp_init_attr_ex->pd,
2040				     (struct ibv_qp_init_attr *)qp_init_attr_ex);
2041
2042	vctx = verbs_get_ctx_op(context, create_qp_ex);
2043	if (!vctx) {
2044		errno = ENOSYS;
2045		return NULL;
2046	}
2047	return vctx->create_qp_ex(context, qp_init_attr_ex);
2048}
2049
2050/**
2051 * ibv_query_rt_values_ex - Get current real time @values of a device.
2052 * @values - in/out - defines the attributes we need to query/queried.
2053 * (Or's bits of enum ibv_values_mask on values->comp_mask field)
2054 */
2055static inline int
2056ibv_query_rt_values_ex(struct ibv_context *context,
2057		       struct ibv_values_ex *values)
2058{
2059	struct verbs_context *vctx;
2060
2061	vctx = verbs_get_ctx_op(context, query_rt_values);
2062	if (!vctx)
2063		return ENOSYS;
2064
2065	if (values->comp_mask & ~(IBV_VALUES_MASK_RESERVED - 1))
2066		return EINVAL;
2067
2068	return vctx->query_rt_values(context, values);
2069}
2070
2071/**
2072 * ibv_query_device_ex - Get extended device properties
2073 */
2074static inline int
2075ibv_query_device_ex(struct ibv_context *context,
2076		    const struct ibv_query_device_ex_input *input,
2077		    struct ibv_device_attr_ex *attr)
2078{
2079	struct verbs_context *vctx;
2080	int ret;
2081
2082	vctx = verbs_get_ctx_op(context, query_device_ex);
2083	if (!vctx)
2084		goto legacy;
2085
2086	ret = vctx->query_device_ex(context, input, attr, sizeof(*attr));
2087	if (ret == ENOSYS)
2088		goto legacy;
2089
2090	return ret;
2091
2092legacy:
2093	memset(attr, 0, sizeof(*attr));
2094	ret = ibv_query_device(context, &attr->orig_attr);
2095
2096	return ret;
2097}
2098
2099/**
2100 * ibv_open_qp - Open a shareable queue pair.
2101 */
2102static inline struct ibv_qp *
2103ibv_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *qp_open_attr)
2104{
2105	struct verbs_context *vctx = verbs_get_ctx_op(context, open_qp);
2106	if (!vctx) {
2107		errno = ENOSYS;
2108		return NULL;
2109	}
2110	return vctx->open_qp(context, qp_open_attr);
2111}
2112
2113/**
2114 * ibv_modify_qp - Modify a queue pair.
2115 */
2116int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
2117		  int attr_mask);
2118
2119/**
2120 * ibv_query_qp - Returns the attribute list and current values for the
2121 *   specified QP.
2122 * @qp: The QP to query.
2123 * @attr: The attributes of the specified QP.
2124 * @attr_mask: A bit-mask used to select specific attributes to query.
2125 * @init_attr: Additional attributes of the selected QP.
2126 *
2127 * The qp_attr_mask may be used to limit the query to gathering only the
2128 * selected attributes.
2129 */
2130int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
2131		 int attr_mask,
2132		 struct ibv_qp_init_attr *init_attr);
2133
2134/**
2135 * ibv_destroy_qp - Destroy a queue pair.
2136 */
2137int ibv_destroy_qp(struct ibv_qp *qp);
2138
2139/*
2140 * ibv_create_wq - Creates a WQ associated with the specified protection
2141 * domain.
2142 * @context: ibv_context.
2143 * @wq_init_attr: A list of initial attributes required to create the
2144 * WQ. If WQ creation succeeds, then the attributes are updated to
2145 * the actual capabilities of the created WQ.
2146 *
2147 * wq_init_attr->max_wr and wq_init_attr->max_sge determine
2148 * the requested size of the WQ, and set to the actual values allocated
2149 * on return.
2150 * If ibv_create_wq() succeeds, then max_wr and max_sge will always be
2151 * at least as large as the requested values.
2152 *
2153 * Return Value
2154 * ibv_create_wq() returns a pointer to the created WQ, or NULL if the request
2155 * fails.
2156 */
2157static inline struct ibv_wq *ibv_create_wq(struct ibv_context *context,
2158					   struct ibv_wq_init_attr *wq_init_attr)
2159{
2160	struct verbs_context *vctx = verbs_get_ctx_op(context, create_wq);
2161	struct ibv_wq *wq;
2162
2163	if (!vctx) {
2164		errno = ENOSYS;
2165		return NULL;
2166	}
2167
2168	wq = vctx->create_wq(context, wq_init_attr);
2169	if (wq) {
2170		wq->events_completed = 0;
2171		pthread_mutex_init(&wq->mutex, NULL);
2172		pthread_cond_init(&wq->cond, NULL);
2173	}
2174
2175	return wq;
2176}
2177
2178/*
2179 * ibv_modify_wq - Modifies the attributes for the specified WQ.
2180 * @wq: The WQ to modify.
2181 * @wq_attr: On input, specifies the WQ attributes to modify.
2182 *    wq_attr->attr_mask: A bit-mask used to specify which attributes of the WQ
2183 *    are being modified.
2184 * On output, the current values of selected WQ attributes are returned.
2185 *
2186 * Return Value
2187 * ibv_modify_wq() returns 0 on success, or the value of errno
2188 * on failure (which indicates the failure reason).
2189 *
2190*/
2191static inline int ibv_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr)
2192{
2193	struct verbs_context *vctx = verbs_get_ctx_op(wq->context, modify_wq);
2194
2195	if (!vctx)
2196		return ENOSYS;
2197
2198	return vctx->modify_wq(wq, wq_attr);
2199}
2200
2201/*
2202 * ibv_destroy_wq - Destroys the specified WQ.
2203 * @ibv_wq: The WQ to destroy.
2204 * Return Value
2205 * ibv_destroy_wq() returns 0 on success, or the value of errno
2206 * on failure (which indicates the failure reason).
2207*/
2208static inline int ibv_destroy_wq(struct ibv_wq *wq)
2209{
2210	struct verbs_context *vctx;
2211
2212	vctx = verbs_get_ctx_op(wq->context, destroy_wq);
2213	if (!vctx)
2214		return ENOSYS;
2215
2216	return vctx->destroy_wq(wq);
2217}
2218
2219/*
2220 * ibv_create_rwq_ind_table - Creates a receive work queue Indirection Table
2221 * @context: ibv_context.
2222 * @init_attr: A list of initial attributes required to create the Indirection Table.
2223 * Return Value
2224 * ibv_create_rwq_ind_table returns a pointer to the created
2225 * Indirection Table, or NULL if the request fails.
2226 */
2227static inline struct ibv_rwq_ind_table *ibv_create_rwq_ind_table(struct ibv_context *context,
2228								 struct ibv_rwq_ind_table_init_attr *init_attr)
2229{
2230	struct verbs_context *vctx;
2231
2232	vctx = verbs_get_ctx_op(context, create_rwq_ind_table);
2233	if (!vctx) {
2234		errno = ENOSYS;
2235		return NULL;
2236	}
2237
2238	return vctx->create_rwq_ind_table(context, init_attr);
2239}
2240
2241/*
2242 * ibv_destroy_rwq_ind_table - Destroys the specified Indirection Table.
2243 * @rwq_ind_table: The Indirection Table to destroy.
2244 * Return Value
2245 * ibv_destroy_rwq_ind_table() returns 0 on success, or the value of errno
2246 * on failure (which indicates the failure reason).
2247*/
2248static inline int ibv_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
2249{
2250	struct verbs_context *vctx;
2251
2252	vctx = verbs_get_ctx_op(rwq_ind_table->context, destroy_rwq_ind_table);
2253	if (!vctx)
2254		return ENOSYS;
2255
2256	return vctx->destroy_rwq_ind_table(rwq_ind_table);
2257}
2258
2259/**
2260 * ibv_post_send - Post a list of work requests to a send queue.
2261 *
2262 * If IBV_SEND_INLINE flag is set, the data buffers can be reused
2263 * immediately after the call returns.
2264 */
2265static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
2266				struct ibv_send_wr **bad_wr)
2267{
2268	return qp->context->ops.post_send(qp, wr, bad_wr);
2269}
2270
2271/**
2272 * ibv_post_recv - Post a list of work requests to a receive queue.
2273 */
2274static inline int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr,
2275				struct ibv_recv_wr **bad_wr)
2276{
2277	return qp->context->ops.post_recv(qp, wr, bad_wr);
2278}
2279
2280/**
2281 * ibv_create_ah - Create an address handle.
2282 */
2283struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
2284
2285/**
2286 * ibv_init_ah_from_wc - Initializes address handle attributes from a
2287 *   work completion.
2288 * @context: Device context on which the received message arrived.
2289 * @port_num: Port on which the received message arrived.
2290 * @wc: Work completion associated with the received message.
2291 * @grh: References the received global route header.  This parameter is
2292 *   ignored unless the work completion indicates that the GRH is valid.
2293 * @ah_attr: Returned attributes that can be used when creating an address
2294 *   handle for replying to the message.
2295 */
2296int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num,
2297			struct ibv_wc *wc, struct ibv_grh *grh,
2298			struct ibv_ah_attr *ah_attr);
2299
2300/**
2301 * ibv_create_ah_from_wc - Creates an address handle associated with the
2302 *   sender of the specified work completion.
2303 * @pd: The protection domain associated with the address handle.
2304 * @wc: Work completion information associated with a received message.
2305 * @grh: References the received global route header.  This parameter is
2306 *   ignored unless the work completion indicates that the GRH is valid.
2307 * @port_num: The outbound port number to associate with the address.
2308 *
2309 * The address handle is used to reference a local or global destination
2310 * in all UD QP post sends.
2311 */
2312struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc,
2313				     struct ibv_grh *grh, uint8_t port_num);
2314
2315/**
2316 * ibv_destroy_ah - Destroy an address handle.
2317 */
2318int ibv_destroy_ah(struct ibv_ah *ah);
2319
2320/**
2321 * ibv_attach_mcast - Attaches the specified QP to a multicast group.
2322 * @qp: QP to attach to the multicast group.  The QP must be a UD QP.
2323 * @gid: Multicast group GID.
2324 * @lid: Multicast group LID in host byte order.
2325 *
2326 * In order to route multicast packets correctly, subnet
2327 * administration must have created the multicast group and configured
2328 * the fabric appropriately.  The port associated with the specified
2329 * QP must also be a member of the multicast group.
2330 */
2331int ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
2332
2333/**
2334 * ibv_detach_mcast - Detaches the specified QP from a multicast group.
2335 * @qp: QP to detach from the multicast group.
2336 * @gid: Multicast group GID.
2337 * @lid: Multicast group LID in host byte order.
2338 */
2339int ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
2340
2341/**
2342 * ibv_fork_init - Prepare data structures so that fork() may be used
2343 * safely.  If this function is not called or returns a non-zero
2344 * status, then libibverbs data structures are not fork()-safe and the
2345 * effect of an application calling fork() is undefined.
2346 */
2347int ibv_fork_init(void);
2348
2349/**
2350 * ibv_node_type_str - Return string describing node_type enum value
2351 */
2352const char *ibv_node_type_str(enum ibv_node_type node_type);
2353
2354/**
2355 * ibv_port_state_str - Return string describing port_state enum value
2356 */
2357const char *ibv_port_state_str(enum ibv_port_state port_state);
2358
2359/**
2360 * ibv_event_type_str - Return string describing event_type enum value
2361 */
2362const char *ibv_event_type_str(enum ibv_event_type event);
2363
2364#define ETHERNET_LL_SIZE 6
2365int ibv_resolve_eth_l2_from_gid(struct ibv_context *context,
2366				struct ibv_ah_attr *attr,
2367				uint8_t eth_mac[ETHERNET_LL_SIZE],
2368				uint16_t *vid);
2369
2370static inline int ibv_is_qpt_supported(uint32_t caps, enum ibv_qp_type qpt)
2371{
2372	return !!(caps & (1 << qpt));
2373}
2374
2375END_C_DECLS
2376
2377#  undef __attribute_const
2378
2379
2380#endif /* INFINIBAND_VERBS_H */
2381