verbs.h revision 331769
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3 * Copyright (c) 2004, 2011-2012 Intel Corporation.  All rights reserved.
4 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc.  All rights reserved.
5 * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#ifndef INFINIBAND_VERBS_H
37#define INFINIBAND_VERBS_H
38
39#include <stdint.h>
40#include <pthread.h>
41#include <stddef.h>
42#include <errno.h>
43#include <string.h>
44#include <infiniband/types.h>
45
46#ifdef __cplusplus
47#  define BEGIN_C_DECLS extern "C" {
48#  define END_C_DECLS   }
49#else /* !__cplusplus */
50#  define BEGIN_C_DECLS
51#  define END_C_DECLS
52#endif /* __cplusplus */
53
54#if __GNUC__ >= 3
55#  define __attribute_const __attribute__((const))
56#else
57#  define __attribute_const
58#endif
59
60BEGIN_C_DECLS
61
62union ibv_gid {
63	uint8_t			raw[16];
64	struct {
65		__be64	subnet_prefix;
66		__be64	interface_id;
67	} global;
68};
69
70#ifndef container_of
71/**
72 * container_of - cast a member of a structure out to the containing structure
73 * @ptr:        the pointer to the member.
74 * @type:       the type of the container struct this is embedded in.
75 * @member:     the name of the member within the struct.
76 *
77 */
78#define container_of(ptr, type, member) \
79	((type *) ((uint8_t *)(ptr) - offsetof(type, member)))
80#endif
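
/*
 * Illustrative sketch (not part of the original header): container_of()
 * recovers a pointer to the enclosing structure from a pointer to one of
 * its members; verbs_get_ctx() below uses exactly this pattern with
 * struct verbs_context and its embedded struct ibv_context. The wrapper
 * type here is hypothetical.
 *
 *	struct wrapper {
 *		int			extra;
 *		struct ibv_context	context;
 *	};
 *
 *	Given a ctx known to be embedded in a struct wrapper:
 *	struct wrapper *w = container_of(ctx, struct wrapper, context);
 */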
81
82#define vext_field_avail(type, fld, sz) (offsetof(type, fld) < (sz))
83
84static void *__VERBS_ABI_IS_EXTENDED = ((uint8_t *) NULL) - 1;
85
86enum ibv_node_type {
87	IBV_NODE_UNKNOWN	= -1,
88	IBV_NODE_CA 		= 1,
89	IBV_NODE_SWITCH,
90	IBV_NODE_ROUTER,
91	IBV_NODE_RNIC,
92	IBV_NODE_USNIC,
93	IBV_NODE_USNIC_UDP,
94};
95
96enum ibv_transport_type {
97	IBV_TRANSPORT_UNKNOWN	= -1,
98	IBV_TRANSPORT_IB	= 0,
99	IBV_TRANSPORT_IWARP,
100	IBV_TRANSPORT_USNIC,
101	IBV_TRANSPORT_USNIC_UDP,
102};
103
104enum ibv_device_cap_flags {
105	IBV_DEVICE_RESIZE_MAX_WR	= 1,
106	IBV_DEVICE_BAD_PKEY_CNTR	= 1 <<  1,
107	IBV_DEVICE_BAD_QKEY_CNTR	= 1 <<  2,
108	IBV_DEVICE_RAW_MULTI		= 1 <<  3,
109	IBV_DEVICE_AUTO_PATH_MIG	= 1 <<  4,
110	IBV_DEVICE_CHANGE_PHY_PORT	= 1 <<  5,
111	IBV_DEVICE_UD_AV_PORT_ENFORCE	= 1 <<  6,
112	IBV_DEVICE_CURR_QP_STATE_MOD	= 1 <<  7,
113	IBV_DEVICE_SHUTDOWN_PORT	= 1 <<  8,
114	IBV_DEVICE_INIT_TYPE		= 1 <<  9,
115	IBV_DEVICE_PORT_ACTIVE_EVENT	= 1 << 10,
116	IBV_DEVICE_SYS_IMAGE_GUID	= 1 << 11,
117	IBV_DEVICE_RC_RNR_NAK_GEN	= 1 << 12,
118	IBV_DEVICE_SRQ_RESIZE		= 1 << 13,
119	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14,
120	IBV_DEVICE_MEM_WINDOW           = 1 << 17,
121	IBV_DEVICE_UD_IP_CSUM		= 1 << 18,
122	IBV_DEVICE_XRC			= 1 << 20,
123	IBV_DEVICE_MEM_MGT_EXTENSIONS	= 1 << 21,
124	IBV_DEVICE_MEM_WINDOW_TYPE_2A	= 1 << 23,
125	IBV_DEVICE_MEM_WINDOW_TYPE_2B	= 1 << 24,
126	IBV_DEVICE_RC_IP_CSUM		= 1 << 25,
127	IBV_DEVICE_RAW_IP_CSUM		= 1 << 26,
128	IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29
129};
130
131/*
132 * Can't extend the ibv_device_cap_flags enum above, as on some
133 * systems/compilers the enum range is limited to 4 bytes.
134 */
135#define IBV_DEVICE_RAW_SCATTER_FCS (1ULL << 34)
136
137enum ibv_atomic_cap {
138	IBV_ATOMIC_NONE,
139	IBV_ATOMIC_HCA,
140	IBV_ATOMIC_GLOB
141};
142
143struct ibv_device_attr {
144	char			fw_ver[64];
145	__be64			node_guid;
146	__be64			sys_image_guid;
147	uint64_t		max_mr_size;
148	uint64_t		page_size_cap;
149	uint32_t		vendor_id;
150	uint32_t		vendor_part_id;
151	uint32_t		hw_ver;
152	int			max_qp;
153	int			max_qp_wr;
154	int			device_cap_flags;
155	int			max_sge;
156	int			max_sge_rd;
157	int			max_cq;
158	int			max_cqe;
159	int			max_mr;
160	int			max_pd;
161	int			max_qp_rd_atom;
162	int			max_ee_rd_atom;
163	int			max_res_rd_atom;
164	int			max_qp_init_rd_atom;
165	int			max_ee_init_rd_atom;
166	enum ibv_atomic_cap	atomic_cap;
167	int			max_ee;
168	int			max_rdd;
169	int			max_mw;
170	int			max_raw_ipv6_qp;
171	int			max_raw_ethy_qp;
172	int			max_mcast_grp;
173	int			max_mcast_qp_attach;
174	int			max_total_mcast_qp_attach;
175	int			max_ah;
176	int			max_fmr;
177	int			max_map_per_fmr;
178	int			max_srq;
179	int			max_srq_wr;
180	int			max_srq_sge;
181	uint16_t		max_pkeys;
182	uint8_t			local_ca_ack_delay;
183	uint8_t			phys_port_cnt;
184};
185
186/* An extensible input struct for possible future extensions of the
187 * ibv_query_device_ex verb. */
188struct ibv_query_device_ex_input {
189	uint32_t		comp_mask;
190};
191
192enum ibv_odp_transport_cap_bits {
193	IBV_ODP_SUPPORT_SEND     = 1 << 0,
194	IBV_ODP_SUPPORT_RECV     = 1 << 1,
195	IBV_ODP_SUPPORT_WRITE    = 1 << 2,
196	IBV_ODP_SUPPORT_READ     = 1 << 3,
197	IBV_ODP_SUPPORT_ATOMIC   = 1 << 4,
198};
199
200struct ibv_odp_caps {
201	uint64_t general_caps;
202	struct {
203		uint32_t rc_odp_caps;
204		uint32_t uc_odp_caps;
205		uint32_t ud_odp_caps;
206	} per_transport_caps;
207};
208
209enum ibv_odp_general_caps {
210	IBV_ODP_SUPPORT = 1 << 0,
211};
212
213struct ibv_tso_caps {
214	uint32_t max_tso;
215	uint32_t supported_qpts;
216};
217
218/* RX Hash function flags */
219enum ibv_rx_hash_function_flags {
220	IBV_RX_HASH_FUNC_TOEPLITZ	= 1 << 0,
221};
222
223/*
224 * RX hash fields select which fields of an incoming packet
225 * participate in the RX hash calculation. Each flag represents one
226 * packet field; when the flag is set, that field is included in the
227 * RX hash calculation.
228 * Note: the *IPV4 and *IPV6 flags can't be enabled together on the
229 * same QP, nor can the *TCP and *UDP flags.
230 */
231enum ibv_rx_hash_fields {
232	IBV_RX_HASH_SRC_IPV4	= 1 << 0,
233	IBV_RX_HASH_DST_IPV4	= 1 << 1,
234	IBV_RX_HASH_SRC_IPV6	= 1 << 2,
235	IBV_RX_HASH_DST_IPV6	= 1 << 3,
236	IBV_RX_HASH_SRC_PORT_TCP	= 1 << 4,
237	IBV_RX_HASH_DST_PORT_TCP	= 1 << 5,
238	IBV_RX_HASH_SRC_PORT_UDP	= 1 << 6,
239	IBV_RX_HASH_DST_PORT_UDP	= 1 << 7
240};
241
242struct ibv_rss_caps {
243	uint32_t supported_qpts;
244	uint32_t max_rwq_indirection_tables;
245	uint32_t max_rwq_indirection_table_size;
246	uint64_t rx_hash_fields_mask; /* enum ibv_rx_hash_fields */
247	uint8_t  rx_hash_function; /* enum ibv_rx_hash_function_flags */
248};
249
250struct ibv_packet_pacing_caps {
251	uint32_t qp_rate_limit_min;
252	uint32_t qp_rate_limit_max; /* In kbps */
253	uint32_t supported_qpts;
254};
255
256enum ibv_raw_packet_caps {
257	IBV_RAW_PACKET_CAP_CVLAN_STRIPPING	= 1 << 0,
258	IBV_RAW_PACKET_CAP_SCATTER_FCS		= 1 << 1,
259	IBV_RAW_PACKET_CAP_IP_CSUM		= 1 << 2,
260};
261
262struct ibv_device_attr_ex {
263	struct ibv_device_attr	orig_attr;
264	uint32_t		comp_mask;
265	struct ibv_odp_caps	odp_caps;
266	uint64_t		completion_timestamp_mask;
267	uint64_t		hca_core_clock;
268	uint64_t		device_cap_flags_ex;
269	struct ibv_tso_caps	tso_caps;
270	struct ibv_rss_caps     rss_caps;
271	uint32_t		max_wq_type_rq;
272	struct ibv_packet_pacing_caps packet_pacing_caps;
273	uint32_t		raw_packet_caps; /* Use ibv_raw_packet_caps */
274};
275
276enum ibv_mtu {
277	IBV_MTU_256  = 1,
278	IBV_MTU_512  = 2,
279	IBV_MTU_1024 = 3,
280	IBV_MTU_2048 = 4,
281	IBV_MTU_4096 = 5
282};
283
284enum ibv_port_state {
285	IBV_PORT_NOP		= 0,
286	IBV_PORT_DOWN		= 1,
287	IBV_PORT_INIT		= 2,
288	IBV_PORT_ARMED		= 3,
289	IBV_PORT_ACTIVE		= 4,
290	IBV_PORT_ACTIVE_DEFER	= 5
291};
292
293enum {
294	IBV_LINK_LAYER_UNSPECIFIED,
295	IBV_LINK_LAYER_INFINIBAND,
296	IBV_LINK_LAYER_ETHERNET,
297};
298
299enum ibv_port_cap_flags {
300	IBV_PORT_SM				= 1 <<  1,
301	IBV_PORT_NOTICE_SUP			= 1 <<  2,
302	IBV_PORT_TRAP_SUP			= 1 <<  3,
303	IBV_PORT_OPT_IPD_SUP			= 1 <<  4,
304	IBV_PORT_AUTO_MIGR_SUP			= 1 <<  5,
305	IBV_PORT_SL_MAP_SUP			= 1 <<  6,
306	IBV_PORT_MKEY_NVRAM			= 1 <<  7,
307	IBV_PORT_PKEY_NVRAM			= 1 <<  8,
308	IBV_PORT_LED_INFO_SUP			= 1 <<  9,
309	IBV_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11,
310	IBV_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12,
311	IBV_PORT_EXTENDED_SPEEDS_SUP		= 1 << 14,
312	IBV_PORT_CM_SUP				= 1 << 16,
313	IBV_PORT_SNMP_TUNNEL_SUP		= 1 << 17,
314	IBV_PORT_REINIT_SUP			= 1 << 18,
315	IBV_PORT_DEVICE_MGMT_SUP		= 1 << 19,
316	IBV_PORT_VENDOR_CLASS_SUP		= 1 << 20,
317	IBV_PORT_DR_NOTICE_SUP			= 1 << 21,
318	IBV_PORT_CAP_MASK_NOTICE_SUP		= 1 << 22,
319	IBV_PORT_BOOT_MGMT_SUP			= 1 << 23,
320	IBV_PORT_LINK_LATENCY_SUP		= 1 << 24,
321	IBV_PORT_CLIENT_REG_SUP			= 1 << 25,
322	IBV_PORT_IP_BASED_GIDS			= 1 << 26
323};
324
325struct ibv_port_attr {
326	enum ibv_port_state	state;
327	enum ibv_mtu		max_mtu;
328	enum ibv_mtu		active_mtu;
329	int			gid_tbl_len;
330	uint32_t		port_cap_flags;
331	uint32_t		max_msg_sz;
332	uint32_t		bad_pkey_cntr;
333	uint32_t		qkey_viol_cntr;
334	uint16_t		pkey_tbl_len;
335	uint16_t		lid;
336	uint16_t		sm_lid;
337	uint8_t			lmc;
338	uint8_t			max_vl_num;
339	uint8_t			sm_sl;
340	uint8_t			subnet_timeout;
341	uint8_t			init_type_reply;
342	uint8_t			active_width;
343	uint8_t			active_speed;
344	uint8_t			phys_state;
345	uint8_t			link_layer;
346	uint8_t			reserved;
347};
348
349enum ibv_event_type {
350	IBV_EVENT_CQ_ERR,
351	IBV_EVENT_QP_FATAL,
352	IBV_EVENT_QP_REQ_ERR,
353	IBV_EVENT_QP_ACCESS_ERR,
354	IBV_EVENT_COMM_EST,
355	IBV_EVENT_SQ_DRAINED,
356	IBV_EVENT_PATH_MIG,
357	IBV_EVENT_PATH_MIG_ERR,
358	IBV_EVENT_DEVICE_FATAL,
359	IBV_EVENT_PORT_ACTIVE,
360	IBV_EVENT_PORT_ERR,
361	IBV_EVENT_LID_CHANGE,
362	IBV_EVENT_PKEY_CHANGE,
363	IBV_EVENT_SM_CHANGE,
364	IBV_EVENT_SRQ_ERR,
365	IBV_EVENT_SRQ_LIMIT_REACHED,
366	IBV_EVENT_QP_LAST_WQE_REACHED,
367	IBV_EVENT_CLIENT_REREGISTER,
368	IBV_EVENT_GID_CHANGE,
369	IBV_EVENT_WQ_FATAL,
370};
371
372struct ibv_async_event {
373	union {
374		struct ibv_cq  *cq;
375		struct ibv_qp  *qp;
376		struct ibv_srq *srq;
377		struct ibv_wq  *wq;
378		int		port_num;
379	} element;
380	enum ibv_event_type	event_type;
381};
382
383enum ibv_wc_status {
384	IBV_WC_SUCCESS,
385	IBV_WC_LOC_LEN_ERR,
386	IBV_WC_LOC_QP_OP_ERR,
387	IBV_WC_LOC_EEC_OP_ERR,
388	IBV_WC_LOC_PROT_ERR,
389	IBV_WC_WR_FLUSH_ERR,
390	IBV_WC_MW_BIND_ERR,
391	IBV_WC_BAD_RESP_ERR,
392	IBV_WC_LOC_ACCESS_ERR,
393	IBV_WC_REM_INV_REQ_ERR,
394	IBV_WC_REM_ACCESS_ERR,
395	IBV_WC_REM_OP_ERR,
396	IBV_WC_RETRY_EXC_ERR,
397	IBV_WC_RNR_RETRY_EXC_ERR,
398	IBV_WC_LOC_RDD_VIOL_ERR,
399	IBV_WC_REM_INV_RD_REQ_ERR,
400	IBV_WC_REM_ABORT_ERR,
401	IBV_WC_INV_EECN_ERR,
402	IBV_WC_INV_EEC_STATE_ERR,
403	IBV_WC_FATAL_ERR,
404	IBV_WC_RESP_TIMEOUT_ERR,
405	IBV_WC_GENERAL_ERR
406};
407const char *ibv_wc_status_str(enum ibv_wc_status status);
408
409enum ibv_wc_opcode {
410	IBV_WC_SEND,
411	IBV_WC_RDMA_WRITE,
412	IBV_WC_RDMA_READ,
413	IBV_WC_COMP_SWAP,
414	IBV_WC_FETCH_ADD,
415	IBV_WC_BIND_MW,
416	IBV_WC_LOCAL_INV,
417	IBV_WC_TSO,
418/*
419 * Set value of IBV_WC_RECV so consumers can test if a completion is a
420 * receive by testing (opcode & IBV_WC_RECV).
421 */
422	IBV_WC_RECV			= 1 << 7,
423	IBV_WC_RECV_RDMA_WITH_IMM
424};
425
426enum {
427	IBV_WC_IP_CSUM_OK_SHIFT	= 2
428};
429
430enum ibv_create_cq_wc_flags {
431	IBV_WC_EX_WITH_BYTE_LEN		= 1 << 0,
432	IBV_WC_EX_WITH_IMM		= 1 << 1,
433	IBV_WC_EX_WITH_QP_NUM		= 1 << 2,
434	IBV_WC_EX_WITH_SRC_QP		= 1 << 3,
435	IBV_WC_EX_WITH_SLID		= 1 << 4,
436	IBV_WC_EX_WITH_SL		= 1 << 5,
437	IBV_WC_EX_WITH_DLID_PATH_BITS	= 1 << 6,
438	IBV_WC_EX_WITH_COMPLETION_TIMESTAMP	= 1 << 7,
439	IBV_WC_EX_WITH_CVLAN		= 1 << 8,
440	IBV_WC_EX_WITH_FLOW_TAG		= 1 << 9,
441};
442
443enum {
444	IBV_WC_STANDARD_FLAGS = IBV_WC_EX_WITH_BYTE_LEN		|
445				 IBV_WC_EX_WITH_IMM		|
446				 IBV_WC_EX_WITH_QP_NUM		|
447				 IBV_WC_EX_WITH_SRC_QP		|
448				 IBV_WC_EX_WITH_SLID		|
449				 IBV_WC_EX_WITH_SL		|
450				 IBV_WC_EX_WITH_DLID_PATH_BITS
451};
452
453enum {
454	IBV_CREATE_CQ_SUP_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
455				IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
456				IBV_WC_EX_WITH_CVLAN |
457				IBV_WC_EX_WITH_FLOW_TAG
458};
459
460enum ibv_wc_flags {
461	IBV_WC_GRH		= 1 << 0,
462	IBV_WC_WITH_IMM		= 1 << 1,
463	IBV_WC_IP_CSUM_OK	= 1 << IBV_WC_IP_CSUM_OK_SHIFT,
464	IBV_WC_WITH_INV         = 1 << 3
465};
466
467struct ibv_wc {
468	uint64_t		wr_id;
469	enum ibv_wc_status	status;
470	enum ibv_wc_opcode	opcode;
471	uint32_t		vendor_err;
472	uint32_t		byte_len;
473	/* When (wc_flags & IBV_WC_WITH_IMM): Immediate data in network byte order.
474	 * When (wc_flags & IBV_WC_WITH_INV): Stores the invalidated rkey.
475	 */
476	union {
477		__be32		imm_data;
478		uint32_t	invalidated_rkey;
479	};
480	uint32_t		qp_num;
481	uint32_t		src_qp;
482	int			wc_flags;
483	uint16_t		pkey_index;
484	uint16_t		slid;
485	uint8_t			sl;
486	uint8_t			dlid_path_bits;
487};
488
489enum ibv_access_flags {
490	IBV_ACCESS_LOCAL_WRITE		= 1,
491	IBV_ACCESS_REMOTE_WRITE		= (1<<1),
492	IBV_ACCESS_REMOTE_READ		= (1<<2),
493	IBV_ACCESS_REMOTE_ATOMIC	= (1<<3),
494	IBV_ACCESS_MW_BIND		= (1<<4),
495	IBV_ACCESS_ZERO_BASED		= (1<<5),
496	IBV_ACCESS_ON_DEMAND		= (1<<6),
497};
498
499struct ibv_mw_bind_info {
500	struct ibv_mr	*mr;
501	uint64_t	 addr;
502	uint64_t	 length;
503	int		 mw_access_flags; /* use ibv_access_flags */
504};
505
506struct ibv_pd {
507	struct ibv_context     *context;
508	uint32_t		handle;
509};
510
511enum ibv_xrcd_init_attr_mask {
512	IBV_XRCD_INIT_ATTR_FD	    = 1 << 0,
513	IBV_XRCD_INIT_ATTR_OFLAGS   = 1 << 1,
514	IBV_XRCD_INIT_ATTR_RESERVED = 1 << 2
515};
516
517struct ibv_xrcd_init_attr {
518	uint32_t comp_mask;
519	int	 fd;
520	int	 oflags;
521};
522
523struct ibv_xrcd {
524	struct ibv_context     *context;
525};
526
527enum ibv_rereg_mr_flags {
528	IBV_REREG_MR_CHANGE_TRANSLATION	= (1 << 0),
529	IBV_REREG_MR_CHANGE_PD		= (1 << 1),
530	IBV_REREG_MR_CHANGE_ACCESS	= (1 << 2),
531	IBV_REREG_MR_KEEP_VALID		= (1 << 3),
532	IBV_REREG_MR_FLAGS_SUPPORTED	= ((IBV_REREG_MR_KEEP_VALID << 1) - 1)
533};
534
535struct ibv_mr {
536	struct ibv_context     *context;
537	struct ibv_pd	       *pd;
538	void		       *addr;
539	size_t			length;
540	uint32_t		handle;
541	uint32_t		lkey;
542	uint32_t		rkey;
543};
544
545enum ibv_mw_type {
546	IBV_MW_TYPE_1			= 1,
547	IBV_MW_TYPE_2			= 2
548};
549
550struct ibv_mw {
551	struct ibv_context     *context;
552	struct ibv_pd	       *pd;
553	uint32_t		rkey;
554	uint32_t		handle;
555	enum ibv_mw_type	type;
556};
557
558struct ibv_global_route {
559	union ibv_gid		dgid;
560	uint32_t		flow_label;
561	uint8_t			sgid_index;
562	uint8_t			hop_limit;
563	uint8_t			traffic_class;
564};
565
566struct ibv_grh {
567	__be32			version_tclass_flow;
568	__be16			paylen;
569	uint8_t			next_hdr;
570	uint8_t			hop_limit;
571	union ibv_gid		sgid;
572	union ibv_gid		dgid;
573};
574
575enum ibv_rate {
576	IBV_RATE_MAX      = 0,
577	IBV_RATE_2_5_GBPS = 2,
578	IBV_RATE_5_GBPS   = 5,
579	IBV_RATE_10_GBPS  = 3,
580	IBV_RATE_20_GBPS  = 6,
581	IBV_RATE_30_GBPS  = 4,
582	IBV_RATE_40_GBPS  = 7,
583	IBV_RATE_60_GBPS  = 8,
584	IBV_RATE_80_GBPS  = 9,
585	IBV_RATE_120_GBPS = 10,
586	IBV_RATE_14_GBPS  = 11,
587	IBV_RATE_56_GBPS  = 12,
588	IBV_RATE_112_GBPS = 13,
589	IBV_RATE_168_GBPS = 14,
590	IBV_RATE_25_GBPS  = 15,
591	IBV_RATE_100_GBPS = 16,
592	IBV_RATE_200_GBPS = 17,
593	IBV_RATE_300_GBPS = 18
594};
595
596/**
597 * ibv_rate_to_mult - Convert the IB rate enum to a multiple of the
598 * base rate of 2.5 Gbit/sec.  For example, IBV_RATE_5_GBPS will be
599 * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
600 * @rate: rate to convert.
601 */
602int  __attribute_const ibv_rate_to_mult(enum ibv_rate rate);
603
604/**
605 * mult_to_ibv_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate enum.
606 * @mult: multiple to convert.
607 */
608enum ibv_rate __attribute_const mult_to_ibv_rate(int mult);
609
610/**
611 * ibv_rate_to_mbps - Convert the IB rate enum to Mbit/sec.
612 * For example, IBV_RATE_5_GBPS will return the value 5000.
613 * @rate: rate to convert.
614 */
615int __attribute_const ibv_rate_to_mbps(enum ibv_rate rate);
616
617/**
618 * mbps_to_ibv_rate - Convert a Mbit/sec value to an IB rate enum.
619 * @mbps: value to convert.
620 */
621enum ibv_rate __attribute_const mbps_to_ibv_rate(int mbps);
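
/*
 * Illustrative examples (not part of the original header), assuming the
 * conversions behave as documented above:
 *
 *	ibv_rate_to_mult(IBV_RATE_10_GBPS)  == 4;   (10 = 4 * 2.5 Gbit/sec)
 *	mult_to_ibv_rate(4)                 == IBV_RATE_10_GBPS;
 *	ibv_rate_to_mbps(IBV_RATE_10_GBPS)  == 10000;
 *	mbps_to_ibv_rate(10000)             == IBV_RATE_10_GBPS;
 */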
622
623struct ibv_ah_attr {
624	struct ibv_global_route	grh;
625	uint16_t		dlid;
626	uint8_t			sl;
627	uint8_t			src_path_bits;
628	uint8_t			static_rate;
629	uint8_t			is_global;
630	uint8_t			port_num;
631};
632
633enum ibv_srq_attr_mask {
634	IBV_SRQ_MAX_WR	= 1 << 0,
635	IBV_SRQ_LIMIT	= 1 << 1
636};
637
638struct ibv_srq_attr {
639	uint32_t		max_wr;
640	uint32_t		max_sge;
641	uint32_t		srq_limit;
642};
643
644struct ibv_srq_init_attr {
645	void		       *srq_context;
646	struct ibv_srq_attr	attr;
647};
648
649enum ibv_srq_type {
650	IBV_SRQT_BASIC,
651	IBV_SRQT_XRC
652};
653
654enum ibv_srq_init_attr_mask {
655	IBV_SRQ_INIT_ATTR_TYPE		= 1 << 0,
656	IBV_SRQ_INIT_ATTR_PD		= 1 << 1,
657	IBV_SRQ_INIT_ATTR_XRCD		= 1 << 2,
658	IBV_SRQ_INIT_ATTR_CQ		= 1 << 3,
659	IBV_SRQ_INIT_ATTR_RESERVED	= 1 << 4
660};
661
662struct ibv_srq_init_attr_ex {
663	void		       *srq_context;
664	struct ibv_srq_attr	attr;
665
666	uint32_t		comp_mask;
667	enum ibv_srq_type	srq_type;
668	struct ibv_pd	       *pd;
669	struct ibv_xrcd	       *xrcd;
670	struct ibv_cq	       *cq;
671};
672
673enum ibv_wq_type {
674	IBV_WQT_RQ
675};
676
677enum ibv_wq_init_attr_mask {
678	IBV_WQ_INIT_ATTR_FLAGS		= 1 << 0,
679	IBV_WQ_INIT_ATTR_RESERVED	= 1 << 1,
680};
681
682enum ibv_wq_flags {
683	IBV_WQ_FLAGS_CVLAN_STRIPPING		= 1 << 0,
684	IBV_WQ_FLAGS_SCATTER_FCS		= 1 << 1,
685	IBV_WQ_FLAGS_RESERVED			= 1 << 2,
686};
687
688struct ibv_wq_init_attr {
689	void		       *wq_context;
690	enum ibv_wq_type	wq_type;
691	uint32_t		max_wr;
692	uint32_t		max_sge;
693	struct	ibv_pd	       *pd;
694	struct	ibv_cq	       *cq;
695	uint32_t		comp_mask; /* Use ibv_wq_init_attr_mask */
696	uint32_t		create_flags; /* use ibv_wq_flags */
697};
698
699enum ibv_wq_state {
700	IBV_WQS_RESET,
701	IBV_WQS_RDY,
702	IBV_WQS_ERR,
703	IBV_WQS_UNKNOWN
704};
705
706enum ibv_wq_attr_mask {
707	IBV_WQ_ATTR_STATE	= 1 << 0,
708	IBV_WQ_ATTR_CURR_STATE	= 1 << 1,
709	IBV_WQ_ATTR_FLAGS	= 1 << 2,
710	IBV_WQ_ATTR_RESERVED	= 1 << 3,
711};
712
713struct ibv_wq_attr {
714	/* enum ibv_wq_attr_mask */
715	uint32_t		attr_mask;
716	/* Move the WQ to this state */
717	enum	ibv_wq_state	wq_state;
718	/* Assume this is the current WQ state */
719	enum	ibv_wq_state	curr_wq_state;
720	uint32_t		flags; /* Use ibv_wq_flags */
721	uint32_t		flags_mask; /* Use ibv_wq_flags */
722};
723
724/*
725 * Receive Work Queue Indirection Table.
726 * It is used to distribute incoming packets between different
727 * Receive Work Queues. Associating Receive WQs with different CPU
728 * cores allows the traffic workload to be spread across CPU cores.
729 * The Indirection Table can contain only WQs of type IBV_WQT_RQ.
730 */
731struct ibv_rwq_ind_table {
732	struct ibv_context *context;
733	int ind_tbl_handle;
734	int ind_tbl_num;
735	uint32_t comp_mask;
736};
737
738enum ibv_ind_table_init_attr_mask {
739	IBV_CREATE_IND_TABLE_RESERVED = (1 << 0)
740};
741
742/*
743 * Receive Work Queue Indirection Table attributes
744 */
745struct ibv_rwq_ind_table_init_attr {
746	uint32_t log_ind_tbl_size;
747	/* Each entry is a pointer to a Receive Work Queue */
748	struct ibv_wq **ind_tbl;
749	uint32_t comp_mask;
750};
751
752enum ibv_qp_type {
753	IBV_QPT_RC = 2,
754	IBV_QPT_UC,
755	IBV_QPT_UD,
756	IBV_QPT_RAW_PACKET = 8,
757	IBV_QPT_XRC_SEND = 9,
758	IBV_QPT_XRC_RECV
759};
760
761struct ibv_qp_cap {
762	uint32_t		max_send_wr;
763	uint32_t		max_recv_wr;
764	uint32_t		max_send_sge;
765	uint32_t		max_recv_sge;
766	uint32_t		max_inline_data;
767};
768
769struct ibv_qp_init_attr {
770	void		       *qp_context;
771	struct ibv_cq	       *send_cq;
772	struct ibv_cq	       *recv_cq;
773	struct ibv_srq	       *srq;
774	struct ibv_qp_cap	cap;
775	enum ibv_qp_type	qp_type;
776	int			sq_sig_all;
777};
778
779enum ibv_qp_init_attr_mask {
780	IBV_QP_INIT_ATTR_PD		= 1 << 0,
781	IBV_QP_INIT_ATTR_XRCD		= 1 << 1,
782	IBV_QP_INIT_ATTR_CREATE_FLAGS	= 1 << 2,
783	IBV_QP_INIT_ATTR_MAX_TSO_HEADER = 1 << 3,
784	IBV_QP_INIT_ATTR_IND_TABLE	= 1 << 4,
785	IBV_QP_INIT_ATTR_RX_HASH	= 1 << 5,
786	IBV_QP_INIT_ATTR_RESERVED	= 1 << 6
787};
788
789enum ibv_qp_create_flags {
790	IBV_QP_CREATE_BLOCK_SELF_MCAST_LB	= 1 << 1,
791	IBV_QP_CREATE_SCATTER_FCS		= 1 << 8,
792	IBV_QP_CREATE_CVLAN_STRIPPING		= 1 << 9,
793};
794
795struct ibv_rx_hash_conf {
796	/* enum ibv_rx_hash_function_flags */
797	uint8_t	rx_hash_function;
798	uint8_t	rx_hash_key_len;
799	uint8_t	*rx_hash_key;
800	/* enum ibv_rx_hash_fields */
801	uint64_t	rx_hash_fields_mask;
802};
803
804struct ibv_qp_init_attr_ex {
805	void		       *qp_context;
806	struct ibv_cq	       *send_cq;
807	struct ibv_cq	       *recv_cq;
808	struct ibv_srq	       *srq;
809	struct ibv_qp_cap	cap;
810	enum ibv_qp_type	qp_type;
811	int			sq_sig_all;
812
813	uint32_t		comp_mask;
814	struct ibv_pd	       *pd;
815	struct ibv_xrcd	       *xrcd;
816	uint32_t                create_flags;
817	uint16_t		max_tso_header;
818	struct ibv_rwq_ind_table       *rwq_ind_tbl;
819	struct ibv_rx_hash_conf	rx_hash_conf;
820};
821
822enum ibv_qp_open_attr_mask {
823	IBV_QP_OPEN_ATTR_NUM		= 1 << 0,
824	IBV_QP_OPEN_ATTR_XRCD	        = 1 << 1,
825	IBV_QP_OPEN_ATTR_CONTEXT	= 1 << 2,
826	IBV_QP_OPEN_ATTR_TYPE		= 1 << 3,
827	IBV_QP_OPEN_ATTR_RESERVED	= 1 << 4
828};
829
830struct ibv_qp_open_attr {
831	uint32_t		comp_mask;
832	uint32_t		qp_num;
833	struct ibv_xrcd        *xrcd;
834	void		       *qp_context;
835	enum ibv_qp_type	qp_type;
836};
837
838enum ibv_qp_attr_mask {
839	IBV_QP_STATE			= 1 <<  0,
840	IBV_QP_CUR_STATE		= 1 <<  1,
841	IBV_QP_EN_SQD_ASYNC_NOTIFY	= 1 <<  2,
842	IBV_QP_ACCESS_FLAGS		= 1 <<  3,
843	IBV_QP_PKEY_INDEX		= 1 <<  4,
844	IBV_QP_PORT			= 1 <<  5,
845	IBV_QP_QKEY			= 1 <<  6,
846	IBV_QP_AV			= 1 <<  7,
847	IBV_QP_PATH_MTU			= 1 <<  8,
848	IBV_QP_TIMEOUT			= 1 <<  9,
849	IBV_QP_RETRY_CNT		= 1 << 10,
850	IBV_QP_RNR_RETRY		= 1 << 11,
851	IBV_QP_RQ_PSN			= 1 << 12,
852	IBV_QP_MAX_QP_RD_ATOMIC		= 1 << 13,
853	IBV_QP_ALT_PATH			= 1 << 14,
854	IBV_QP_MIN_RNR_TIMER		= 1 << 15,
855	IBV_QP_SQ_PSN			= 1 << 16,
856	IBV_QP_MAX_DEST_RD_ATOMIC	= 1 << 17,
857	IBV_QP_PATH_MIG_STATE		= 1 << 18,
858	IBV_QP_CAP			= 1 << 19,
859	IBV_QP_DEST_QPN			= 1 << 20,
860	IBV_QP_RATE_LIMIT		= 1 << 25,
861};
862
863enum ibv_qp_state {
864	IBV_QPS_RESET,
865	IBV_QPS_INIT,
866	IBV_QPS_RTR,
867	IBV_QPS_RTS,
868	IBV_QPS_SQD,
869	IBV_QPS_SQE,
870	IBV_QPS_ERR,
871	IBV_QPS_UNKNOWN
872};
873
874enum ibv_mig_state {
875	IBV_MIG_MIGRATED,
876	IBV_MIG_REARM,
877	IBV_MIG_ARMED
878};
879
880struct ibv_qp_attr {
881	enum ibv_qp_state	qp_state;
882	enum ibv_qp_state	cur_qp_state;
883	enum ibv_mtu		path_mtu;
884	enum ibv_mig_state	path_mig_state;
885	uint32_t		qkey;
886	uint32_t		rq_psn;
887	uint32_t		sq_psn;
888	uint32_t		dest_qp_num;
889	int			qp_access_flags;
890	struct ibv_qp_cap	cap;
891	struct ibv_ah_attr	ah_attr;
892	struct ibv_ah_attr	alt_ah_attr;
893	uint16_t		pkey_index;
894	uint16_t		alt_pkey_index;
895	uint8_t			en_sqd_async_notify;
896	uint8_t			sq_draining;
897	uint8_t			max_rd_atomic;
898	uint8_t			max_dest_rd_atomic;
899	uint8_t			min_rnr_timer;
900	uint8_t			port_num;
901	uint8_t			timeout;
902	uint8_t			retry_cnt;
903	uint8_t			rnr_retry;
904	uint8_t			alt_port_num;
905	uint8_t			alt_timeout;
906	uint32_t		rate_limit;
907};
908
909enum ibv_wr_opcode {
910	IBV_WR_RDMA_WRITE,
911	IBV_WR_RDMA_WRITE_WITH_IMM,
912	IBV_WR_SEND,
913	IBV_WR_SEND_WITH_IMM,
914	IBV_WR_RDMA_READ,
915	IBV_WR_ATOMIC_CMP_AND_SWP,
916	IBV_WR_ATOMIC_FETCH_AND_ADD,
917	IBV_WR_LOCAL_INV,
918	IBV_WR_BIND_MW,
919	IBV_WR_SEND_WITH_INV,
920	IBV_WR_TSO,
921};
922
923enum ibv_send_flags {
924	IBV_SEND_FENCE		= 1 << 0,
925	IBV_SEND_SIGNALED	= 1 << 1,
926	IBV_SEND_SOLICITED	= 1 << 2,
927	IBV_SEND_INLINE		= 1 << 3,
928	IBV_SEND_IP_CSUM	= 1 << 4
929};
930
931struct ibv_sge {
932	uint64_t		addr;
933	uint32_t		length;
934	uint32_t		lkey;
935};
936
937struct ibv_send_wr {
938	uint64_t		wr_id;
939	struct ibv_send_wr     *next;
940	struct ibv_sge	       *sg_list;
941	int			num_sge;
942	enum ibv_wr_opcode	opcode;
943	int			send_flags;
944	__be32			imm_data;
945	union {
946		struct {
947			uint64_t	remote_addr;
948			uint32_t	rkey;
949		} rdma;
950		struct {
951			uint64_t	remote_addr;
952			uint64_t	compare_add;
953			uint64_t	swap;
954			uint32_t	rkey;
955		} atomic;
956		struct {
957			struct ibv_ah  *ah;
958			uint32_t	remote_qpn;
959			uint32_t	remote_qkey;
960		} ud;
961	} wr;
962	union {
963		struct {
964			uint32_t    remote_srqn;
965		} xrc;
966	} qp_type;
967	union {
968		struct {
969			struct ibv_mw	*mw;
970			uint32_t		rkey;
971			struct ibv_mw_bind_info	bind_info;
972		} bind_mw;
973		struct {
974			void		       *hdr;
975			uint16_t		hdr_sz;
976			uint16_t		mss;
977		} tso;
978	};
979};
980
981struct ibv_recv_wr {
982	uint64_t		wr_id;
983	struct ibv_recv_wr     *next;
984	struct ibv_sge	       *sg_list;
985	int			num_sge;
986};
987
988struct ibv_mw_bind {
989	uint64_t		wr_id;
990	int			send_flags;
991	struct ibv_mw_bind_info bind_info;
992};
993
994struct ibv_srq {
995	struct ibv_context     *context;
996	void		       *srq_context;
997	struct ibv_pd	       *pd;
998	uint32_t		handle;
999
1000	pthread_mutex_t		mutex;
1001	pthread_cond_t		cond;
1002	uint32_t		events_completed;
1003};
1004
1005/*
1006 * Work Queue. A QP can be created without internal WQs "packaged"
1007 * inside it; such a QP can be configured to use an "external" WQ
1008 * object as its receive/send queue.
1009 * A WQ is associated (many to one) with a Completion Queue and owns
1010 * its own WQ properties (PD, WQ size, etc.).
1011 * A WQ of type IBV_WQT_RQ:
1012 * - Contains receive WQEs; in this case its PD serves for scatter as well.
1013 * - Exposes a post-receive function used to post a list of work
1014 *   requests (WRs) to its receive queue.
1015 */
1016struct ibv_wq {
1017	struct ibv_context     *context;
1018	void		       *wq_context;
1019	struct	ibv_pd	       *pd;
1020	struct	ibv_cq	       *cq;
1021	uint32_t		wq_num;
1022	uint32_t		handle;
1023	enum ibv_wq_state       state;
1024	enum ibv_wq_type	wq_type;
1025	int (*post_recv)(struct ibv_wq *current,
1026			 struct ibv_recv_wr *recv_wr,
1027			 struct ibv_recv_wr **bad_recv_wr);
1028	pthread_mutex_t		mutex;
1029	pthread_cond_t		cond;
1030	uint32_t		events_completed;
1031	uint32_t		comp_mask;
1032};
1033
1034struct ibv_qp {
1035	struct ibv_context     *context;
1036	void		       *qp_context;
1037	struct ibv_pd	       *pd;
1038	struct ibv_cq	       *send_cq;
1039	struct ibv_cq	       *recv_cq;
1040	struct ibv_srq	       *srq;
1041	uint32_t		handle;
1042	uint32_t		qp_num;
1043	enum ibv_qp_state       state;
1044	enum ibv_qp_type	qp_type;
1045
1046	pthread_mutex_t		mutex;
1047	pthread_cond_t		cond;
1048	uint32_t		events_completed;
1049};
1050
1051struct ibv_comp_channel {
1052	struct ibv_context     *context;
1053	int			fd;
1054	int			refcnt;
1055};
1056
1057struct ibv_cq {
1058	struct ibv_context     *context;
1059	struct ibv_comp_channel *channel;
1060	void		       *cq_context;
1061	uint32_t		handle;
1062	int			cqe;
1063
1064	pthread_mutex_t		mutex;
1065	pthread_cond_t		cond;
1066	uint32_t		comp_events_completed;
1067	uint32_t		async_events_completed;
1068};
1069
1070struct ibv_poll_cq_attr {
1071	uint32_t comp_mask;
1072};
1073
1074struct ibv_cq_ex {
1075	struct ibv_context     *context;
1076	struct ibv_comp_channel *channel;
1077	void		       *cq_context;
1078	uint32_t		handle;
1079	int			cqe;
1080
1081	pthread_mutex_t		mutex;
1082	pthread_cond_t		cond;
1083	uint32_t		comp_events_completed;
1084	uint32_t		async_events_completed;
1085
1086	uint32_t		comp_mask;
1087	enum ibv_wc_status status;
1088	uint64_t wr_id;
1089	int (*start_poll)(struct ibv_cq_ex *current,
1090			     struct ibv_poll_cq_attr *attr);
1091	int (*next_poll)(struct ibv_cq_ex *current);
1092	void (*end_poll)(struct ibv_cq_ex *current);
1093	enum ibv_wc_opcode (*read_opcode)(struct ibv_cq_ex *current);
1094	uint32_t (*read_vendor_err)(struct ibv_cq_ex *current);
1095	uint32_t (*read_byte_len)(struct ibv_cq_ex *current);
1096	uint32_t (*read_imm_data)(struct ibv_cq_ex *current);
1097	uint32_t (*read_qp_num)(struct ibv_cq_ex *current);
1098	uint32_t (*read_src_qp)(struct ibv_cq_ex *current);
1099	int (*read_wc_flags)(struct ibv_cq_ex *current);
1100	uint32_t (*read_slid)(struct ibv_cq_ex *current);
1101	uint8_t (*read_sl)(struct ibv_cq_ex *current);
1102	uint8_t (*read_dlid_path_bits)(struct ibv_cq_ex *current);
1103	uint64_t (*read_completion_ts)(struct ibv_cq_ex *current);
1104	uint16_t (*read_cvlan)(struct ibv_cq_ex *current);
1105	uint32_t (*read_flow_tag)(struct ibv_cq_ex *current);
1106};
1107
1108static inline struct ibv_cq *ibv_cq_ex_to_cq(struct ibv_cq_ex *cq)
1109{
1110	return (struct ibv_cq *)cq;
1111}
1112
1113static inline int ibv_start_poll(struct ibv_cq_ex *cq,
1114				    struct ibv_poll_cq_attr *attr)
1115{
1116	return cq->start_poll(cq, attr);
1117}
1118
1119static inline int ibv_next_poll(struct ibv_cq_ex *cq)
1120{
1121	return cq->next_poll(cq);
1122}
1123
1124static inline void ibv_end_poll(struct ibv_cq_ex *cq)
1125{
1126	cq->end_poll(cq);
1127}
1128
1129static inline enum ibv_wc_opcode ibv_wc_read_opcode(struct ibv_cq_ex *cq)
1130{
1131	return cq->read_opcode(cq);
1132}
1133
1134static inline uint32_t ibv_wc_read_vendor_err(struct ibv_cq_ex *cq)
1135{
1136	return cq->read_vendor_err(cq);
1137}
1138
1139static inline uint32_t ibv_wc_read_byte_len(struct ibv_cq_ex *cq)
1140{
1141	return cq->read_byte_len(cq);
1142}
1143
1144static inline uint32_t ibv_wc_read_imm_data(struct ibv_cq_ex *cq)
1145{
1146	return cq->read_imm_data(cq);
1147}
1148
1149static inline uint32_t ibv_wc_read_qp_num(struct ibv_cq_ex *cq)
1150{
1151	return cq->read_qp_num(cq);
1152}
1153
1154static inline uint32_t ibv_wc_read_src_qp(struct ibv_cq_ex *cq)
1155{
1156	return cq->read_src_qp(cq);
1157}
1158
1159static inline int ibv_wc_read_wc_flags(struct ibv_cq_ex *cq)
1160{
1161	return cq->read_wc_flags(cq);
1162}
1163
1164static inline uint32_t ibv_wc_read_slid(struct ibv_cq_ex *cq)
1165{
1166	return cq->read_slid(cq);
1167}
1168
1169static inline uint8_t ibv_wc_read_sl(struct ibv_cq_ex *cq)
1170{
1171	return cq->read_sl(cq);
1172}
1173
1174static inline uint8_t ibv_wc_read_dlid_path_bits(struct ibv_cq_ex *cq)
1175{
1176	return cq->read_dlid_path_bits(cq);
1177}
1178
1179static inline uint64_t ibv_wc_read_completion_ts(struct ibv_cq_ex *cq)
1180{
1181	return cq->read_completion_ts(cq);
1182}
1183
1184static inline uint16_t ibv_wc_read_cvlan(struct ibv_cq_ex *cq)
1185{
1186	return cq->read_cvlan(cq);
1187}
1188
1189static inline uint32_t ibv_wc_read_flow_tag(struct ibv_cq_ex *cq)
1190{
1191	return cq->read_flow_tag(cq);
1192}
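
/*
 * Illustrative sketch (not part of the original header): the typical
 * extended-CQ polling loop built from the wrappers above, assuming the
 * usual start/next/end contract (0 on success, non-zero such as ENOENT
 * when the CQ is empty). consume() is a hypothetical helper.
 *
 *	struct ibv_poll_cq_attr attr = {};
 *
 *	if (ibv_start_poll(cq_ex, &attr))
 *		return;			CQ empty or error; no end_poll
 *	do {
 *		consume(cq_ex->wr_id, cq_ex->status,
 *			ibv_wc_read_opcode(cq_ex));
 *	} while (!ibv_next_poll(cq_ex));
 *	ibv_end_poll(cq_ex);
 */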
1193
1194static inline int ibv_post_wq_recv(struct ibv_wq *wq,
1195				   struct ibv_recv_wr *recv_wr,
1196				   struct ibv_recv_wr **bad_recv_wr)
1197{
1198	return wq->post_recv(wq, recv_wr, bad_recv_wr);
1199}
1200
1201struct ibv_ah {
1202	struct ibv_context     *context;
1203	struct ibv_pd	       *pd;
1204	uint32_t		handle;
1205};
1206
1207enum ibv_flow_flags {
1208	IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1 << 0,
1209	IBV_FLOW_ATTR_FLAGS_DONT_TRAP = 1 << 1,
1210};
1211
1212enum ibv_flow_attr_type {
1213	/* steering according to rule specifications */
1214	IBV_FLOW_ATTR_NORMAL		= 0x0,
1215	/* default unicast and multicast rule -
1216	 * receive all Eth traffic which isn't steered to any QP
1217	 */
1218	IBV_FLOW_ATTR_ALL_DEFAULT	= 0x1,
1219	/* default multicast rule -
1220	 * receive all Eth multicast traffic which isn't steered to any QP
1221	 */
1222	IBV_FLOW_ATTR_MC_DEFAULT	= 0x2,
1223	/* sniffer rule - receive all port traffic */
1224	IBV_FLOW_ATTR_SNIFFER		= 0x3,
1225};
1226
1227enum ibv_flow_spec_type {
1228	IBV_FLOW_SPEC_ETH		= 0x20,
1229	IBV_FLOW_SPEC_IPV4		= 0x30,
1230	IBV_FLOW_SPEC_IPV6		= 0x31,
1231	IBV_FLOW_SPEC_IPV4_EXT		= 0x32,
1232	IBV_FLOW_SPEC_TCP		= 0x40,
1233	IBV_FLOW_SPEC_UDP		= 0x41,
1234	IBV_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
1235	IBV_FLOW_SPEC_INNER		= 0x100,
1236	IBV_FLOW_SPEC_ACTION_TAG	= 0x1000,
1237	IBV_FLOW_SPEC_ACTION_DROP	= 0x1001,
1238};
1239
1240struct ibv_flow_eth_filter {
1241	uint8_t		dst_mac[6];
1242	uint8_t		src_mac[6];
1243	uint16_t	ether_type;
1244	/*
1245	 * same layout as 802.1Q: prio (3 bits), cfi (1 bit), vlan id (12 bits)
1246	 */
1247	uint16_t	vlan_tag;
1248};
1249
1250struct ibv_flow_spec_eth {
1251	enum ibv_flow_spec_type  type;
1252	uint16_t  size;
1253	struct ibv_flow_eth_filter val;
1254	struct ibv_flow_eth_filter mask;
1255};
1256
1257struct ibv_flow_ipv4_filter {
1258	uint32_t src_ip;
1259	uint32_t dst_ip;
1260};
1261
1262struct ibv_flow_spec_ipv4 {
1263	enum ibv_flow_spec_type  type;
1264	uint16_t  size;
1265	struct ibv_flow_ipv4_filter val;
1266	struct ibv_flow_ipv4_filter mask;
1267};
1268
1269struct ibv_flow_ipv4_ext_filter {
1270	uint32_t src_ip;
1271	uint32_t dst_ip;
1272	uint8_t  proto;
1273	uint8_t  tos;
1274	uint8_t  ttl;
1275	uint8_t  flags;
1276};
1277
1278struct ibv_flow_spec_ipv4_ext {
1279	enum ibv_flow_spec_type  type;
1280	uint16_t  size;
1281	struct ibv_flow_ipv4_ext_filter val;
1282	struct ibv_flow_ipv4_ext_filter mask;
1283};
1284
1285struct ibv_flow_ipv6_filter {
1286	uint8_t  src_ip[16];
1287	uint8_t  dst_ip[16];
1288	uint32_t flow_label;
1289	uint8_t  next_hdr;
1290	uint8_t  traffic_class;
1291	uint8_t  hop_limit;
1292};
1293
1294struct ibv_flow_spec_ipv6 {
1295	enum ibv_flow_spec_type  type;
1296	uint16_t  size;
1297	struct ibv_flow_ipv6_filter val;
1298	struct ibv_flow_ipv6_filter mask;
1299};
1300
1301struct ibv_flow_tcp_udp_filter {
1302	uint16_t dst_port;
1303	uint16_t src_port;
1304};
1305
1306struct ibv_flow_spec_tcp_udp {
1307	enum ibv_flow_spec_type  type;
1308	uint16_t  size;
1309	struct ibv_flow_tcp_udp_filter val;
1310	struct ibv_flow_tcp_udp_filter mask;
1311};
1312
1313struct ibv_flow_tunnel_filter {
1314	uint32_t tunnel_id;
1315};
1316
1317struct ibv_flow_spec_tunnel {
1318	enum ibv_flow_spec_type  type;
1319	uint16_t  size;
1320	struct ibv_flow_tunnel_filter val;
1321	struct ibv_flow_tunnel_filter mask;
1322};
1323
1324struct ibv_flow_spec_action_tag {
1325	enum ibv_flow_spec_type  type;
1326	uint16_t  size;
1327	uint32_t  tag_id;
1328};
1329
1330struct ibv_flow_spec_action_drop {
1331	enum ibv_flow_spec_type  type;
1332	uint16_t  size;
1333};
1334
1335struct ibv_flow_spec {
1336	union {
1337		struct {
1338			enum ibv_flow_spec_type	type;
1339			uint16_t		size;
1340		} hdr;
1341		struct ibv_flow_spec_eth eth;
1342		struct ibv_flow_spec_ipv4 ipv4;
1343		struct ibv_flow_spec_tcp_udp tcp_udp;
1344		struct ibv_flow_spec_ipv4_ext ipv4_ext;
1345		struct ibv_flow_spec_ipv6 ipv6;
1346		struct ibv_flow_spec_tunnel tunnel;
1347		struct ibv_flow_spec_action_tag flow_tag;
1348		struct ibv_flow_spec_action_drop drop;
1349	};
1350};
1351
1352struct ibv_flow_attr {
1353	uint32_t comp_mask;
1354	enum ibv_flow_attr_type type;
1355	uint16_t size;
1356	uint16_t priority;
1357	uint8_t num_of_specs;
1358	uint8_t port;
1359	uint32_t flags;
1360	/* Following are the optional layers according to user request
1361	 * struct ibv_flow_spec_xxx [L2]
1362	 * struct ibv_flow_spec_yyy [L3/L4]
1363	 */
1364};
1365
1366struct ibv_flow {
1367	uint32_t	   comp_mask;
1368	struct ibv_context *context;
1369	uint32_t	   handle;
1370};
1371
1372struct ibv_device;
1373struct ibv_context;
1374
1375/* Obsolete, never used, do not touch */
1376struct _ibv_device_ops {
1377	struct ibv_context *	(*_dummy1)(struct ibv_device *device, int cmd_fd);
1378	void			(*_dummy2)(struct ibv_context *context);
1379};
1380
1381enum {
1382	IBV_SYSFS_NAME_MAX	= 64,
1383	IBV_SYSFS_PATH_MAX	= 256
1384};
1385
1386struct ibv_device {
1387	struct _ibv_device_ops	_ops;
1388	enum ibv_node_type	node_type;
1389	enum ibv_transport_type	transport_type;
1390	/* Name of underlying kernel IB device, eg "mthca0" */
1391	char			name[IBV_SYSFS_NAME_MAX];
1392	/* Name of uverbs device, eg "uverbs0" */
1393	char			dev_name[IBV_SYSFS_NAME_MAX];
1394	/* Path to infiniband_verbs class device in sysfs */
1395	char			dev_path[IBV_SYSFS_PATH_MAX];
1396	/* Path to infiniband class device in sysfs */
1397	char			ibdev_path[IBV_SYSFS_PATH_MAX];
1398};
1399
1400struct ibv_context_ops {
1401	int			(*query_device)(struct ibv_context *context,
1402					      struct ibv_device_attr *device_attr);
1403	int			(*query_port)(struct ibv_context *context, uint8_t port_num,
1404					      struct ibv_port_attr *port_attr);
1405	struct ibv_pd *		(*alloc_pd)(struct ibv_context *context);
1406	int			(*dealloc_pd)(struct ibv_pd *pd);
1407	struct ibv_mr *		(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
1408					  int access);
1409	int			(*rereg_mr)(struct ibv_mr *mr,
1410					    int flags,
1411					    struct ibv_pd *pd, void *addr,
1412					    size_t length,
1413					    int access);
1414	int			(*dereg_mr)(struct ibv_mr *mr);
1415	struct ibv_mw *		(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
1416	int			(*bind_mw)(struct ibv_qp *qp, struct ibv_mw *mw,
1417					   struct ibv_mw_bind *mw_bind);
1418	int			(*dealloc_mw)(struct ibv_mw *mw);
1419	struct ibv_cq *		(*create_cq)(struct ibv_context *context, int cqe,
1420					     struct ibv_comp_channel *channel,
1421					     int comp_vector);
1422	int			(*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc);
1423	int			(*req_notify_cq)(struct ibv_cq *cq, int solicited_only);
1424	void			(*cq_event)(struct ibv_cq *cq);
1425	int			(*resize_cq)(struct ibv_cq *cq, int cqe);
1426	int			(*destroy_cq)(struct ibv_cq *cq);
1427	struct ibv_srq *	(*create_srq)(struct ibv_pd *pd,
1428					      struct ibv_srq_init_attr *srq_init_attr);
1429	int			(*modify_srq)(struct ibv_srq *srq,
1430					      struct ibv_srq_attr *srq_attr,
1431					      int srq_attr_mask);
1432	int			(*query_srq)(struct ibv_srq *srq,
1433					     struct ibv_srq_attr *srq_attr);
1434	int			(*destroy_srq)(struct ibv_srq *srq);
1435	int			(*post_srq_recv)(struct ibv_srq *srq,
1436						 struct ibv_recv_wr *recv_wr,
1437						 struct ibv_recv_wr **bad_recv_wr);
1438	struct ibv_qp *		(*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
1439	int			(*query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1440					    int attr_mask,
1441					    struct ibv_qp_init_attr *init_attr);
1442	int			(*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1443					     int attr_mask);
1444	int			(*destroy_qp)(struct ibv_qp *qp);
1445	int			(*post_send)(struct ibv_qp *qp, struct ibv_send_wr *wr,
1446					     struct ibv_send_wr **bad_wr);
1447	int			(*post_recv)(struct ibv_qp *qp, struct ibv_recv_wr *wr,
1448					     struct ibv_recv_wr **bad_wr);
1449	struct ibv_ah *		(*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr);
1450	int			(*destroy_ah)(struct ibv_ah *ah);
1451	int			(*attach_mcast)(struct ibv_qp *qp, const union ibv_gid *gid,
1452						uint16_t lid);
1453	int			(*detach_mcast)(struct ibv_qp *qp, const union ibv_gid *gid,
1454						uint16_t lid);
1455	void			(*async_event)(struct ibv_async_event *event);
1456};
1457
1458struct ibv_context {
1459	struct ibv_device      *device;
1460	struct ibv_context_ops	ops;
1461	int			cmd_fd;
1462	int			async_fd;
1463	int			num_comp_vectors;
1464	pthread_mutex_t		mutex;
1465	void		       *abi_compat;
1466};
1467
1468enum ibv_cq_init_attr_mask {
1469	IBV_CQ_INIT_ATTR_MASK_FLAGS	= 1 << 0,
1470	IBV_CQ_INIT_ATTR_MASK_RESERVED	= 1 << 1
1471};
1472
1473enum ibv_create_cq_attr_flags {
1474	IBV_CREATE_CQ_ATTR_SINGLE_THREADED = 1 << 0,
1475	IBV_CREATE_CQ_ATTR_RESERVED = 1 << 1,
1476};
1477
1478struct ibv_cq_init_attr_ex {
1479	/* Minimum number of entries required for CQ */
1480	uint32_t			cqe;
1481	/* Consumer-supplied context returned for completion events */
1482	void			*cq_context;
1483	/* Completion channel where completion events will be queued.
1484	 * May be NULL if completion events will not be used.
1485	 */
1486	struct ibv_comp_channel *channel;
1487	/* Completion vector used to signal completion events.
1488	 *  Must be < context->num_comp_vectors.
1489	 */
1490	uint32_t			comp_vector;
1491	/* Or'ed bits of enum ibv_create_cq_wc_flags. */
1492	uint64_t		wc_flags;
1493	/* compatibility mask (extended verb). Or'd flags of
1494	 * enum ibv_cq_init_attr_mask
1495	 */
1496	uint32_t		comp_mask;
1497	/* create cq attr flags - one or more flags from
1498	 * enum ibv_create_cq_attr_flags
1499	 */
1500	uint32_t		flags;
1501};
1502
1503enum ibv_values_mask {
1504	IBV_VALUES_MASK_RAW_CLOCK	= 1 << 0,
1505	IBV_VALUES_MASK_RESERVED	= 1 << 1
1506};
1507
1508struct ibv_values_ex {
1509	uint32_t	comp_mask;
1510	struct timespec raw_clock;
1511};
1512
1513enum verbs_context_mask {
1514	VERBS_CONTEXT_XRCD	= 1 << 0,
1515	VERBS_CONTEXT_SRQ	= 1 << 1,
1516	VERBS_CONTEXT_QP	= 1 << 2,
1517	VERBS_CONTEXT_CREATE_FLOW = 1 << 3,
1518	VERBS_CONTEXT_DESTROY_FLOW = 1 << 4,
1519	VERBS_CONTEXT_RESERVED	= 1 << 5
1520};
1521
1522struct verbs_context {
1523	/*  "grows up" - new fields go here */
1524	int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table);
1525	struct ibv_rwq_ind_table *(*create_rwq_ind_table)(struct ibv_context *context,
1526							  struct ibv_rwq_ind_table_init_attr *init_attr);
1527	int (*destroy_wq)(struct ibv_wq *wq);
1528	int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr);
1529	struct ibv_wq * (*create_wq)(struct ibv_context *context,
1530				     struct ibv_wq_init_attr *wq_init_attr);
1531	int (*query_rt_values)(struct ibv_context *context,
1532			       struct ibv_values_ex *values);
1533	struct ibv_cq_ex *(*create_cq_ex)(struct ibv_context *context,
1534					  struct ibv_cq_init_attr_ex *init_attr);
1535	struct verbs_ex_private *priv;
1536	int (*query_device_ex)(struct ibv_context *context,
1537			       const struct ibv_query_device_ex_input *input,
1538			       struct ibv_device_attr_ex *attr,
1539			       size_t attr_size);
1540	int (*ibv_destroy_flow) (struct ibv_flow *flow);
1541	void (*ABI_placeholder2) (void); /* DO NOT COPY THIS GARBAGE */
1542	struct ibv_flow * (*ibv_create_flow) (struct ibv_qp *qp,
1543					      struct ibv_flow_attr *flow_attr);
1544	void (*ABI_placeholder1) (void); /* DO NOT COPY THIS GARBAGE */
1545	struct ibv_qp *(*open_qp)(struct ibv_context *context,
1546			struct ibv_qp_open_attr *attr);
1547	struct ibv_qp *(*create_qp_ex)(struct ibv_context *context,
1548			struct ibv_qp_init_attr_ex *qp_init_attr_ex);
1549	int (*get_srq_num)(struct ibv_srq *srq, uint32_t *srq_num);
1550	struct ibv_srq *	(*create_srq_ex)(struct ibv_context *context,
1551						 struct ibv_srq_init_attr_ex *srq_init_attr_ex);
1552	struct ibv_xrcd *	(*open_xrcd)(struct ibv_context *context,
1553					     struct ibv_xrcd_init_attr *xrcd_init_attr);
1554	int			(*close_xrcd)(struct ibv_xrcd *xrcd);
1555	uint64_t has_comp_mask;
1556	size_t   sz;			/* Must be immediately before struct ibv_context */
1557	struct ibv_context context;	/* Must be last field in the struct */
1558};
1559
1560static inline struct verbs_context *verbs_get_ctx(struct ibv_context *ctx)
1561{
1562	return (ctx->abi_compat != __VERBS_ABI_IS_EXTENDED) ?
1563		NULL : container_of(ctx, struct verbs_context, context);
1564}
1565
1566#define verbs_get_ctx_op(ctx, op) ({ \
1567	struct verbs_context *__vctx = verbs_get_ctx(ctx); \
1568	(!__vctx || (__vctx->sz < sizeof(*__vctx) - offsetof(struct verbs_context, op)) || \
1569	 !__vctx->op) ? NULL : __vctx; })
1570
1571#define verbs_set_ctx_op(_vctx, op, ptr) ({ \
1572	struct verbs_context *vctx = _vctx; \
1573	if (vctx && (vctx->sz >= sizeof(*vctx) - offsetof(struct verbs_context, op))) \
1574		vctx->op = ptr; })
1575
1576/**
1577 * ibv_get_device_list - Get list of IB devices currently available
1578 * @num_devices: optional.  If non-NULL, set to the number of devices
1579 * returned in the array.
1580 *
1581 * Return a NULL-terminated array of IB devices.  The array can be
1582 * released with ibv_free_device_list().
1583 */
1584struct ibv_device **ibv_get_device_list(int *num_devices);
1585
1586/**
1587 * ibv_free_device_list - Free list from ibv_get_device_list()
1588 *
1589 * Free an array of devices returned from ibv_get_device_list().  Once
1590 * the array is freed, pointers to devices that were not opened with
1591 * ibv_open_device() are no longer valid.  Client code must open all
1592 * devices it intends to use before calling ibv_free_device_list().
1593 */
1594void ibv_free_device_list(struct ibv_device **list);
1595
1596/**
1597 * ibv_get_device_name - Return kernel device name
1598 */
1599const char *ibv_get_device_name(struct ibv_device *device);
1600
1601/**
1602 * ibv_get_device_guid - Return device's node GUID
1603 */
1604__be64 ibv_get_device_guid(struct ibv_device *device);
1605
1606/**
1607 * ibv_open_device - Initialize device for use
1608 */
1609struct ibv_context *ibv_open_device(struct ibv_device *device);
1610
1611/**
1612 * ibv_close_device - Release device
1613 */
1614int ibv_close_device(struct ibv_context *context);
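
/*
 * Illustrative sketch (not part of the original header): enumerate the
 * available devices, open the first one and release the list.
 *
 *	int num;
 *	struct ibv_device **list = ibv_get_device_list(&num);
 *	struct ibv_context *ctx = NULL;
 *
 *	if (list && num > 0)
 *		ctx = ibv_open_device(list[0]);
 *	ibv_free_device_list(list);	list[0] was opened, so this is safe
 *	...
 *	if (ctx)
 *		ibv_close_device(ctx);
 */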
1615
1616/**
1617 * ibv_get_async_event - Get next async event
1618 * @event: Pointer to use to return async event
1619 *
1620 * All async events returned by ibv_get_async_event() must eventually
1621 * be acknowledged with ibv_ack_async_event().
1622 */
1623int ibv_get_async_event(struct ibv_context *context,
1624			struct ibv_async_event *event);
1625
1626/**
1627 * ibv_ack_async_event - Acknowledge an async event
1628 * @event: Event to be acknowledged.
1629 *
1630 * All async events which are returned by ibv_get_async_event() must
1631 * be acknowledged.  To avoid races, destroying an object (CQ, SRQ or
1632 * QP) will wait for all affiliated events to be acknowledged, so
1633 * there should be a one-to-one correspondence between acks and
1634 * successful gets.
1635 */
1636void ibv_ack_async_event(struct ibv_async_event *event);
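
/*
 * Illustrative sketch (not part of the original header): a minimal async
 * event loop pairing every successful get with an ack. handle_event() is
 * a hypothetical consumer.
 *
 *	struct ibv_async_event event;
 *
 *	while (!ibv_get_async_event(ctx, &event)) {
 *		handle_event(&event);
 *		ibv_ack_async_event(&event);
 *	}
 */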
1637
1638/**
1639 * ibv_query_device - Get device properties
1640 */
1641int ibv_query_device(struct ibv_context *context,
1642		     struct ibv_device_attr *device_attr);
1643
1644/**
1645 * ibv_query_port - Get port properties
1646 */
1647int ibv_query_port(struct ibv_context *context, uint8_t port_num,
1648		   struct ibv_port_attr *port_attr);
1649
1650static inline int ___ibv_query_port(struct ibv_context *context,
1651				    uint8_t port_num,
1652				    struct ibv_port_attr *port_attr)
1653{
1654	/* For compatibility when running with old libibverbs */
1655	port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED;
1656	port_attr->reserved   = 0;
1657
1658	return ibv_query_port(context, port_num, port_attr);
1659}
1660
1661#define ibv_query_port(context, port_num, port_attr) \
1662	___ibv_query_port(context, port_num, port_attr)
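
/*
 * Illustrative sketch (not part of the original header): query port 1
 * and branch on the link layer, which ___ibv_query_port() pre-initializes
 * for compatibility with old libibverbs.
 *
 *	struct ibv_port_attr port_attr;
 *
 *	if (!ibv_query_port(ctx, 1, &port_attr) &&
 *	    port_attr.state == IBV_PORT_ACTIVE) {
 *		if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET)
 *			...	RoCE: address peers via GIDs, not LIDs
 *	}
 */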
1663
1664/**
1665 * ibv_query_gid - Get a GID table entry
1666 */
1667int ibv_query_gid(struct ibv_context *context, uint8_t port_num,
1668		  int index, union ibv_gid *gid);
1669
1670/**
1671 * ibv_query_pkey - Get a P_Key table entry
1672 */
1673int ibv_query_pkey(struct ibv_context *context, uint8_t port_num,
1674		   int index, __be16 *pkey);
1675
1676/**
1677 * ibv_alloc_pd - Allocate a protection domain
1678 */
1679struct ibv_pd *ibv_alloc_pd(struct ibv_context *context);
1680
1681/**
1682 * ibv_dealloc_pd - Free a protection domain
1683 */
1684int ibv_dealloc_pd(struct ibv_pd *pd);
1685
1686static inline struct ibv_flow *ibv_create_flow(struct ibv_qp *qp,
1687					       struct ibv_flow_attr *flow)
1688{
1689	struct verbs_context *vctx = verbs_get_ctx_op(qp->context,
1690						      ibv_create_flow);
1691	if (!vctx || !vctx->ibv_create_flow) {
1692		errno = ENOSYS;
1693		return NULL;
1694	}
1695
1696	return vctx->ibv_create_flow(qp, flow);
1697}
1698
1699static inline int ibv_destroy_flow(struct ibv_flow *flow_id)
1700{
1701	struct verbs_context *vctx = verbs_get_ctx_op(flow_id->context,
1702						      ibv_destroy_flow);
1703	if (!vctx || !vctx->ibv_destroy_flow)
1704		return -ENOSYS;
1705	return vctx->ibv_destroy_flow(flow_id);
1706}
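
/*
 * Illustrative sketch (not part of the original header): flow
 * specifications are laid out contiguously in memory right after
 * struct ibv_flow_attr, as the comment in struct ibv_flow_attr below
 * describes. This steers one destination MAC to the QP.
 *
 *	struct {
 *		struct ibv_flow_attr	 attr;
 *		struct ibv_flow_spec_eth eth;
 *	} flow = {
 *		.attr = {
 *			.type = IBV_FLOW_ATTR_NORMAL,
 *			.size = sizeof(flow),
 *			.num_of_specs = 1,
 *			.port = 1,
 *		},
 *		.eth = {
 *			.type = IBV_FLOW_SPEC_ETH,
 *			.size = sizeof(flow.eth),
 *			.val.dst_mac  = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
 *			.mask.dst_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
 *		},
 *	};
 *	struct ibv_flow *f = ibv_create_flow(qp, &flow.attr);
 */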
1707
1708/**
1709 * ibv_open_xrcd - Open an extended reliable connection (XRC) domain
1710 */
1711static inline struct ibv_xrcd *
1712ibv_open_xrcd(struct ibv_context *context, struct ibv_xrcd_init_attr *xrcd_init_attr)
1713{
1714	struct verbs_context *vctx = verbs_get_ctx_op(context, open_xrcd);
1715	if (!vctx) {
1716		errno = ENOSYS;
1717		return NULL;
1718	}
1719	return vctx->open_xrcd(context, xrcd_init_attr);
1720}
1721
1722/**
1723 * ibv_close_xrcd - Close an extended reliable connection (XRC) domain
1724 */
1725static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
1726{
1727	struct verbs_context *vctx = verbs_get_ctx(xrcd->context);
1728	return vctx->close_xrcd(xrcd);
1729}
1730
1731/**
1732 * ibv_reg_mr - Register a memory region
1733 */
1734struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
1735			  size_t length, int access);
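
/*
 * Illustrative sketch (not part of the original header): register a
 * buffer (buf/len assumed allocated) for local use and as an RDMA-write
 * target, then deregister it.
 *
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
 *				       IBV_ACCESS_LOCAL_WRITE |
 *				       IBV_ACCESS_REMOTE_WRITE);
 *	if (mr) {
 *		...	hand mr->rkey and buf's address to the remote peer
 *		ibv_dereg_mr(mr);
 *	}
 */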
1736
1737
1738enum ibv_rereg_mr_err_code {
1739	/* Old MR is valid, invalid input */
1740	IBV_REREG_MR_ERR_INPUT = -1,
1741	/* Old MR is valid, failed via don't fork on new address range */
1742	IBV_REREG_MR_ERR_DONT_FORK_NEW = -2,
1743	/* New MR is valid, failed via do fork on old address range */
1744	IBV_REREG_MR_ERR_DO_FORK_OLD = -3,
1745	/* MR shouldn't be used, command error */
1746	IBV_REREG_MR_ERR_CMD = -4,
1747	/* MR shouldn't be used, command error, invalid fork state on new address range */
1748	IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW = -5,
1749};
1750
1751/**
1752 * ibv_rereg_mr - Re-Register a memory region
1753 */
1754int ibv_rereg_mr(struct ibv_mr *mr, int flags,
1755		 struct ibv_pd *pd, void *addr,
1756		 size_t length, int access);
1757/**
1758 * ibv_dereg_mr - Deregister a memory region
1759 */
1760int ibv_dereg_mr(struct ibv_mr *mr);
1761
1762/**
1763 * ibv_alloc_mw - Allocate a memory window
1764 */
1765static inline struct ibv_mw *ibv_alloc_mw(struct ibv_pd *pd,
1766					  enum ibv_mw_type type)
1767{
1768	struct ibv_mw *mw;
1769
1770	if (!pd->context->ops.alloc_mw) {
1771		errno = ENOSYS;
1772		return NULL;
1773	}
1774
1775	mw = pd->context->ops.alloc_mw(pd, type);
1776	return mw;
1777}
1778
1779/**
1780 * ibv_dealloc_mw - Free a memory window
1781 */
1782static inline int ibv_dealloc_mw(struct ibv_mw *mw)
1783{
1784	return mw->context->ops.dealloc_mw(mw);
1785}
1786
1787/**
1788 * ibv_inc_rkey - Increment the 8 least significant bits of the given rkey
1789 */
1790static inline uint32_t ibv_inc_rkey(uint32_t rkey)
1791{
1792	const uint32_t mask = 0x000000ff;
1793	uint8_t newtag = (uint8_t)((rkey + 1) & mask);
1794
1795	return (rkey & ~mask) | newtag;
1796}
1797
1798/**
1799 * ibv_bind_mw - Bind a memory window to a region
1800 */
1801static inline int ibv_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
1802			      struct ibv_mw_bind *mw_bind)
1803{
1804	if (mw->type != IBV_MW_TYPE_1)
1805		return EINVAL;
1806
1807	return mw->context->ops.bind_mw(qp, mw, mw_bind);
1808}
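
/*
 * Illustrative sketch (not part of the original header): bind a type 1
 * memory window over a registered region; on success the window's rkey
 * is assumed to be updated in mw->rkey.
 *
 *	struct ibv_mw_bind bind = {
 *		.wr_id      = 1,
 *		.send_flags = IBV_SEND_SIGNALED,
 *		.bind_info  = {
 *			.mr     = mr,
 *			.addr   = (uint64_t)(uintptr_t)buf,
 *			.length = len,
 *			.mw_access_flags = IBV_ACCESS_REMOTE_WRITE,
 *		},
 *	};
 *
 *	if (!ibv_bind_mw(qp, mw, &bind))
 *		...	advertise mw->rkey to the peer
 */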
1809
1810/**
1811 * ibv_create_comp_channel - Create a completion event channel
1812 */
1813struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context);
1814
1815/**
1816 * ibv_destroy_comp_channel - Destroy a completion event channel
1817 */
1818int ibv_destroy_comp_channel(struct ibv_comp_channel *channel);
1819
1820/**
1821 * ibv_create_cq - Create a completion queue
1822 * @context - Context CQ will be attached to
1823 * @cqe - Minimum number of entries required for CQ
1824 * @cq_context - Consumer-supplied context returned for completion events
1825 * @channel - Completion channel where completion events will be queued.
1826 *     May be NULL if completion events will not be used.
1827 * @comp_vector - Completion vector used to signal completion events.
1828 *     Must be >= 0 and < context->num_comp_vectors.
1829 */
1830struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
1831			     void *cq_context,
1832			     struct ibv_comp_channel *channel,
1833			     int comp_vector);
1834
1835/**
1836 * ibv_create_cq_ex - Create a completion queue
1837 * @context - Context CQ will be attached to
1838 * @cq_attr - Attributes to create the CQ with
1839 */
1840static inline
1841struct ibv_cq_ex *ibv_create_cq_ex(struct ibv_context *context,
1842				   struct ibv_cq_init_attr_ex *cq_attr)
1843{
1844	struct verbs_context *vctx = verbs_get_ctx_op(context, create_cq_ex);
1845
1846	if (!vctx) {
1847		errno = ENOSYS;
1848		return NULL;
1849	}
1850
1851	if (cq_attr->comp_mask & ~(IBV_CQ_INIT_ATTR_MASK_RESERVED - 1)) {
1852		errno = EINVAL;
1853		return NULL;
1854	}
1855
1856	return vctx->create_cq_ex(context, cq_attr);
1857}
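
/*
 * Illustrative sketch (not part of the original header): create an
 * extended CQ that also reports completion timestamps, assuming the
 * device advertises a non-zero completion_timestamp_mask.
 *
 *	struct ibv_cq_init_attr_ex attr = {
 *		.cqe      = 256,
 *		.wc_flags = IBV_WC_STANDARD_FLAGS |
 *			    IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
 *	};
 *	struct ibv_cq_ex *cq_ex = ibv_create_cq_ex(ctx, &attr);
 */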
1858
1859/**
1860 * ibv_resize_cq - Modifies the capacity of the CQ.
1861 * @cq: The CQ to resize.
1862 * @cqe: The minimum size of the CQ.
1863 *
1864 * Users can examine the cq structure to determine the actual CQ size.
1865 */
1866int ibv_resize_cq(struct ibv_cq *cq, int cqe);
1867
1868/**
1869 * ibv_destroy_cq - Destroy a completion queue
1870 */
1871int ibv_destroy_cq(struct ibv_cq *cq);
1872
1873/**
1874 * ibv_get_cq_event - Read next CQ event
1875 * @channel: Channel to get next event from.
1876 * @cq: Used to return pointer to CQ.
1877 * @cq_context: Used to return consumer-supplied CQ context.
1878 *
1879 * All completion events returned by ibv_get_cq_event() must
1880 * eventually be acknowledged with ibv_ack_cq_events().
1881 */
1882int ibv_get_cq_event(struct ibv_comp_channel *channel,
1883		     struct ibv_cq **cq, void **cq_context);
1884
1885/**
1886 * ibv_ack_cq_events - Acknowledge CQ completion events
1887 * @cq: CQ to acknowledge events for
1888 * @nevents: Number of events to acknowledge.
1889 *
1890 * All completion events which are returned by ibv_get_cq_event() must
1891 * be acknowledged.  To avoid races, ibv_destroy_cq() will wait for
1892 * all completion events to be acknowledged, so there should be a
1893 * one-to-one correspondence between acks and successful gets.  An
1894 * application may accumulate multiple completion events and
1895 * acknowledge them in a single call to ibv_ack_cq_events() by passing
1896 * the number of events to ack in @nevents.
1897 */
1898void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents);
1899
1900/**
1901 * ibv_poll_cq - Poll a CQ for work completions
1902 * @cq: the CQ being polled
1903 * @num_entries: maximum number of completions to return
1904 * @wc: array of at least @num_entries of &struct ibv_wc where completions
1905 *   will be returned
1906 *
1907 * Poll a CQ for (possibly multiple) completions.  If the return value
1908 * is < 0, an error occurred.  If the return value is >= 0, it is the
1909 * number of completions returned.  If the return value is
1910 * non-negative and strictly less than num_entries, then the CQ was
1911 * emptied.
1912 */
1913static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc)
1914{
1915	return cq->context->ops.poll_cq(cq, num_entries, wc);
1916}
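
/*
 * Illustrative sketch (not part of the original header): drain up to 16
 * completions per call and check each status (assumes <stdio.h> for
 * fprintf).
 *
 *	struct ibv_wc wc[16];
 *	int i, n = ibv_poll_cq(cq, 16, wc);
 *
 *	for (i = 0; i < n; ++i) {
 *		if (wc[i].status != IBV_WC_SUCCESS)
 *			fprintf(stderr, "wr %llu failed: %s\n",
 *				(unsigned long long)wc[i].wr_id,
 *				ibv_wc_status_str(wc[i].status));
 *	}
 */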
1917
1918/**
1919 * ibv_req_notify_cq - Request completion notification on a CQ.  An
1920 *   event will be added to the completion channel associated with the
1921 *   CQ when an entry is added to the CQ.
1922 * @cq: The completion queue to request notification for.
1923 * @solicited_only: If non-zero, an event will be generated only for
1924 *   the next solicited CQ entry.  If zero, any CQ entry, solicited or
1925 *   not, will generate an event.
1926 */
1927static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only)
1928{
1929	return cq->context->ops.req_notify_cq(cq, solicited_only);
1930}
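
/*
 * Illustrative sketch (not part of the original header): the usual
 * event-driven pattern - arm the CQ, sleep in ibv_get_cq_event(), ack,
 * re-arm, then poll until empty so no completion is missed.
 *
 *	struct ibv_cq *ev_cq;
 *	void *ev_ctx;
 *	struct ibv_wc wc;
 *
 *	ibv_req_notify_cq(cq, 0);
 *	if (!ibv_get_cq_event(channel, &ev_cq, &ev_ctx)) {
 *		ibv_ack_cq_events(ev_cq, 1);
 *		ibv_req_notify_cq(ev_cq, 0);
 *		while (ibv_poll_cq(ev_cq, 1, &wc) > 0)
 *			...	consume wc
 *	}
 */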
1931
1932/**
1933 * ibv_create_srq - Creates an SRQ associated with the specified protection
1934 *   domain.
1935 * @pd: The protection domain associated with the SRQ.
1936 * @srq_init_attr: A list of initial attributes required to create the SRQ.
1937 *
1938 * srq_attr->max_wr and srq_attr->max_sge are read to determine the
1939 * requested size of the SRQ, and set to the actual values allocated
1940 * on return.  If ibv_create_srq() succeeds, then max_wr and max_sge
1941 * will always be at least as large as the requested values.
1942 */
1943struct ibv_srq *ibv_create_srq(struct ibv_pd *pd,
1944			       struct ibv_srq_init_attr *srq_init_attr);
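
/*
 * Usage sketch (illustrative only): create an SRQ that can hold at
 * least 'depth' outstanding receives.  'pd' and 'depth' are assumed
 * to exist.
 *
 *	struct ibv_srq_init_attr init = {
 *		.attr = { .max_wr = depth, .max_sge = 1 }
 *	};
 *	struct ibv_srq *srq = ibv_create_srq(pd, &init);
 *
 * On success init.attr.max_wr holds the depth actually allocated,
 * which may exceed the request.
 */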

/**
 * ibv_create_srq_ex - Creates an SRQ using the extended attribute
 *   structure; falls back to ibv_create_srq() when only the PD (and,
 *   optionally, the basic SRQ type) is specified.
 */
static inline struct ibv_srq *
ibv_create_srq_ex(struct ibv_context *context,
		  struct ibv_srq_init_attr_ex *srq_init_attr_ex)
{
	struct verbs_context *vctx;
	uint32_t mask = srq_init_attr_ex->comp_mask;

	if (!(mask & ~(IBV_SRQ_INIT_ATTR_PD | IBV_SRQ_INIT_ATTR_TYPE)) &&
	    (mask & IBV_SRQ_INIT_ATTR_PD) &&
	    (!(mask & IBV_SRQ_INIT_ATTR_TYPE) ||
	     (srq_init_attr_ex->srq_type == IBV_SRQT_BASIC)))
		return ibv_create_srq(srq_init_attr_ex->pd,
				      (struct ibv_srq_init_attr *)srq_init_attr_ex);

	vctx = verbs_get_ctx_op(context, create_srq_ex);
	if (!vctx) {
		errno = ENOSYS;
		return NULL;
	}
	return vctx->create_srq_ex(context, srq_init_attr_ex);
}

/**
 * ibv_modify_srq - Modifies the attributes for the specified SRQ.
 * @srq: The SRQ to modify.
 * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
 *   the current values of selected SRQ attributes are returned.
 * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
 *   are being modified.
 *
 * The mask may contain IBV_SRQ_MAX_WR to resize the SRQ and/or
 * IBV_SRQ_LIMIT to set the SRQ's limit and request notification when
 * the number of receives queued drops below the limit.
 */
int ibv_modify_srq(struct ibv_srq *srq,
		   struct ibv_srq_attr *srq_attr,
		   int srq_attr_mask);
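
/*
 * Usage sketch (illustrative only): arm the SRQ limit so that an
 * IBV_EVENT_SRQ_LIMIT_REACHED async event is generated once fewer
 * than 16 receives remain queued.  'srq' is assumed to exist.
 *
 *	struct ibv_srq_attr attr = { .srq_limit = 16 };
 *
 *	if (ibv_modify_srq(srq, &attr, IBV_SRQ_LIMIT))
 *		return;
 */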

/**
 * ibv_query_srq - Returns the attribute list and current values for the
 *   specified SRQ.
 * @srq: The SRQ to query.
 * @srq_attr: The attributes of the specified SRQ.
 */
int ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);

/**
 * ibv_get_srq_num - Returns the SRQ number of the specified SRQ.
 * @srq: The SRQ to query.
 * @srq_num: Used to return the SRQ number.
 */
static inline int ibv_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num)
{
	struct verbs_context *vctx = verbs_get_ctx_op(srq->context, get_srq_num);

	if (!vctx)
		return ENOSYS;

	return vctx->get_srq_num(srq, srq_num);
}

/**
 * ibv_destroy_srq - Destroys the specified SRQ.
 * @srq: The SRQ to destroy.
 */
int ibv_destroy_srq(struct ibv_srq *srq);

/**
 * ibv_post_srq_recv - Posts a list of work requests to the specified SRQ.
 * @srq: The SRQ to post the work request on.
 * @recv_wr: A list of work requests to post on the receive queue.
 * @bad_recv_wr: On an immediate failure, this parameter will reference
 *   the work request that failed to be posted on the SRQ.
 */
static inline int ibv_post_srq_recv(struct ibv_srq *srq,
				    struct ibv_recv_wr *recv_wr,
				    struct ibv_recv_wr **bad_recv_wr)
{
	return srq->context->ops.post_srq_recv(srq, recv_wr, bad_recv_wr);
}
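
/*
 * Usage sketch (illustrative only): replenish one receive buffer on
 * the SRQ.  'srq', 'buf', 'len' and 'mr' (from ibv_reg_mr()) are
 * assumed to exist.
 *
 *	struct ibv_sge sge = {
 *		.addr   = (uintptr_t)buf,
 *		.length = len,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
 *	struct ibv_recv_wr *bad;
 *
 *	if (ibv_post_srq_recv(srq, &wr, &bad))
 *		return;
 */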

/**
 * ibv_create_qp - Create a queue pair.
 */
struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
			     struct ibv_qp_init_attr *qp_init_attr);

/**
 * ibv_create_qp_ex - Create a queue pair using the extended attribute
 *   structure; falls back to ibv_create_qp() when only the PD is given.
 */
static inline struct ibv_qp *
ibv_create_qp_ex(struct ibv_context *context, struct ibv_qp_init_attr_ex *qp_init_attr_ex)
{
	struct verbs_context *vctx;
	uint32_t mask = qp_init_attr_ex->comp_mask;

	if (mask == IBV_QP_INIT_ATTR_PD)
		return ibv_create_qp(qp_init_attr_ex->pd,
				     (struct ibv_qp_init_attr *)qp_init_attr_ex);

	vctx = verbs_get_ctx_op(context, create_qp_ex);
	if (!vctx) {
		errno = ENOSYS;
		return NULL;
	}
	return vctx->create_qp_ex(context, qp_init_attr_ex);
}
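
/*
 * Usage sketch (illustrative only): create an RC QP sharing one CQ
 * for sends and receives.  'pd' and 'cq' are assumed to exist; the
 * queue depths are arbitrary examples.
 *
 *	struct ibv_qp_init_attr attr = {
 *		.send_cq = cq,
 *		.recv_cq = cq,
 *		.cap     = { .max_send_wr = 64, .max_recv_wr = 64,
 *			     .max_send_sge = 1, .max_recv_sge = 1 },
 *		.qp_type = IBV_QPT_RC,
 *	};
 *	struct ibv_qp *qp = ibv_create_qp(pd, &attr);
 */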

/**
 * ibv_query_rt_values_ex - Query an RDMA device for its current
 *   real-time values (e.g. the raw hardware clock).
 * @values: in/out - on input, values->comp_mask holds OR'ed bits of
 *   enum ibv_values_mask selecting which values to query; on output,
 *   the selected values are returned.
 */
static inline int
ibv_query_rt_values_ex(struct ibv_context *context,
		       struct ibv_values_ex *values)
{
	struct verbs_context *vctx;

	vctx = verbs_get_ctx_op(context, query_rt_values);
	if (!vctx)
		return ENOSYS;

	if (values->comp_mask & ~(IBV_VALUES_MASK_RESERVED - 1))
		return EINVAL;

	return vctx->query_rt_values(context, values);
}

/**
 * ibv_query_device_ex - Get extended device properties
 */
static inline int
ibv_query_device_ex(struct ibv_context *context,
		    const struct ibv_query_device_ex_input *input,
		    struct ibv_device_attr_ex *attr)
{
	struct verbs_context *vctx;
	int ret;

	vctx = verbs_get_ctx_op(context, query_device_ex);
	if (!vctx)
		goto legacy;

	ret = vctx->query_device_ex(context, input, attr, sizeof(*attr));
	if (ret == ENOSYS)
		goto legacy;

	return ret;

legacy:
	memset(attr, 0, sizeof(*attr));
	ret = ibv_query_device(context, &attr->orig_attr);

	return ret;
}
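
/*
 * Usage sketch (illustrative only): 'ctx' is assumed to come from
 * ibv_open_device() elsewhere, and passing NULL for @input is the
 * common usage.  On providers without the extended verb the wrapper
 * above transparently falls back to the legacy attributes.
 *
 *	struct ibv_device_attr_ex attr;
 *
 *	if (!ibv_query_device_ex(ctx, NULL, &attr))
 *		max_qp = attr.orig_attr.max_qp;
 */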

/**
 * ibv_open_qp - Open a shareable queue pair.
 */
static inline struct ibv_qp *
ibv_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *qp_open_attr)
{
	struct verbs_context *vctx = verbs_get_ctx_op(context, open_qp);
	if (!vctx) {
		errno = ENOSYS;
		return NULL;
	}
	return vctx->open_qp(context, qp_open_attr);
}

/**
 * ibv_modify_qp - Modify a queue pair.
 */
int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		  int attr_mask);
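
/*
 * Usage sketch (illustrative only): move a freshly created RC QP
 * from RESET to INIT.  Port 1, pkey index 0 and the access flags are
 * example values, not requirements.
 *
 *	struct ibv_qp_attr attr = {
 *		.qp_state        = IBV_QPS_INIT,
 *		.pkey_index      = 0,
 *		.port_num        = 1,
 *		.qp_access_flags = IBV_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
 *			  IBV_QP_PORT | IBV_QP_ACCESS_FLAGS))
 *		return;
 */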

/**
 * ibv_query_qp - Returns the attribute list and current values for the
 *   specified QP.
 * @qp: The QP to query.
 * @attr: The attributes of the specified QP.
 * @attr_mask: A bit-mask used to select specific attributes to query.
 * @init_attr: Additional attributes of the selected QP.
 *
 * @attr_mask may be used to limit the query to gathering only the
 * selected attributes.
 */
int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		 int attr_mask,
		 struct ibv_qp_init_attr *init_attr);

/**
 * ibv_destroy_qp - Destroy a queue pair.
 */
int ibv_destroy_qp(struct ibv_qp *qp);

/*
 * ibv_create_wq - Creates a WQ associated with the protection domain
 * specified in @wq_init_attr.
 * @context: ibv_context.
 * @wq_init_attr: A list of initial attributes required to create the
 * WQ. If WQ creation succeeds, then the attributes are updated to
 * the actual capabilities of the created WQ.
 *
 * wq_init_attr->max_wr and wq_init_attr->max_sge determine the
 * requested size of the WQ, and are set to the actual values allocated
 * on return.
 * If ibv_create_wq() succeeds, then max_wr and max_sge will always be
 * at least as large as the requested values.
 *
 * Return Value
 * ibv_create_wq() returns a pointer to the created WQ, or NULL if the request
 * fails.
 */
static inline struct ibv_wq *ibv_create_wq(struct ibv_context *context,
					   struct ibv_wq_init_attr *wq_init_attr)
{
	struct verbs_context *vctx = verbs_get_ctx_op(context, create_wq);
	struct ibv_wq *wq;

	if (!vctx) {
		errno = ENOSYS;
		return NULL;
	}

	wq = vctx->create_wq(context, wq_init_attr);
	if (wq) {
		wq->events_completed = 0;
		pthread_mutex_init(&wq->mutex, NULL);
		pthread_cond_init(&wq->cond, NULL);
	}

	return wq;
}
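
/*
 * Usage sketch (illustrative only): create a basic receive WQ, e.g.
 * for use as an entry in an RSS indirection table.  'ctx', 'pd' and
 * 'cq' are assumed to exist; the depth is an arbitrary example.
 *
 *	struct ibv_wq_init_attr attr = {
 *		.wq_type = IBV_WQT_RQ,
 *		.max_wr  = 256,
 *		.max_sge = 1,
 *		.pd      = pd,
 *		.cq      = cq,
 *	};
 *	struct ibv_wq *wq = ibv_create_wq(ctx, &attr);
 */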

/*
 * ibv_modify_wq - Modifies the attributes for the specified WQ.
 * @wq: The WQ to modify.
 * @wq_attr: On input, specifies the WQ attributes to modify;
 *    wq_attr->attr_mask is a bit-mask used to specify which attributes
 *    of the WQ are being modified.  On output, the current values of
 *    the selected WQ attributes are returned.
 *
 * Return Value
 * ibv_modify_wq() returns 0 on success, or the value of errno
 * on failure (which indicates the failure reason).
 */
static inline int ibv_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr)
{
	struct verbs_context *vctx = verbs_get_ctx_op(wq->context, modify_wq);

	if (!vctx)
		return ENOSYS;

	return vctx->modify_wq(wq, wq_attr);
}

/*
 * ibv_destroy_wq - Destroys the specified WQ.
 * @wq: The WQ to destroy.
 *
 * Return Value
 * ibv_destroy_wq() returns 0 on success, or the value of errno
 * on failure (which indicates the failure reason).
 */
static inline int ibv_destroy_wq(struct ibv_wq *wq)
{
	struct verbs_context *vctx;

	vctx = verbs_get_ctx_op(wq->context, destroy_wq);
	if (!vctx)
		return ENOSYS;

	return vctx->destroy_wq(wq);
}

/*
 * ibv_create_rwq_ind_table - Creates a receive work queue Indirection Table.
 * @context: ibv_context.
 * @init_attr: A list of initial attributes required to create the Indirection Table.
 *
 * Return Value
 * ibv_create_rwq_ind_table() returns a pointer to the created
 * Indirection Table, or NULL if the request fails.
 */
static inline struct ibv_rwq_ind_table *ibv_create_rwq_ind_table(struct ibv_context *context,
								 struct ibv_rwq_ind_table_init_attr *init_attr)
{
	struct verbs_context *vctx;

	vctx = verbs_get_ctx_op(context, create_rwq_ind_table);
	if (!vctx) {
		errno = ENOSYS;
		return NULL;
	}

	return vctx->create_rwq_ind_table(context, init_attr);
}
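
/*
 * Usage sketch (illustrative only): build a four-entry Indirection
 * Table over an array 'wqs' of struct ibv_wq pointers created with
 * ibv_create_wq().  Note that the table size is expressed as log2 of
 * the number of entries, so 2 here means 1 << 2 == 4 entries.
 *
 *	struct ibv_rwq_ind_table_init_attr attr = {
 *		.log_ind_tbl_size = 2,
 *		.ind_tbl          = wqs,
 *		.comp_mask        = 0,
 *	};
 *	struct ibv_rwq_ind_table *tbl = ibv_create_rwq_ind_table(ctx, &attr);
 */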

/*
 * ibv_destroy_rwq_ind_table - Destroys the specified Indirection Table.
 * @rwq_ind_table: The Indirection Table to destroy.
 *
 * Return Value
 * ibv_destroy_rwq_ind_table() returns 0 on success, or the value of errno
 * on failure (which indicates the failure reason).
 */
static inline int ibv_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
{
	struct verbs_context *vctx;

	vctx = verbs_get_ctx_op(rwq_ind_table->context, destroy_rwq_ind_table);
	if (!vctx)
		return ENOSYS;

	return vctx->destroy_rwq_ind_table(rwq_ind_table);
}

/**
 * ibv_post_send - Post a list of work requests to a send queue.
 *
 * If the IBV_SEND_INLINE flag is set, the data buffers can be reused
 * immediately after the call returns.
 */
static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
				struct ibv_send_wr **bad_wr)
{
	return qp->context->ops.post_send(qp, wr, bad_wr);
}
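
/*
 * Usage sketch (illustrative only): post a single signaled, inlined
 * send.  Because IBV_SEND_INLINE copies the payload at post time,
 * 'buf' may be reused as soon as the call returns; this assumes the
 * QP was created with cap.max_inline_data >= 16.  No lkey is needed
 * for inline data.
 *
 *	struct ibv_sge sge = { .addr = (uintptr_t)buf, .length = 16 };
 *	struct ibv_send_wr wr = {
 *		.wr_id      = 42,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IBV_WR_SEND,
 *		.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE,
 *	};
 *	struct ibv_send_wr *bad;
 *
 *	if (ibv_post_send(qp, &wr, &bad))
 *		return;
 */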

/**
 * ibv_post_recv - Post a list of work requests to a receive queue.
 */
static inline int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr,
				struct ibv_recv_wr **bad_wr)
{
	return qp->context->ops.post_recv(qp, wr, bad_wr);
}

/**
 * ibv_create_ah - Create an address handle.
 */
struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);

/**
 * ibv_init_ah_from_wc - Initializes address handle attributes from a
 *   work completion.
 * @context: Device context on which the received message arrived.
 * @port_num: Port on which the received message arrived.
 * @wc: Work completion associated with the received message.
 * @grh: References the received global route header.  This parameter is
 *   ignored unless the work completion indicates that the GRH is valid.
 * @ah_attr: Returned attributes that can be used when creating an address
 *   handle for replying to the message.
 */
int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num,
			struct ibv_wc *wc, struct ibv_grh *grh,
			struct ibv_ah_attr *ah_attr);

/**
 * ibv_create_ah_from_wc - Creates an address handle associated with the
 *   sender of the specified work completion.
 * @pd: The protection domain associated with the address handle.
 * @wc: Work completion information associated with a received message.
 * @grh: References the received global route header.  This parameter is
 *   ignored unless the work completion indicates that the GRH is valid.
 * @port_num: The outbound port number to associate with the address.
 *
 * The address handle is used to reference a local or global destination
 * in all UD QP post sends.
 */
struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc,
				     struct ibv_grh *grh, uint8_t port_num);
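
/*
 * Usage sketch (illustrative only): reply to a received UD message.
 * 'pd', 'wc', 'grh' (the 40-byte header at the start of the receive
 * buffer) and 'port_num' are assumed to exist; 'qkey' is the remote
 * QKey agreed upon out of band.
 *
 *	struct ibv_ah *ah = ibv_create_ah_from_wc(pd, &wc, grh, port_num);
 *
 * The handle is then referenced from the UD fields of a send WR:
 *
 *	wr.wr.ud.ah          = ah;
 *	wr.wr.ud.remote_qpn  = wc.src_qp;
 *	wr.wr.ud.remote_qkey = qkey;
 */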

/**
 * ibv_destroy_ah - Destroy an address handle.
 */
int ibv_destroy_ah(struct ibv_ah *ah);

/**
 * ibv_attach_mcast - Attaches the specified QP to a multicast group.
 * @qp: QP to attach to the multicast group.  The QP must be a UD QP.
 * @gid: Multicast group GID.
 * @lid: Multicast group LID in host byte order.
 *
 * In order to route multicast packets correctly, subnet
 * administration must have created the multicast group and configured
 * the fabric appropriately.  The port associated with the specified
 * QP must also be a member of the multicast group.
 */
int ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);

/**
 * ibv_detach_mcast - Detaches the specified QP from a multicast group.
 * @qp: QP to detach from the multicast group.
 * @gid: Multicast group GID.
 * @lid: Multicast group LID in host byte order.
 */
int ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);

/**
 * ibv_fork_init - Prepare data structures so that fork() may be used
 * safely.  If this function is not called or returns a non-zero
 * status, then libibverbs data structures are not fork()-safe and the
 * effect of an application calling fork() is undefined.
 */
int ibv_fork_init(void);

/**
 * ibv_node_type_str - Return string describing node_type enum value
 */
const char *ibv_node_type_str(enum ibv_node_type node_type);

/**
 * ibv_port_state_str - Return string describing port_state enum value
 */
const char *ibv_port_state_str(enum ibv_port_state port_state);

/**
 * ibv_event_type_str - Return string describing event_type enum value
 */
const char *ibv_event_type_str(enum ibv_event_type event);

#define ETHERNET_LL_SIZE 6

/**
 * ibv_resolve_eth_l2_from_gid - Resolve the destination MAC address
 *   (and VLAN ID, if any) for the Ethernet L2 addressing described
 *   by @attr.
 */
int ibv_resolve_eth_l2_from_gid(struct ibv_context *context,
				struct ibv_ah_attr *attr,
				uint8_t eth_mac[ETHERNET_LL_SIZE],
				uint16_t *vid);

/**
 * ibv_is_qpt_supported - Test whether the QP type @qpt is set in the
 *   supported-QP-types capability bit-mask @caps.
 */
static inline int ibv_is_qpt_supported(uint32_t caps, enum ibv_qp_type qpt)
{
	return !!(caps & (1 << qpt));
}
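
/*
 * Usage sketch (illustrative only): test whether RSS is usable with
 * raw packet QPs.  This assumes 'attr' was filled by
 * ibv_query_device_ex() and that the provider reports RSS
 * capabilities via attr.rss_caps.supported_qpts.
 *
 *	if (ibv_is_qpt_supported(attr.rss_caps.supported_qpts,
 *				 IBV_QPT_RAW_PACKET))
 *		use_rss = 1;
 */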

END_C_DECLS

#  undef __attribute_const

#endif /* INFINIBAND_VERBS_H */