1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef _SYS_IB_MGT_IBMF_IBMF_IMPL_H
27#define	_SYS_IB_MGT_IBMF_IBMF_IMPL_H
28
29
30/*
31 * This file contains the IBMF implementation dependent structures and defines.
32 */
33
34#ifdef __cplusplus
35extern "C" {
36#endif
37
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/modctl.h>
41#include <sys/kmem.h>
42#include <sys/ksynch.h>
43#include <sys/taskq.h>
44#include <sys/sunddi.h>
45#include <sys/disp.h>
46#include <sys/ib/ibtl/ibvti.h>
47#include <sys/ib/mgt/ibmf/ibmf.h>
48#include <sys/ib/mgt/ibmf/ibmf_rmpp.h>
49#include <sys/ib/mgt/ibmf/ibmf_kstat.h>
50#include <sys/ib/mgt/ibmf/ibmf_trace.h>
51
/*
 * Sizing limits and key values used by the IBMF implementation.
 */

/* Bytes of memory per WQE: one MAD (IBMF_MAD_SIZE) plus one ib_grh_t */
#define	IBMF_MEM_PER_WQE		(IBMF_MAD_SIZE + sizeof (ib_grh_t))
/* NOTE(review): presumably send/receive queue depths -- confirm at users */
#define	IBMF_MAX_SQ_WRE			64
#define	IBMF_MAX_RQ_WRE			64
/* NOTE(review): presumably caps on WQEs outstanding per QP -- confirm */
#define	IBMF_MAX_POSTED_RQ_PER_QP	512
#define	IBMF_MAX_POSTED_SQ_PER_QP	512
/* Scatter/gather list elements per send/receive work request */
#define	IBMF_MAX_SQ_WR_SGL_ELEMENTS	1
#define	IBMF_MAX_RQ_WR_SGL_ELEMENTS	1
/* NOTE(review): presumably the Q_Key used for management QPs -- confirm */
#define	IBMF_MGMT_Q_KEY			0x80010000
/*
 * P_Key defaults and masks. Bit 15 (IBMF_PKEY_MEMBERSHIP_MASK) is the
 * membership bit and bits 0-14 (IBMF_P_KEY_BASE_MASK) are the base value;
 * the FULL and LIMITED defaults differ only in the membership bit.
 */
#define	IBMF_P_KEY_DEF_FULL		0xFFFF
#define	IBMF_P_KEY_DEF_LIMITED		0x7FFF
#define	IBMF_P_KEY_BASE_MASK		0x7FFF
#define	IBMF_PKEY_MEMBERSHIP_MASK	0x8000

/* NOTE(review): presumably taskq thread counts -- confirm at taskq creation */
#define	IBMF_TASKQ_1THREAD		1
#define	IBMF_TASKQ_NTHREADS		128
67
/*
 * Work request ID format used for receive requests:
 * bit 0 is set to 1. IDs with bit 0 clear are treated as send
 * requests (see IBMF_IS_SEND_WR_ID).
 */
73#define	IBMF_RCV_CQE			0x1
74
/*
 * Convenience macro used in the RMPP protocol to obtain R_Method field
 * of MAD header with Response bit flipped.
 *
 * Bit 7 (0x80) of the value is inverted, bits 0-6 are passed through
 * unchanged, and every bit above bit 7 is cleared in the result.
 *
 * The argument is parenthesized and evaluated exactly once, so compound
 * expressions such as (a | b) and arguments with side effects are safe.
 */
#define	IBMF_FLIP_RESP_BIT(r_method)					\
	(((r_method) ^ 0x80) & 0xFF)
81
/*
 * Work Request ID macros
 *
 * A work request ID is classified as a receive when its IBMF_RCV_CQE bit
 * is set; any other ID is classified as a send.
 */
#define	IBMF_IS_RECV_WR_ID(id)				\
	((((uint64_t)(id) & IBMF_RCV_CQE) != 0) ? B_TRUE : B_FALSE)
#define	IBMF_IS_SEND_WR_ID(id)				\
	(IBMF_IS_RECV_WR_ID((id)) == B_FALSE)
87
/*
 * Decrement IBMF message reference count.
 *
 * Asserts (via ASSERT) that msg->im_mutex is held, then decrements
 * msg->im_ref_count. The expansion is a brace-enclosed block, not an
 * expression, and msg is evaluated twice.
 */
#define	IBMF_MSG_DECR_REFCNT(msg)			{	\
	ASSERT(MUTEX_HELD(&(msg)->im_mutex));			\
	(msg)->im_ref_count--;					\
}

/*
 * Increment IBMF message reference count.
 *
 * NOTE(review): unlike IBMF_MSG_DECR_REFCNT, this macro makes no
 * MUTEX_HELD assertion, and its expansion already ends in a semicolon,
 * so "IBMF_MSG_INCR_REFCNT(m);" expands to two statements and cannot be
 * the sole body of an if that is followed by an else. Callers are not
 * visible from this header, so the form is left unchanged.
 */
#define	IBMF_MSG_INCR_REFCNT(msg)				\
	(msg)->im_ref_count++;
97
/* Callback setup/cleanup macros */

/*
 * IBMF_RECV_CB_SETUP: account for a receive callback on client clp.
 *
 * Asserts that clp->ic_mutex is held, sets IBMF_CLIENT_RECV_CB_ACTIVE in
 * ic_flags, increments ic_recvs_active, and adds 1 to the client's
 * recvs_active kstat while holding ic_kstat_mutex.
 *
 * Expands to a brace-enclosed block; clp is evaluated more than once.
 */
#define	IBMF_RECV_CB_SETUP(clp)				{	\
	ASSERT(MUTEX_HELD(&(clp)->ic_mutex));			\
	(clp)->ic_flags |= IBMF_CLIENT_RECV_CB_ACTIVE;		\
	(clp)->ic_recvs_active++;				\
	mutex_enter(&(clp)->ic_kstat_mutex);			\
	IBMF_ADD32_KSTATS((clp), recvs_active, 1);		\
	mutex_exit(&(clp)->ic_kstat_mutex);			\
}
107
/*
 * IBMF_RECV_CB_CLEANUP: undo the accounting done by IBMF_RECV_CB_SETUP.
 *
 * Asserts that clp->ic_mutex is held, decrements ic_recvs_active, and
 * subtracts 1 from the recvs_active kstat while holding ic_kstat_mutex.
 * When ic_recvs_active reaches zero, IBMF_CLIENT_RECV_CB_ACTIVE is
 * cleared from ic_flags. Finally, if that flag is clear and
 * IBMF_CLIENT_TEAR_DOWN_CB is set, ic_recv_cb_teardown_cv is signalled.
 *
 * Expands to a brace-enclosed block; clp is evaluated more than once.
 */
#define	IBMF_RECV_CB_CLEANUP(clp)			{		\
	ASSERT(MUTEX_HELD(&(clp)->ic_mutex));				\
	(clp)->ic_recvs_active--;					\
	mutex_enter(&(clp)->ic_kstat_mutex);				\
	IBMF_SUB32_KSTATS((clp), recvs_active, 1);			\
	mutex_exit(&(clp)->ic_kstat_mutex);				\
	if ((clp)->ic_recvs_active == 0)				\
		(clp)->ic_flags &= ~IBMF_CLIENT_RECV_CB_ACTIVE;		\
	if ((((clp)->ic_flags & IBMF_CLIENT_RECV_CB_ACTIVE) == 0) &&	\
	    (((clp)->ic_flags & IBMF_CLIENT_TEAR_DOWN_CB) != 0))	\
		cv_signal(&(clp)->ic_recv_cb_teardown_cv);		\
}
120
/*
 * IBMF_ALT_RECV_CB_SETUP: account for a receive callback on the alternate
 * QP context altqp.
 *
 * Asserts that altqp->isq_mutex is held, sets IBMF_CLIENT_RECV_CB_ACTIVE
 * in isq_flags, and increments isq_recvs_active. The recvs_active kstat
 * that is updated belongs to the associated client (isq_client_hdl) and
 * is modified under that client's ic_kstat_mutex.
 *
 * Expands to a brace-enclosed block; altqp is evaluated more than once.
 */
#define	IBMF_ALT_RECV_CB_SETUP(altqp)			{		\
	ASSERT(MUTEX_HELD(&(altqp)->isq_mutex));			\
	(altqp)->isq_flags |= IBMF_CLIENT_RECV_CB_ACTIVE;		\
	(altqp)->isq_recvs_active++;					\
	mutex_enter(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
	IBMF_ADD32_KSTATS((altqp)->isq_client_hdl, recvs_active, 1);	\
	mutex_exit(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
}
129
/*
 * IBMF_ALT_RECV_CB_CLEANUP: undo the accounting done by
 * IBMF_ALT_RECV_CB_SETUP.
 *
 * Asserts that altqp->isq_mutex is held, decrements isq_recvs_active, and
 * subtracts 1 from the associated client's recvs_active kstat under that
 * client's ic_kstat_mutex. When isq_recvs_active reaches zero,
 * IBMF_CLIENT_RECV_CB_ACTIVE is cleared from isq_flags. Finally, if that
 * flag is clear and IBMF_CLIENT_TEAR_DOWN_CB is set,
 * isq_recv_cb_teardown_cv is signalled.
 *
 * Expands to a brace-enclosed block; altqp is evaluated more than once.
 */
#define	IBMF_ALT_RECV_CB_CLEANUP(altqp)			{		\
	ASSERT(MUTEX_HELD(&(altqp)->isq_mutex));			\
	(altqp)->isq_recvs_active--;					\
	mutex_enter(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
	IBMF_SUB32_KSTATS((altqp)->isq_client_hdl, recvs_active, 1);	\
	mutex_exit(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
	if ((altqp)->isq_recvs_active == 0)				\
		(altqp)->isq_flags &= ~IBMF_CLIENT_RECV_CB_ACTIVE;	\
	if ((((altqp)->isq_flags & IBMF_CLIENT_RECV_CB_ACTIVE) == 0) &&	\
	    (((altqp)->isq_flags & IBMF_CLIENT_TEAR_DOWN_CB) != 0))	\
		cv_signal(&(altqp)->isq_recv_cb_teardown_cv);		\
}
142
143/* warlock annotations for ibmf.h and ibmf_msg.h structures */
144_NOTE(READ_ONLY_DATA(_ibmf_msg::im_msgbufs_send.im_bufs_cl_data
145	_ibmf_msg::im_msgbufs_send.im_bufs_cl_data_len
146	_ibmf_msg::im_msgbufs_send.im_bufs_cl_hdr
147	_ibmf_msg::im_msgbufs_send.im_bufs_cl_hdr_len
148	_ibmf_msg::im_msgbufs_send.im_bufs_mad_hdr
149	_ib_mad_hdr_t))
150
/*
 * WQE pool management contexts
 *
 * Entries are chained through wqe_mgt_next. Per the annotation below,
 * wqes_mutex protects every field except wqe_mgt_next.
 */
typedef struct _ibmf_wqe_mgt {
	struct _ibmf_wqe_mgt	*wqe_mgt_next; /* next wqe management entry */
	void			*wqes_kmem;	/* kmem allocated for WQEs */
	uint64_t		wqes_kmem_sz; /* size of WQE kmem allocated */
	ib_vaddr_t		wqes_ib_mem;	/* Registered memory */
	ibt_lkey_t		wqes_ib_lkey;	/* Lkey that goes with it */
	ibt_mr_hdl_t		wqes_ib_mem_hdl; /* IB mem handle */
	kmutex_t		wqes_mutex;	/* WQE mgt context mutex */
} ibmf_wqe_mgt_t;
_NOTE(MUTEX_PROTECTS_DATA(ibmf_wqe_mgt_t::wqes_mutex,
    ibmf_wqe_mgt_t::wqes_kmem
    ibmf_wqe_mgt_t::wqes_kmem_sz
    ibmf_wqe_mgt_t::wqes_ib_mem
    ibmf_wqe_mgt_t::wqes_ib_lkey
    ibmf_wqe_mgt_t::wqes_ib_mem_hdl))
169
/*
 * structure used to keep track of qp handles
 *
 * Locking, per the annotations in this file: iq_mutex protects
 * iq_rwqes_posted; iq_next and iq_flags are protected by
 * ibmf_ci_t::ci_mutex; iq_port_num and iq_qp_handle are read-only data.
 */
typedef struct _ibmf_qp_t {
	struct _ibmf_qp_t	*iq_next;	/* next in the list */
	ibt_qp_hdl_t		iq_qp_handle;	/* qp handle from IB xport */
	int			iq_port_num;	/* port num for this qp */
	int			iq_qp_num;	/* qp num */
	int			iq_qp_ref;	/* no. of clients using this */
	uint_t			iq_flags;	/* IBMF_QP_FLAGS_* state */
	uint_t			iq_rwqes_posted; /* posted receive wqes */
	kmutex_t		iq_mutex;	/* protects iq_rwqes_posted */
} ibmf_qp_t;
_NOTE(READ_ONLY_DATA(ibmf_qp_t::iq_port_num ibmf_qp_t::iq_qp_handle))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_qp_t::iq_mutex,
    ibmf_qp_t::iq_rwqes_posted))

/* defines for iq_flags (used to implement the QP state machine) */
#define	IBMF_QP_FLAGS_INVALID				0x0001
#define	IBMF_QP_FLAGS_INITING				0x0002
#define	IBMF_QP_FLAGS_INITED				0x0004
#define	IBMF_QP_FLAGS_UNINITING				0x0008
192
/*
 * structure used to keep track of qp handles for qps other than
 * the special qps
 *
 * Locking, per the annotations below: isq_mutex protects the send/recv
 * counters, the keys, the receive callback and its argument, isq_flags
 * and isq_rwqes_posted; isq_wqe_mutex protects isq_wqe_mgt_list;
 * isq_port_num is read-only data.
 */
typedef struct _ibmf_alt_qp_t {
	struct _ibmf_alt_qp_t	*isq_next;	/* next qp ctx on list */
	ibt_qp_hdl_t		isq_qp_handle;	/* qp handle from IB xport */
	ibt_chan_sizes_t	isq_qp_sizes;	/* qp sizes returned by alloc */
	struct _ibmf_client	*isq_client_hdl; /* associated client handle */
	ibmf_msg_cb_t		isq_recv_cb;	/* recv callback for this qp */
	void			*isq_recv_cb_arg; /* arg for recv cb */
	kcondvar_t		isq_recv_cb_teardown_cv; /* wait on teardown */
	kmutex_t		isq_mutex;		/* qp context mutex */
	int			isq_flags;	/* to keep track of state */
	int			isq_sends_active; /* outstanding sends */
	int			isq_recvs_active; /* outstanding recvs */
	ib_qpn_t		isq_qpn;	/* qp number */
	ib_pkey_t		isq_pkey;	/* qp's partition key */
	ib_qkey_t		isq_qkey;	/* qp's queue key */
	int			isq_port_num;	/* port num for this qp */
	boolean_t		isq_supports_rmpp; /* qp supports rmpp */
	kcondvar_t		isq_sqd_cv; 	/* wait on SQD event */
	int			isq_wqes_alloced; /* wqes allocated for QP */
	kcondvar_t		isq_wqes_cv; 	/* wait on wqes destruction */
	uint_t			isq_rwqes_posted; /* posted receive wqes */

	/*
	 * Send/Receive WQE management.
	 * NOTE(review): this comment originally read "for Special QPs", yet
	 * these fields live in the alternate QP context; it looks like a
	 * copy of the matching comment in ibmf_ci_t -- confirm.
	 */
	struct kmem_cache	*isq_send_wqes_cache; /* Send WQE cache */
	struct kmem_cache	*isq_recv_wqes_cache; /* Receive WQE cache */
	vmem_t			*isq_wqe_ib_vmem; /* IB virtual address arena */
	kmutex_t		isq_wqe_mutex;	/* WQE management list mutex */
	ibmf_wqe_mgt_t		*isq_wqe_mgt_list; /* WQE management list */
} ibmf_alt_qp_t;
_NOTE(MUTEX_PROTECTS_DATA(ibmf_alt_qp_t::isq_mutex,
    ibmf_alt_qp_t::isq_sends_active
    ibmf_alt_qp_t::isq_recvs_active
    ibmf_alt_qp_t::isq_pkey
    ibmf_alt_qp_t::isq_qkey
    ibmf_alt_qp_t::isq_recv_cb
    ibmf_alt_qp_t::isq_recv_cb_arg
    ibmf_alt_qp_t::isq_flags
    ibmf_alt_qp_t::isq_rwqes_posted))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_alt_qp_t::isq_wqe_mutex,
    ibmf_alt_qp_t::isq_wqe_mgt_list))
_NOTE(READ_ONLY_DATA(ibmf_alt_qp_t::isq_port_num))
238
239#define	IBMF_MSG_FLAGS_QUEUED		0x00001000	/* in the ib xport */
240#define	IBMF_MSG_FLAGS_DONE		0x00002000	/* xport done */
241#define	IBMF_MSG_FLAGS_BLOCKING		0x00004000	/* sync command */
242
/*
 * This structure is used to keep track of IBT returned ibt_ud_dest_t
 * structures. It embeds the ibt_ud_dest_t by value and adds a link
 * pointer so that entries can be chained on a singly linked list.
 */
typedef struct ibmf_ud_dest_s {
	ibt_ud_dest_t		ud_dest;	/* embedded IBT UD destination */
	struct ibmf_ud_dest_s	*ud_next;	/* next entry on the list */
} ibmf_ud_dest_t;
251
/*
 * ibmf_msg_impl definition
 *	The IBMF client initializes various members of the msg while sending
 *	the message. IBMF fills in the various members of the msg when a message
 *	is received.
 *
 *	Locking, per the annotations below: im_mutex protects im_flags,
 *	im_trans_state_flags, im_msgbufs_recv, im_msg_status and im_rmpp_ctx.
 *	im_trans_cb, im_trans_cb_arg, im_transp_op_flags, im_local_addr,
 *	im_unsolicited and im_client are annotated as read-only data.
 */
typedef struct _ibmf_msg_impl {
	ibmf_addr_info_t	im_local_addr;	/* local addressing info */
	ibmf_global_addr_info_t	im_global_addr;	/* global addressing info */
	int32_t			im_msg_status;	/* completion status */
	uint32_t		im_msg_flags;	/* flags */
	size_t			im_msg_sz_limit; /* max. message size */
	ibmf_msg_bufs_t		im_msgbufs_send; /* input data to ibmf */
	ibmf_msg_bufs_t		im_msgbufs_recv; /* output data from ibmf */
	struct _ibmf_msg_impl	*im_msg_next;	/* next message on the list */
	struct _ibmf_msg_impl	*im_msg_prev;	/* prev message on the list */
	void			*im_client;	/* client that allocd the pkt */
	ibmf_qp_handle_t	im_qp_hdl;	/* qp handle */
	ibt_ud_dest_t		*im_ud_dest;	/* ptr to the pkt's ud_dest */
	ibmf_ud_dest_t		*im_ibmf_ud_dest; /* ptr to IBMF ud_dest entry */
	ibmf_msg_cb_t		im_trans_cb;	/* transaction completion cb */
	void			*im_trans_cb_arg; /* arg for completion cb */
	uint64_t		im_tid;		/* transaction ID */
	uint8_t			im_mgt_class; 	/* management class */
	kmutex_t		im_mutex;	/* protects trans context */
	uint32_t		im_state;	/* message state */
	uint32_t		im_transp_op_flags; /* transaction operation */
	uint32_t		im_flags;	/* message flags */
	uint32_t		im_trans_state_flags;	/* state flags */
	kcondvar_t		im_trans_cv;	/* wait for op completion */
	ibmf_rmpp_ctx_t		im_rmpp_ctx; 	/* RMPP context */
	ibmf_retrans_t		im_retrans;	/* retransmission info */
	timeout_id_t		im_rp_timeout_id; /* response timeout ID */
	timeout_id_t		im_tr_timeout_id; /* transaction timeout ID */
	timeout_id_t		im_rp_unset_timeout_id; /* id for untimeout() */
	timeout_id_t		im_tr_unset_timeout_id; /* id for untimeout() */
	int			im_ref_count;	/* reference count */
	boolean_t		im_unsolicited; /* msg was unsolicited recv */
	int			im_pending_send_compls; /* send completions */
} ibmf_msg_impl_t;
_NOTE(READ_ONLY_DATA(ibmf_msg_impl_t::im_trans_cb
    ibmf_msg_impl_t::im_trans_cb_arg
    ibmf_msg_impl_t::im_transp_op_flags
    ibmf_msg_impl_t::im_local_addr
    ibmf_msg_impl_t::im_unsolicited
    ibmf_msg_impl_t::im_client))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_msg_impl_t::im_mutex,
    ibmf_msg_impl_t::im_flags
    ibmf_msg_impl_t::im_trans_state_flags
    ibmf_msg_impl_t::im_msgbufs_recv
    ibmf_msg_impl_t::im_msg_status
    ibmf_msg_impl_t::im_rmpp_ctx))
304
/* im_flags: single-bit values, so they may be combined with OR */
#define	IBMF_MSG_FLAGS_SEQUENCED	0x1
#define	IBMF_MSG_FLAGS_SEND_RMPP	0x2
#define	IBMF_MSG_FLAGS_RECV_RMPP	0x4
#define	IBMF_MSG_FLAGS_NOT_RMPP		0x8
#define	IBMF_MSG_FLAGS_BUSY		0x10
#define	IBMF_MSG_FLAGS_FREE		0x20
#define	IBMF_MSG_FLAGS_ON_LIST		0x40
#define	IBMF_MSG_FLAGS_SET_TERMINATION	0x80
#define	IBMF_MSG_FLAGS_TERMINATION	0x100

/*
 * retransmission parameter defaults for im_retrans field
 * The time values are expressed in microseconds, as the per-line
 * second/millisecond notes show.
 */
#define	IBMF_RETRANS_DEF_RTV		4000000		/* 4 seconds */
#define	IBMF_RETRANS_DEF_RTTV		100000		/* 100 milliseconds */
#define	IBMF_RETRANS_DEF_TRANS_TO	40000000	/* 40 seconds */
#define	IBMF_RETRANS_DEF_RETRIES	0

/*
 * Transaction state flags (im_trans_state_flags) definitions
 * Don't use 0x0 as a flag value since clients OR and AND the flags
 */
#define	IBMF_TRANS_STATE_FLAG_UNINIT		0x1
#define	IBMF_TRANS_STATE_FLAG_INIT		0x2
#define	IBMF_TRANS_STATE_FLAG_WAIT		0x4
#define	IBMF_TRANS_STATE_FLAG_DONE		0x8
#define	IBMF_TRANS_STATE_FLAG_SIGNALED		0x10
#define	IBMF_TRANS_STATE_FLAG_TIMEOUT		0x20
#define	IBMF_TRANS_STATE_FLAG_RECV_ACTIVE	0x40
#define	IBMF_TRANS_STATE_FLAG_RECV_DONE		0x80
#define	IBMF_TRANS_STATE_FLAG_SEND_DONE		0x100
335
/*
 * Timer types
 * Selects which timer ibmf_i_set_timer()/ibmf_i_unset_timer() operate on.
 * NOTE(review): presumably these map to the response and transaction
 * timeout IDs kept in ibmf_msg_impl_t -- confirm in the timer code.
 */
typedef	enum _ibmf_timer_t {
	IBMF_RESP_TIMER			= 1,
	IBMF_TRANS_TIMER		= 2
} ibmf_timer_t;
341
/*
 * structure to hold specific client info taken from ibmf_register_info_t
 * since we can register for more than one client at a time, but each specific
 * ibmf_client_t only holds one client itself.
 */
typedef struct _ibmf_client_info {
	ib_guid_t		ci_guid;	/* GUID from registration */
	uint_t			port_num;	/* port number */
	ibmf_client_type_t	client_class;	/* class; see IDs below */
} ibmf_client_info_t;

/*
 * Defines for the client type (agent/manager/agent+manager)
 * Bits 16-19 of the client_class specify the client type.
 * IBMF_AGENT_MANAGER_ID is the OR of the agent and manager values.
 */
#define	IBMF_AGENT_ID			0x00010000
#define	IBMF_MANAGER_ID			0x00020000
#define	IBMF_AGENT_MANAGER_ID		0x00030000
360
/*
 * structure used to keep track of clients
 *
 * Locking, per the annotations below: ic_msg_mutex protects the two
 * message lists and their tail pointers; ic_mutex protects
 * ic_msgs_alloced, ic_flags, ic_recv_cb and ic_recv_cb_arg;
 * ic_kstat_mutex protects ic_kstatp. ic_ci_handle, ic_client_info and
 * ic_client_sig are annotated as read-only data.
 */
typedef struct _ibmf_client {
	void			*ic_client_sig;	/* set for valid handles */
	struct _ibmf_ci		*ic_myci;	/* pointer to CI */
	struct _ibmf_client	*ic_next;	/* next client on list */
	struct _ibmf_client	*ic_prev;	/* previous client on list */

	taskq_t			*ic_send_taskq;	/* taskq for send cb */
	taskq_t			*ic_recv_taskq;	/* taskq for receive cb */
	uint_t			ic_init_state_class; /* taskq initialization */

	ibmf_msg_impl_t		*ic_msg_list; /* protected by ic_msg_mutex */
	ibmf_msg_impl_t		*ic_msg_last; /* last message on list */
	ibmf_msg_impl_t		*ic_term_msg_list; /* termination loop mesgs */
	ibmf_msg_impl_t		*ic_term_msg_last; /* last message on list */
	kmutex_t		ic_msg_mutex; /* protect the message list */

	/* IBTL asynchronous event callback (eg. HCA offline) */
	ibmf_async_event_cb_t	ic_async_cb; /* async/unsolicited handling */
	void			*ic_async_cb_arg; /* args for async cb */

	/* Asynchronous/Unsolicited message handler */
	ibmf_msg_cb_t		ic_recv_cb;
	void			*ic_recv_cb_arg;
	kcondvar_t		ic_recv_cb_teardown_cv; /* wait on teardown */

	ibmf_client_info_t	ic_client_info; /* client registration info */
	ibmf_qp_t		*ic_qp;		/* special qp context */
	ibt_hca_hdl_t		ic_ci_handle;	/* == ic_myci->ci_ci_handle */
	kmutex_t		ic_mutex;	/* prot the client struct */
	int			ic_flags;	/* to keep track of state */
	int			ic_reg_flags;	/* flags specified during */
						/* registration */

	/* Statistics */
	int			ic_msgs_alloced; /* no. msgs alloced by/for */
	int			ic_msgs_active; /* no. msgs active */
	int			ic_trans_active; /* outstanding transacts  */
	int			ic_sends_active; /* outstanding sends */
	int			ic_recvs_active; /* outstanding recvs */

	ib_lid_t		ic_base_lid;	/* used to calculate pathbits */
	kmutex_t		ic_kstat_mutex;	/* protect the kstat */
	struct kstat		*ic_kstatp;	/* kstats for client */
} ibmf_client_t;
_NOTE(READ_ONLY_DATA(ibmf_client_t::ic_ci_handle
    ibmf_client_t::ic_client_info
    ibmf_client_t::ic_client_sig))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_msg_mutex,
    ibmf_client_t::ic_msg_list
    ibmf_client_t::ic_msg_last
    ibmf_client_t::ic_term_msg_list
    ibmf_client_t::ic_term_msg_last))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_mutex,
    ibmf_client_t::ic_msgs_alloced
    ibmf_client_t::ic_flags
    ibmf_client_t::ic_recv_cb
    ibmf_client_t::ic_recv_cb_arg))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_kstat_mutex,
    ibmf_client_t::ic_kstatp))
423
/*
 * Callback state flags. The receive callback macros in this file apply
 * IBMF_CLIENT_RECV_CB_ACTIVE and IBMF_CLIENT_TEAR_DOWN_CB to both the
 * client's ic_flags and the alternate QP context's isq_flags.
 */
#define	IBMF_CLIENT_RECV_CB_ACTIVE		0x00000001 /* rcv CB active */
#define	IBMF_CLIENT_SEND_CB_ACTIVE		0x00000010 /* send CB active */
#define	IBMF_CLIENT_TEAR_DOWN_CB		0x00000100 /* client wants to */
							    /* remove recv_cb */

/* IBMF_MAD_ONLY is used by the alternate QP context only (isq_flags) */
#define	IBMF_MAD_ONLY				0x00002000
#define	IBMF_RAW_ONLY				0x00004000

/*
 * Message list selectors.
 * NOTE(review): presumably choose between the client's regular message
 * list and its termination message list -- confirm at users.
 */
#define	IBMF_REG_MSG_LIST	0
#define	IBMF_TERM_MSG_LIST	1
435
/*
 * Send WQE context
 *
 * Wraps one IBT send work request together with the IBMF bookkeeping
 * for it: the sending client, the memory and memory-region details, the
 * QP handles, the message context and the completion status.
 */
typedef struct _ibmf_send_wqe {
	struct _ibmf_send_wqe	*send_wqe_next;	/* next send WQE */
	ibt_send_wr_t		send_wr;	/* IBT send work request */
	ibmf_client_t		*send_client;	/* client that sent this */
	void			*send_mem;	/* memory used in send */
	ib_vaddr_t		send_sg_mem;	/* registered memory */
	ibt_lkey_t		send_sg_lkey;	/* Lkey that goes with it */
	/*
	 * NOTE(review): the comment below names ci_send_mr_handle, but no
	 * such field is declared in ibmf_ci_t in this header -- possibly
	 * stale; confirm.
	 */
	ibt_mr_hdl_t		send_mem_hdl;	/* == ci_send_mr_handle in ci */
	uint_t			send_wqe_flags;
	uchar_t			send_port_num;	/* port this is posted to */
	ibt_qp_hdl_t		send_qp_handle;	/* qp handle for this wqe */
	ibmf_qp_handle_t	send_ibmf_qp_handle; /* ibmf qp handle */
	ibmf_msg_impl_t		*send_msg;	/* message context */
	uint32_t		send_status;	/* completion status */
	uint32_t		send_rmpp_segment; /* rmpp segment */
} ibmf_send_wqe_t;
455
/*
 * Receive WQE context
 *
 * Wraps one IBT receive work request together with the IBMF bookkeeping
 * for it: the receiving client, the memory and memory-region details,
 * the QP it is posted on, the matching work completion and the message
 * context.
 */
typedef struct _ibmf_recv_wqe {
	struct _ibmf_recv_wqe	*recv_wqe_next;	/* next receive WQE */
	ibt_recv_wr_t		recv_wr;	/* IBT receive work request */
	ibmf_client_t		*recv_client;	/* client that received this */
	void			*recv_mem;	/* memory used in WQEs */
	ibmf_qp_t		*recv_qpp;	/* qp this is posted */
	ibt_wc_t		recv_wc;	/* corresponding  cqe */
	ib_vaddr_t		recv_sg_mem;	/* registered mem */
	ibt_lkey_t		recv_sg_lkey;	/* Lkey that goes with it */
	/*
	 * NOTE(review): the comment below names ci_recv_mr_handle, but no
	 * such field is declared in ibmf_ci_t in this header -- possibly
	 * stale; confirm.
	 */
	ibt_mr_hdl_t		recv_mem_hdl;	/* == ci_recv_mr_handle in ci */
	uint_t			recv_wqe_flags;	/* e.g. IBMF_RECV_WQE_FREE */
	uchar_t			recv_port_num;	/* port this is posted to */
	ibt_qp_hdl_t		recv_qp_handle;	/* ibt qp handle for this wqe */
	ibmf_qp_handle_t	recv_ibmf_qp_handle; /* ibmf qp handle */
	ibmf_msg_impl_t		*recv_msg;	/* message context */
} ibmf_recv_wqe_t;

#define	IBMF_RECV_WQE_FREE		0x00000001	/* WQE is free */
477
/*
 * Struct that keeps track of the underlying IB channel interface. There
 * is one per CI. Each client on a given CI gets a reference to the CI.
 * References are tracked using the ci_ref field; when ci_ref drops to 0,
 * the structure can be freed.
 *
 * Locking, per the annotations below: ci_mutex protects ci_state and
 * ci_port_kstatp, and also ibmf_qp_t::iq_next and ibmf_qp_t::iq_flags;
 * ci_clients_mutex protects the client list head and tail;
 * ci_ud_dest_list_mutex protects the UD destination list and its count;
 * ci_wqe_mutex protects ci_wqe_mgt_list. ci_cq_handle is read-only data.
 */
typedef struct _ibmf_ci {
	struct _ibmf_ci		*ci_next;
	kmutex_t		ci_mutex;	/* protects the CI struct */
	ibmf_client_t		*ci_clients;	/* list of clients;head */
	ibmf_client_t		*ci_clients_last; /* tail */
	kmutex_t		ci_clients_mutex; /* protect the client list */
	ib_guid_t		ci_node_guid;	/* node GUID */
	ibt_hca_hdl_t		ci_ci_handle;	/* HCA handle */
	ibt_pd_hdl_t		ci_pd;		/* protection domain */
	ibmf_qp_t		*ci_qp_list;	/* sp. QP list for all ports */
	ibmf_qp_t		*ci_qp_list_tail;
	kcondvar_t		ci_qp_cv;	/* wait for QP valid state */
	ibt_cq_hdl_t		ci_cq_handle;	/* CQ handle for sp. QPs */
	ibt_cq_hdl_t		ci_alt_cq_handle; /* CQ handle for alt. QPs */
	ibmf_alt_qp_t		*ci_alt_qp_list; /* alternate QP list */

	/* UD destination resources */
	uint32_t		ci_ud_dest_list_count; /* resources in pool */
	kmutex_t		ci_ud_dest_list_mutex; /* UD dest list mutex */
	ibmf_ud_dest_t		*ci_ud_dest_list_head; /* start of list */

	/* Send/Receive WQEs for Special QPs */
	struct kmem_cache	*ci_send_wqes_cache; /* Send WQE cache */
	struct kmem_cache	*ci_recv_wqes_cache; /* Receive WQE cache */
	vmem_t			*ci_wqe_ib_vmem; /* IB virtual address arena */
	kmutex_t		ci_wqe_mutex;	/* WQE management list mutex */
	ibmf_wqe_mgt_t		*ci_wqe_mgt_list; /* WQE management list */

	uint_t			ci_nports;	/* num ports on the CI */
	/* 24-bit bit-field; its address cannot be taken */
	uint32_t		ci_vendor_id:24; /* HCA vendor ID */
	uint16_t		ci_device_id;	/* HCA device ID */
	uint_t			ci_ref;		/* reference count */
	uint16_t		ci_state;	/* CI context state */
	uint16_t		ci_state_flags;	/* CI context state flags */
	kcondvar_t		ci_state_cv;	/* wait on a state change */
	uint_t			ci_init_state;	/* used in cleanup */

	/* free QP synchronization with WQE completion processing */
	int			ci_wqes_alloced; /* wqes alloced for sp QPs */
	kcondvar_t		ci_wqes_cv; 	/* wait on wqes destruction */

	/* port kstats */
	struct kstat		*ci_port_kstatp;	/* kstats for port */
} ibmf_ci_t;
_NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_ud_dest_list_mutex,
    ibmf_ci_t::ci_ud_dest_list_count
    ibmf_ci_t::ci_ud_dest_list_head))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_mutex,
    ibmf_ci_t::ci_state
    ibmf_ci_t::ci_port_kstatp))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_clients_mutex,
    ibmf_ci_t::ci_clients
    ibmf_ci_t::ci_clients_last))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_mutex,
    ibmf_qp_t::iq_next
    ibmf_qp_t::iq_flags))
_NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_wqe_mutex,
    ibmf_ci_t::ci_wqe_mgt_list))
_NOTE(READ_ONLY_DATA(ibmf_ci_t::ci_cq_handle))
/* NOTE(review): the field this flag is stored in is not shown here */
#define	IBMF_CI_BLOCKED_ON_SEND_WQE		0x00000001 /* blockers on wqe */

/* defines for ci_init_state (single-bit values, one per init step) */
#define	IBMF_CI_INIT_HCA_INITED				0x0001
#define	IBMF_CI_INIT_MUTEX_CV_INITED			0x0002
#define	IBMF_CI_INIT_SEND_TASKQ_DONE			0x0004
#define	IBMF_CI_INIT_RECV_TASKQ_DONE			0x0008
#define	IBMF_CI_INIT_CQ_INITED				0x0010
#define	IBMF_CI_INIT_WQES_ALLOCED			0x0020
#define	IBMF_CI_INIT_HCA_LINKED				0x0040
#define	IBMF_CI_INIT_QP_LIST_INITED			0x0080

/*
 * defines for ci_state
 * NOTE(review): IBMF_CI_STATE_GONE (0x3) equals PRESENT | INITED, so these
 * look like enumerated values to be compared with ==, not bits to be
 * tested with & -- confirm against the users of ci_state.
 */
#define	IBMF_CI_STATE_PRESENT				0x0001
#define	IBMF_CI_STATE_INITED				0x0002
#define	IBMF_CI_STATE_GONE				0x0003

/* defines for ci_state_flags (single-bit values) */
#define	IBMF_CI_STATE_INIT_WAIT				0x0001
#define	IBMF_CI_STATE_UNINIT_WAIT			0x0002
#define	IBMF_CI_STATE_VALIDATE_WAIT			0x0004

#define	IBMF_CI_STATE_INVALIDATING			0x0100
#define	IBMF_CI_STATE_VALIDATING			0x0200
#define	IBMF_CI_STATE_UNINITING				0x0400
#define	IBMF_CI_STATE_INITING				0x0800
570
/*
 * for keeping track of ibmf state
 *
 * Holds the head and tail of the CI list, the IBT client handle and
 * module info, the CQ handler, and a taskq. Per the annotation below,
 * ibmf_mutex protects the ci_next link of each ibmf_ci_t.
 */
typedef struct _ibmf_state {
	struct _ibmf_ci		*ibmf_ci_list;		/* CI list head */
	struct _ibmf_ci		*ibmf_ci_list_tail;	/* CI list tail */
	ibt_clnt_hdl_t		ibmf_ibt_handle;	/* IBT client handle */
	ibt_cq_handler_t	ibmf_cq_handler;	/* CQ handler */
	kmutex_t		ibmf_mutex;
	ibt_clnt_modinfo_t	ibmf_ibt_modinfo;	/* IBT module info */
	taskq_t			*ibmf_taskq;	/* taskq for MAD processing */
						/* for classes not registered */
} ibmf_state_t;
_NOTE(MUTEX_PROTECTS_DATA(ibmf_state_t::ibmf_mutex,
    ibmf_ci_t::ci_next))
586
587/* UD Destination resource cache definitions */
588/*
589 * It is preferred that the difference between the hi and lo water
590 * marks be only a few ud_dest resources. The intent is that a
591 * thread that needs to run ibmf_i_populate_ud_dest_list() does not
592 * spend too much time in this ud_dest resource population process
593 * before it returns to its caller. A benefit of a higher lo water
594 * mark is that the larger available pool of resources supports high
595 * stress scenarios better.
596 */
597#define	IBMF_UD_DEST_HI_WATER_MARK	512
598#define	IBMF_UD_DEST_LO_WATER_MARK	500
599
600/*
601 * Prototypes
602 */
603/* ci related functions */
604int ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid, uint8_t port_num);
605int ibmf_i_get_ci(ibmf_register_info_t *client_infop, ibmf_ci_t **cipp);
606void ibmf_i_release_ci(ibmf_ci_t *cip);
607
608/* client related functions */
609int ibmf_i_validate_classes_and_port(ibmf_ci_t *ibmf_cip,
610    ibmf_register_info_t *client_infop);
611int ibmf_i_validate_class_mask(ibmf_register_info_t *client_infop);
612int ibmf_i_alloc_client(ibmf_register_info_t *client_infop, uint_t flags,
613    ibmf_client_t **clientpp);
614void ibmf_i_add_client(ibmf_ci_t *ibmf_ci, ibmf_client_t *ibmf_clientp);
615
616void ibmf_i_free_client(ibmf_client_t *clientp);
617void ibmf_i_delete_client(ibmf_ci_t *ibmf_ci, ibmf_client_t *ibmf_clientp);
618int ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t *ibmf_cip, int port_num,
619    ibmf_client_type_t class, ibmf_client_t **clientpp);
620
621/* qp related functions */
622int ibmf_i_get_qp(ibmf_ci_t *ibmf_cip, uint_t port_num,
623    ibmf_client_type_t class, ibmf_qp_t **qppp);
624void ibmf_i_release_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t **qpp);
625int ibmf_i_alloc_qp(ibmf_client_t *clientp, ib_pkey_t p_key,
626    ib_qkey_t q_key, uint_t flags, ibmf_qp_handle_t *ibmf_qp_handlep);
627int ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags);
628int ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags,
629    uint_t *qp_nump, ib_pkey_t *p_keyp, ib_qkey_t *q_keyp, uint8_t *portnump);
630int ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle, ib_pkey_t p_key,
631    ib_qkey_t q_key, uint_t flags);
632int ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle, ib_pkey_t pkey,
633    uint8_t port, ib_pkey_t *pkeyixp);
634int ibmf_i_pkey_ix_to_key(ibmf_ci_t *cip, uint_t port_num, uint_t pkey_ix,
635    ib_pkey_t *pkeyp);
636
637/* pkt related functions */
638int ibmf_i_issue_pkt(ibmf_client_t *clientp, ibmf_msg_impl_t *msgp,
639    ibmf_qp_handle_t ibmf_qp_handle, ibmf_send_wqe_t *send_wqep);
640int ibmf_i_alloc_ud_dest(ibmf_client_t *clientp,
641    ibmf_msg_impl_t *msgimplp, ibt_ud_dest_hdl_t *ud_dest_p, boolean_t block);
642void ibmf_i_free_ud_dest(ibmf_client_t *clientp,
643    ibmf_msg_impl_t *msgimplp);
644void ibmf_i_init_ud_dest(ibmf_ci_t *cip);
645void ibmf_i_fini_ud_dest(ibmf_ci_t *cip);
646ibmf_ud_dest_t *ibmf_i_get_ud_dest(ibmf_ci_t *cip);
647void ibmf_i_put_ud_dest(ibmf_ci_t *cip, ibmf_ud_dest_t *ud_dest);
648void ibmf_i_pop_ud_dest_thread(void *argp);
649void ibmf_i_clean_ud_dest_list(ibmf_ci_t *cip, boolean_t all);
650int ibmf_i_alloc_send_resources(ibmf_ci_t *cip, ibmf_msg_impl_t *msgp,
651    boolean_t block, ibmf_send_wqe_t **swqepp);
652void ibmf_i_free_send_resources(ibmf_ci_t *cip, ibmf_msg_impl_t *msgimplp,
653    ibmf_send_wqe_t *swqep);
654int ibmf_i_post_recv_buffer(ibmf_ci_t *cip, ibmf_qp_t *qpp, boolean_t block,
655    ibmf_qp_handle_t ibmf_qp_handle);
656int ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle);
657int ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle,
658    ibmf_qp_handle_t ibmf_qp_handle);
659int ibmf_i_check_for_loopback(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t msgp,
660    void *msg_cb_args, ibmf_retrans_t *retrans, boolean_t *loopback);
661int ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status);
662int ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status);
663int ibmf_i_send_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
664    ibmf_msg_impl_t *msgimplp, int block);
665int ibmf_i_send_single_pkt(ibmf_client_t *clientp,
666    ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, int block);
667
668/* WQE related functions */
669int ibmf_i_init_wqes(ibmf_ci_t *cip);
670void ibmf_i_fini_wqes(ibmf_ci_t *cip);
671void ibmf_i_init_send_wqe(ibmf_client_t *clientp,
672    ibmf_msg_impl_t *msgimplp, ibt_wr_ds_t *sglp, ibmf_send_wqe_t *wqep,
673    ibt_ud_dest_hdl_t ud_dest, ibt_qp_hdl_t ibt_qp_handle,
674    ibmf_qp_handle_t ibmf_qp_handle);
675void ibmf_i_init_recv_wqe(ibmf_qp_t *qpp, ibt_wr_ds_t *sglp,
676    ibmf_recv_wqe_t *wqep, ibt_qp_hdl_t ibt_qp_handle,
677    ibmf_qp_handle_t ibmf_qp_handle);
678void ibmf_i_mad_completions(ibt_cq_hdl_t cq_handle, void *arg);
679#ifdef DEBUG
680void ibmf_i_dump_wcp(ibmf_ci_t *cip, ibt_wc_t *wcp, ibmf_recv_wqe_t *recv_wqep);
681#endif
682
683void ibmf_ibt_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
684    ibt_async_code_t code, ibt_async_event_t *event);
685
686/* msg related functions */
687void ibmf_i_init_msg(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t trans_cb,
688    void *trans_cb_arg, ibmf_retrans_t *retrans, boolean_t block);
689void ibmf_i_client_add_msg(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp);
690void ibmf_i_client_rem_msg(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
691    uint_t *refcnt);
692int ibmf_i_alloc_msg(ibmf_client_t *clientp, ibmf_msg_impl_t **msgp,
693    int km_flags);
694void ibmf_i_free_msg(ibmf_msg_impl_t *msgimplp);
695int ibmf_i_msg_transport(ibmf_client_t *clientp,
696    ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, int blocking);
697void ibmf_i_decrement_ref_count(ibmf_msg_impl_t *msgimplp);
698void ibmf_i_handle_send_completion(ibmf_ci_t *cip, ibt_wc_t *wcp);
699void ibmf_i_handle_recv_completion(ibmf_ci_t *cip, ibt_wc_t *wcp);
700int ibmf_setup_recvbuf_on_error(ibmf_msg_impl_t *msgimplp, uchar_t *mad);
701
702/* transaction related functions */
703void ibmf_i_terminate_transaction(ibmf_client_t *clientp,
704    ibmf_msg_impl_t *msgimplp, uint32_t status);
705void ibmf_i_notify_client(ibmf_msg_impl_t *msgimplp);
706void ibmf_i_notify_sequence(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
707    int msg_flags);
708
709/* timer related functions */
710void ibmf_i_set_timer(void (*func)(void *), ibmf_msg_impl_t *msgimplp,
711    ibmf_timer_t type);
712void ibmf_i_unset_timer(ibmf_msg_impl_t *msgimplp, ibmf_timer_t type);
713void ibmf_i_recv_timeout(void *argp);
714void ibmf_i_send_timeout(void *argp);
715void ibmf_i_err_terminate_timeout(void *msgp);
716
717/* rmpp related functions */
718boolean_t ibmf_i_find_msg_client(ibmf_client_t *cl, ibmf_msg_impl_t *msgimplp,
719    boolean_t inc_refcnt);
720boolean_t ibmf_i_is_rmpp(ibmf_client_t *clientp,
721    ibmf_qp_handle_t ibmf_qp_handle);
722void ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class, uint32_t *szp,
723    uint32_t *offp);
724ibmf_msg_impl_t *ibmf_i_find_msg(ibmf_client_t *clientp, uint64_t tid,
725    uint8_t mgt_class, uint8_t r_method, ib_lid_t lid, ib_gid_t *gid,
726    boolean_t gid_pr, ibmf_rmpp_hdr_t *rmpp_hdr, boolean_t msg_list);
727#ifdef NOTDEF
728ibmf_msg_impl_t *ibmf_i_find_term_msg(ibmf_client_t *clientp, uint64_t tid,
729    uint8_t mgt_class, ib_lid_t lid, ib_gid_t *gid, boolean_t gid_pr,
730    ibmf_rmpp_hdr_t *rmpp_hd);
731#endif
732void ibmf_i_handle_rmpp(ibmf_client_t *clientp, ibmf_qp_handle_t qp_hdl,
733    ibmf_msg_impl_t *msgimpl, uchar_t *madp);
734int ibmf_i_send_rmpp(ibmf_msg_impl_t *msgimplp, uint8_t rmpp_type,
735    uint8_t rmpp_status, uint32_t segno, uint32_t nwl, int block);
736int ibmf_i_send_rmpp_pkts(ibmf_client_t *clientp,
737    ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, boolean_t isDS,
738    int block);
739void ibmf_i_send_rmpp_window(ibmf_msg_impl_t *msgimplp, int block);
740int ibmf_setup_term_ctx(ibmf_client_t *clientp, ibmf_msg_impl_t *regmsgimplp);
741
742/* Alternate QP WQE cache functions */
743int ibmf_altqp_send_wqe_cache_constructor(void *buf, void *cdrarg,
744    int kmflags);
745void ibmf_altqp_send_wqe_cache_destructor(void *buf, void *cdrarg);
746int ibmf_altqp_recv_wqe_cache_constructor(void *buf, void *cdrarg,
747    int kmflags);
748void ibmf_altqp_recv_wqe_cache_destructor(void *buf, void *cdrarg);
749int ibmf_i_init_altqp_wqes(ibmf_alt_qp_t *qp_ctx);
750void ibmf_i_fini_altqp_wqes(ibmf_alt_qp_t *qp_ctx);
751int ibmf_i_extend_wqe_cache(ibmf_ci_t *cip, ibmf_qp_handle_t ibmf_qp_handle,
752    boolean_t block);
753
754/* Receive callback functions */
755void ibmf_i_recv_cb_setup(ibmf_client_t *clientp);
756void ibmf_i_recv_cb_cleanup(ibmf_client_t *clientp);
757void ibmf_i_alt_recv_cb_setup(ibmf_alt_qp_t *qpp);
758void ibmf_i_alt_recv_cb_cleanup(ibmf_alt_qp_t *qpp);
759
760/* UD Dest population thread */
761int ibmf_ud_dest_tq_disp(ibmf_ci_t *cip);
762
763#ifdef __cplusplus
764}
765#endif
766
767#endif /* _SYS_IB_MGT_IBMF_IBMF_IMPL_H */
768