1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * tavor_cfg.c
29 *    Tavor Configuration Profile Routines
30 *
31 *    Implements the routines necessary for initializing and (later) tearing
32 *    down the list of Tavor configuration information.
33 */
34
35#include <sys/types.h>
36#include <sys/conf.h>
37#include <sys/ddi.h>
38#include <sys/sunddi.h>
39#include <sys/modctl.h>
40#include <sys/bitmap.h>
41
42#include <sys/ib/adapters/tavor/tavor.h>
43
44/* Set to enable alternative configurations: 0 = automatic config, 1 = manual */
45uint32_t tavor_alt_config_enable	= 0;
46
47/* Number of supported QPs and their maximum size */
48uint32_t tavor_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
49uint32_t tavor_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
50
51/* Number of supported SGL per WQE */
52uint32_t tavor_wqe_max_sgl		= TAVOR_NUM_WQE_SGL;
53
54/* Number of supported CQs and their maximum size */
55uint32_t tavor_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
56uint32_t tavor_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
57
58/* Select to enable SRQ or not; NOTE: 0 for disabled, 1 for enabled */
59uint32_t tavor_srq_enable		= 1;
60
61/* Number of supported SRQs and their maximum size */
62uint32_t tavor_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
63uint32_t tavor_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
64uint32_t tavor_srq_max_sgl		= TAVOR_SRQ_MAX_SGL;
65
66/* Default size for all EQs */
67uint32_t tavor_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
68
69/* Number of supported RDB (for incoming RDMA Read/Atomic) */
70uint32_t tavor_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
71
72/*
73 * Number of support multicast groups, number of QP per multicast group, and
74 * the number of entries (from the total number) in the multicast group "hash
75 * table"
76 */
77uint32_t tavor_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
78uint32_t tavor_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
79uint32_t tavor_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
80
81/*
82 * Number of supported MPTs (memory regions and windows) and their maximum
83 * size.  Also the number of MTT per "MTT segment" (see tavor_mr.h for more
84 * details)
85 */
86uint32_t tavor_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
87uint32_t tavor_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
88uint32_t tavor_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
89
90/*
91 * Number of supported Tavor mailboxes ("In" and "Out") and their maximum
92 * sizes, respectively
93 */
94uint32_t tavor_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
95uint32_t tavor_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
96uint32_t tavor_log_num_intr_inmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
97uint32_t tavor_log_num_intr_outmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
98uint32_t tavor_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
99uint32_t tavor_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
100
101/* Number of supported UAR pages */
102uint32_t tavor_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
103
104/* Number of supported Protection Domains (PD) */
105uint32_t tavor_log_num_pd		= TAVOR_NUM_PD_SHIFT;
106
107/* Number of supported Address Handles (AH) */
108uint32_t tavor_log_num_ah		= TAVOR_NUM_AH_SHIFT;
109
110/*
111 * Number of total supported PKeys per PKey table (i.e.
112 * per port).  Also the number of SGID per GID table.
113 */
114uint32_t tavor_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
115uint32_t tavor_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
116
117/* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
118uint32_t tavor_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
119uint32_t tavor_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
120
121/* Maximum supported MTU and portwidth */
122uint32_t tavor_max_mtu			= TAVOR_MAX_MTU;
123uint32_t tavor_max_port_width		= TAVOR_MAX_PORT_WIDTH;
124
125/* Number of supported Virtual Lanes (VL) */
126uint32_t tavor_max_vlcap		= TAVOR_MAX_VLCAP;
127
128/* Number of supported ports (1 or 2) */
129uint32_t tavor_num_ports		= TAVOR_NUM_PORTS;
130
/*
 * Selects, separately for QP0 and for QP1, whether the agents built into
 * the firmware are used.
 */
uint32_t tavor_qp0_agents_in_fw = 1;
uint32_t tavor_qp1_agents_in_fw = 0;

/*
 * DMA mapping mode: 0 requests DDI_DMA_STREAMING mappings, 1 requests
 * DDI_DMA_CONSISTENT mappings.
 */
uint32_t tavor_streaming_consistent = 1;

/*
 * Applies to DDI_DMA_CONSISTENT mappings only: controls whether the
 * necessity for ddi_dma_sync() calls is overridden.
 */
uint32_t tavor_consistent_syncoverride = 0;

/*
 * PCI IOMMU bypass control.
 *
 * tavor_iommu_bypass is a global setting covering all memory addresses.
 * When it is set to BYPASS, an attempt to register memory for streaming
 * (ie: NON-COHERENT) necessarily turns BYPASS off for that registration.
 * Setting 'tavor_disable_streaming_on_bypass' to 1 reverses that choice:
 * the mapping is changed to be implicitly consistent (ie: COHERENT) and
 * the IOMMU BYPASS operation is still performed.
 */
uint32_t tavor_iommu_bypass = 1;
uint32_t tavor_disable_streaming_on_bypass = 0;

/*
 * Where QP work queues are allocated: 0 = system memory, 1 = Tavor DDR
 * memory.
 */
uint32_t tavor_qp_wq_inddr = 0;

/*
 * Where SRQ work queues are allocated: 0 = system memory, 1 = Tavor DDR
 * memory.
 */
uint32_t tavor_srq_wq_inddr = 0;

/*
 * Interrupt type: 0 = 'legacy interrupt', 1 = MSI (Message Signaled
 * Interrupts) when available.
 */
uint32_t tavor_use_msi_if_avail = 1;
180
181/*
182 * This is a patchable variable that determines the time we will wait after
183 * initiating SW reset before we do our first read from Tavor config space.
184 * If this value is set too small (less than the default 100ms), it is
185 * possible for Tavor hardware to be unready to respond to the config cycle
186 * reads.  This could cause master abort on the PCI bridge.  Note: If
187 * "tavor_sw_reset_delay" is set to zero, then no software reset of the Tavor
188 * device will be attempted.
189 */
190uint32_t tavor_sw_reset_delay		= TAVOR_SW_RESET_DELAY;
191
192/*
193 * These are patchable variables for tavor command polling. The poll_delay is
194 * the number of usec to wait in-between calls to poll the 'go' bit.  The
195 * poll_max is the total number of usec to loop in waiting for the 'go' bit to
196 * clear.
197 */
198uint32_t tavor_cmd_poll_delay		= TAVOR_CMD_POLL_DELAY;
199uint32_t tavor_cmd_poll_max		= TAVOR_CMD_POLL_MAX;
200
201/*
202 * This is a patchable variable that determines the frequency with which
203 * the AckReq bit will be set in outgoing RC packets.  The AckReq bit will be
204 * set in at least every 2^tavor_qp_ackreq_freq packets (but at least once
205 * per message, i.e. in the last packet).  Tuning this value can increase
206 * IB fabric utilization by cutting down on the number of unnecessary ACKs.
207 */
208uint32_t tavor_qp_ackreq_freq		= TAVOR_QP_ACKREQ_FREQ;
209
/*
 * Patchable: default for the maximum number of outstanding split
 * transactions.  The number of outstanding split transactions (i.e. PCI
 * reads) has an effect on device throughput.  The value here should not be
 * modified, because it defines the default (least common denominator - one
 * (1) PCI read) behavior that is guaranteed to work no matter how the Tavor
 * firmware has been initialized.  The format is the same as that of the
 * corresponding field in the "PCI-X Command Register".
 *
 * SPARC platforms default to 1 outstanding PCI read; all other platforms
 * default to whatever number of outstanding PCI reads the tavor firmware
 * has set.
 */
#ifdef	__sparc
int tavor_max_out_splt_trans = 0;
#else
int tavor_max_out_splt_trans = -1;
#endif

/*
 * Patchable: default for the maximum size of a PCI read burst.  This
 * maximum size has an effect on device throughput.  The value here should
 * not be modified, because it defines the default (least common
 * denominator - 512B read) behavior that is guaranteed to work no matter
 * how the Tavor device has been initialized.  The format is the same as
 * that of the corresponding field in the "PCI-X Command Register".
 *
 * SPARC platforms default to a 512B read; all other platforms default to
 * whatever the tavor firmware has set.
 */
#ifdef	__sparc
int tavor_max_mem_rd_byte_cnt = 0;
#else
int tavor_max_mem_rd_byte_cnt = -1;
#endif

#ifdef	__sparc
/* The IOMMU bypass check is only defined and called in SPARC builds */
static void tavor_check_iommu_bypass(tavor_state_t *state,
    tavor_cfg_profile_t *cp);
#endif
256
257static void tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp);
258static void tavor_cfg_prop_lookup(tavor_state_t *state,
259    tavor_cfg_profile_t *cp);
260
261/*
262 * tavor_cfg_profile_init_phase1()
263 *    Context: Only called from attach() path context
264 */
265int
266tavor_cfg_profile_init_phase1(tavor_state_t *state)
267{
268	tavor_cfg_profile_t	*cp;
269
270	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase1);
271
272	/*
273	 * Allocate space for the configuration profile structure
274	 */
275	cp = (tavor_cfg_profile_t *)kmem_zalloc(sizeof (tavor_cfg_profile_t),
276	    KM_SLEEP);
277
278	cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
279	cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
280	cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
281	cp->cp_cmd_poll_delay		= tavor_cmd_poll_delay;
282	cp->cp_cmd_poll_max		= tavor_cmd_poll_max;
283	cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
284	cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
285	cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
286	cp->cp_srq_enable		= tavor_srq_enable;
287	cp->cp_fmr_enable		= 0;
288	cp->cp_fmr_max_remaps		= 0;
289
290	/*
291	 * Although most of the configuration is enabled in "phase2" of the
292	 * cfg_profile_init, we have to setup the OUT mailboxes here, since
293	 * they are used immediately after this "phase1" completes.  Check for
294	 * alt_config_enable, and set the values appropriately.  Otherwise, the
295	 * config profile is setup using the values based on the dimm size.
296	 * While it is expected that the mailbox size and number will remain
297	 * the same independent of dimm size, we separate it out here anyway
298	 * for completeness.
299	 *
300	 * We have to setup SRQ settings here because MOD_STAT_CFG must be
301	 * called before our call to QUERY_DEVLIM.  If SRQ is enabled, then we
302	 * must enable it in the firmware so that the phase2 settings will have
303	 * the right device limits.
304	 */
305	if (tavor_alt_config_enable) {
306		cp->cp_log_num_outmbox		= tavor_log_num_outmbox;
307		cp->cp_log_num_intr_outmbox	= tavor_log_num_intr_outmbox;
308		cp->cp_log_outmbox_size		= tavor_log_outmbox_size;
309		cp->cp_log_num_inmbox		= tavor_log_num_inmbox;
310		cp->cp_log_num_intr_inmbox	= tavor_log_num_intr_inmbox;
311		cp->cp_log_inmbox_size		= tavor_log_inmbox_size;
312		cp->cp_log_num_srq		= tavor_log_num_srq;
313		cp->cp_log_max_srq_sz		= tavor_log_max_srq_sz;
314
315	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
316		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
317		cp->cp_log_num_intr_outmbox	=
318		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
319		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
320		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
321		cp->cp_log_num_intr_inmbox	=
322		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
323		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
324		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_256;
325		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
326
327	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
328		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
329		cp->cp_log_num_intr_outmbox	=
330		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
331		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
332		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
333		cp->cp_log_num_intr_inmbox	=
334		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
335		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
336		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
337		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
338
339	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
340		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
341		cp->cp_log_num_intr_outmbox	=
342		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
343		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
344		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
345		cp->cp_log_num_intr_inmbox	=
346		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
347		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
348		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_MIN;
349		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT_MIN;
350
351	} else {
352		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
353		    TAVOR_TNF_ERROR, "");
354		return (DDI_FAILURE);
355	}
356
357	/*
358	 * Set default DMA mapping mode.  Ensure consistency of flags
359	 * with both architecture type and other configuration flags.
360	 */
361	if (tavor_streaming_consistent == 0) {
362#ifdef	__sparc
363		cp->cp_streaming_consistent = DDI_DMA_STREAMING;
364
365		/* Can't do both "streaming" and IOMMU bypass */
366		if (tavor_iommu_bypass != 0) {
367			TNF_PROBE_0(tavor_cfg_profile_streamingbypass_fail,
368			    TAVOR_TNF_ERROR, "");
369			kmem_free(cp, sizeof (tavor_cfg_profile_t));
370			return (DDI_FAILURE);
371		}
372#else
373		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
374#endif
375	} else {
376		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
377	}
378
379	/* Determine whether to override ddi_dma_sync() */
380	cp->cp_consistent_syncoverride = tavor_consistent_syncoverride;
381
382	/* Attach the configuration profile to Tavor softstate */
383	state->ts_cfg_profile = cp;
384
385	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase1);
386	return (DDI_SUCCESS);
387}
388
389/*
390 * tavor_cfg_profile_init_phase2()
391 *    Context: Only called from attach() path context
392 */
393int
394tavor_cfg_profile_init_phase2(tavor_state_t *state)
395{
396	tavor_cfg_profile_t	*cp;
397
398	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase2);
399
400	/* Read the configuration profile from Tavor softstate */
401	cp = state->ts_cfg_profile;
402
403	/*
404	 * Verify the config profile setting.  The 'setting' should already be
405	 * set, during a call to ddi_dev_regsize() to get the size of DDR
406	 * memory, or during a fallback to a smaller supported size.  If it is
407	 * not set, we should not have reached this 'phase2'.  So we assert
408	 * here.
409	 */
410	ASSERT(state->ts_cfg_profile_setting != 0);
411
412	/*
413	 * The automatic configuration override is the
414	 * 'tavor_alt_config_enable' variable.  If this is set, we no longer
415	 * use the DIMM size to enable the correct profile.  Instead, all of
416	 * the tavor config options at the top of this file are used directly.
417	 *
418	 * This allows customization for a user who knows what they are doing
419	 * to set tavor configuration values manually.
420	 *
421	 * If this variable is 0, we do automatic config for both 128MB and
422	 * 256MB DIMM sizes.
423	 */
424	if (tavor_alt_config_enable) {
425		/*
426		 * Initialize the configuration profile
427		 */
428		cp->cp_log_num_qp		= tavor_log_num_qp;
429		cp->cp_log_max_qp_sz		= tavor_log_max_qp_sz;
430
431		/* Determine WQE sizes from requested max SGLs */
432		tavor_cfg_wqe_sizes(cp);
433
434		cp->cp_log_num_cq		= tavor_log_num_cq;
435		cp->cp_log_max_cq_sz		= tavor_log_max_cq_sz;
436		cp->cp_log_default_eq_sz	= tavor_log_default_eq_sz;
437		cp->cp_log_num_rdb		= tavor_log_num_rdb;
438		cp->cp_log_num_mcg		= tavor_log_num_mcg;
439		cp->cp_num_qp_per_mcg		= tavor_num_qp_per_mcg;
440		cp->cp_log_num_mcg_hash		= tavor_log_num_mcg_hash;
441		cp->cp_log_num_mpt		= tavor_log_num_mpt;
442		cp->cp_log_max_mrw_sz		= tavor_log_max_mrw_sz;
443		cp->cp_log_num_mttseg		= tavor_log_num_mttseg;
444		cp->cp_log_num_uar		= tavor_log_num_uar;
445		cp->cp_log_num_pd		= tavor_log_num_pd;
446		cp->cp_log_num_ah		= tavor_log_num_ah;
447		cp->cp_log_max_pkeytbl		= tavor_log_max_pkeytbl;
448		cp->cp_log_max_gidtbl		= tavor_log_max_gidtbl;
449		cp->cp_hca_max_rdma_in_qp	= tavor_hca_max_rdma_in_qp;
450		cp->cp_hca_max_rdma_out_qp	= tavor_hca_max_rdma_out_qp;
451		cp->cp_max_mtu			= tavor_max_mtu;
452		cp->cp_max_port_width		= tavor_max_port_width;
453		cp->cp_max_vlcap		= tavor_max_vlcap;
454		cp->cp_num_ports		= tavor_num_ports;
455		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
456		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
457		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
458		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
459		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
460		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
461
462	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
463		/*
464		 * Initialize the configuration profile
465		 */
466		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_256;
467		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
468
469		/* Determine WQE sizes from requested max SGLs */
470		tavor_cfg_wqe_sizes(cp);
471
472		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_256;
473		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
474		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
475		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_256;
476		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
477		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
478		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
479		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_256;
480		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_256;
481		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
482		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
483		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
484		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
485		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
486		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
487		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
488		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
489		cp->cp_max_mtu			= TAVOR_MAX_MTU;
490		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
491		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
492		cp->cp_num_ports		= TAVOR_NUM_PORTS;
493		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
494		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
495		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
496		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
497		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
498		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
499
500	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
501		/*
502		 * Initialize the configuration profile
503		 */
504		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
505		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
506
507		/* Determine WQE sizes from requested max SGLs */
508		tavor_cfg_wqe_sizes(cp);
509
510		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
511		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
512		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
513		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
514		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
515		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
516		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
517		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
518		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
519		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
520		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
521		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
522		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
523		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
524		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
525		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
526		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
527		cp->cp_max_mtu			= TAVOR_MAX_MTU;
528		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
529		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
530		cp->cp_num_ports		= TAVOR_NUM_PORTS;
531		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
532		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
533		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
534		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
535		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
536		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
537
538	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
539		/*
540		 * Initialize the configuration profile for minimal footprint.
541		 */
542
543		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_MIN;
544		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT_MIN;
545
546		/* Determine WQE sizes from requested max SGLs */
547		tavor_cfg_wqe_sizes(cp);
548
549		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_MIN;
550		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT_MIN;
551		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
552		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_MIN;
553		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT_MIN;
554		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG_MIN;
555		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT_MIN;
556		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_MIN;
557		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_MIN;
558		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT_MIN;
559		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT_MIN;
560		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
561		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT_MIN;
562		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
563		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
564		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
565		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
566		cp->cp_max_mtu			= TAVOR_MAX_MTU;
567		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
568		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
569		cp->cp_num_ports		= TAVOR_NUM_PORTS;
570		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
571		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
572		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
573		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
574		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
575		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
576
577	} else {
578		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
579		    TAVOR_TNF_ERROR, "");
580		return (DDI_FAILURE);
581	}
582
583	/*
584	 * Set IOMMU bypass or not.  Ensure consistency of flags with
585	 * architecture type.
586	 */
587#ifdef __sparc
588	if (tavor_iommu_bypass == 1) {
589		tavor_check_iommu_bypass(state, cp);
590	} else {
591		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
592		cp->cp_disable_streaming_on_bypass = 0;
593	}
594#else
595	cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
596	cp->cp_disable_streaming_on_bypass = 0;
597#endif
598	/* Set whether QP WQEs will be in DDR or not */
599	cp->cp_qp_wq_inddr = (tavor_qp_wq_inddr == 0) ?
600	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
601
602	/* Set whether SRQ WQEs will be in DDR or not */
603	cp->cp_srq_wq_inddr = (tavor_srq_wq_inddr == 0) ?
604	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
605
606	cp->cp_use_msi_if_avail = tavor_use_msi_if_avail;
607
608	/* Determine additional configuration from optional properties */
609	tavor_cfg_prop_lookup(state, cp);
610
611	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase2);
612	return (DDI_SUCCESS);
613}
614
615
616/*
617 * tavor_cfg_profile_fini()
618 *    Context: Only called from attach() and/or detach() path contexts
619 */
620void
621tavor_cfg_profile_fini(tavor_state_t *state)
622{
623	TAVOR_TNF_ENTER(tavor_cfg_profile_fini);
624
625	/*
626	 * Free up the space for configuration profile
627	 */
628	kmem_free(state->ts_cfg_profile, sizeof (tavor_cfg_profile_t));
629
630	TAVOR_TNF_EXIT(tavor_cfg_profile_fini);
631}
632
633
634/*
635 * tavor_cfg_wqe_sizes()
636 *    Context: Only called from attach() path context
637 */
638static void
639tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp)
640{
641	uint_t	max_size, log2;
642	uint_t	max_sgl, real_max_sgl;
643
644	/*
645	 * Get the requested maximum number SGL per WQE from the Tavor
646	 * patchable variable
647	 */
648	max_sgl = tavor_wqe_max_sgl;
649
650	/*
651	 * Use requested maximum number of SGL to calculate the max descriptor
652	 * size (while guaranteeing that the descriptor size is a power-of-2
653	 * cachelines).  We have to use the calculation for QP1 MLX transport
654	 * because the possibility that we might need to inline a GRH, along
655	 * with all the other headers and alignment restrictions, sets the
656	 * maximum for the number of SGLs that we can advertise support for.
657	 */
658	max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
659	log2 = highbit(max_size);
660	if ((max_size & (max_size - 1)) == 0) {
661		log2 = log2 - 1;
662	}
663	max_size = (1 << log2);
664
665	/*
666	 * Now clip the maximum descriptor size based on Tavor HW maximum
667	 */
668	max_size = min(max_size, TAVOR_QP_WQE_MAX_SIZE);
669
670	/*
671	 * Then use the calculated max descriptor size to determine the "real"
672	 * maximum SGL (the number beyond which we would roll over to the next
673	 * power-of-2).
674	 */
675	real_max_sgl = (max_size - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
676
677	/* Then save away this configuration information */
678	cp->cp_wqe_max_sgl	= max_sgl;
679	cp->cp_wqe_real_max_sgl = real_max_sgl;
680
681	/* SRQ SGL gets set to it's own patchable variable value */
682	cp->cp_srq_max_sgl		= tavor_srq_max_sgl;
683}
684
685
686/*
687 * tavor_cfg_prop_lookup()
688 *    Context: Only called from attach() path context
689 */
690static void
691tavor_cfg_prop_lookup(tavor_state_t *state, tavor_cfg_profile_t *cp)
692{
693	uint_t		num_ports, nelementsp;
694	uchar_t		*datap;
695	int		status;
696
697	/*
698	 * Read the property defining the number of Tavor ports to
699	 * support.  If the property is undefined or invalid, then return.
700	 * We return here assuming also that OBP is not supposed to be setting
701	 * up other properties in this case (eg: HCA plugin cards).  But if
702	 * this property is valid, then we print out a message for the other
703	 * properties to show an OBP error.
704	 */
705	num_ports = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
706	    DDI_PROP_DONTPASS, "#ports", 0);
707	if ((num_ports > TAVOR_NUM_PORTS) || (num_ports == 0)) {
708		return;
709	}
710	cp->cp_num_ports   = num_ports;
711
712	/*
713	 * The system image guid is not currently supported in the 1275
714	 * binding.  So we leave this commented out for now.
715	 */
716#ifdef SUPPORTED_IN_1275_BINDING
717	/*
718	 * Read the property defining the value to use later to override the
719	 * default SystemImageGUID (in firmware).  If the property is
720	 * undefined, then return.
721	 */
722	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
723	    DDI_PROP_DONTPASS, "system-image-guid", &datap, &nelementsp);
724	if (status == DDI_PROP_SUCCESS) {
725		cp->cp_sysimgguid = ((uint64_t *)datap)[0];
726		ddi_prop_free(datap);
727	} else {
728		cmn_err(CE_NOTE,
729		    "Unable to read OBP system-image-guid property");
730	}
731#endif
732
733	/*
734	 * Read the property defining the value to use later to override
735	 * the default SystemImageGUID (in firmware).  If the property is
736	 * undefined, then return.
737	 */
738	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
739	    DDI_PROP_DONTPASS, "node-guid", &datap, &nelementsp);
740	if (status == DDI_PROP_SUCCESS) {
741		cp->cp_nodeguid = ((uint64_t *)datap)[0];
742		ddi_prop_free(datap);
743	} else {
744		cmn_err(CE_NOTE, "Unable to read OBP node-guid property");
745	}
746
747	/*
748	 * Using the value for the number of ports (above) read the properties
749	 * used to later to override the default PortGUIDs for each Tavor port.
750	 * If either of these properties are undefined, then return.
751	 */
752	if (num_ports == TAVOR_NUM_PORTS) {
753		status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY,
754		    state->ts_dip, DDI_PROP_DONTPASS, "port-2-guid", &datap,
755		    &nelementsp);
756		if (status == DDI_PROP_SUCCESS) {
757			cp->cp_portguid[1] = ((uint64_t *)datap)[0];
758			ddi_prop_free(datap);
759		} else {
760			cmn_err(CE_NOTE,
761			    "Unable to read OBP port-2-guid property");
762		}
763	}
764	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
765	    DDI_PROP_DONTPASS, "port-1-guid", &datap, &nelementsp);
766	if (status == DDI_PROP_SUCCESS) {
767		cp->cp_portguid[0] = ((uint64_t *)datap)[0];
768		ddi_prop_free(datap);
769	} else {
770		cmn_err(CE_NOTE, "Unable to read OBP port-1-guid property");
771	}
772}
773
#ifdef __sparc
/*
 * tavor_check_iommu_bypass()
 *    Context: Only called from attach() path context
 *
 *    Determines whether IOMMU bypass can be used by test-allocating a DMA
 *    handle with DDI_DMA_FORCE_PHYSICAL, and records the outcome in
 *    cp->cp_iommu_bypass and cp->cp_disable_streaming_on_bypass.  Both
 *    fields are written on every path.
 */
static void
tavor_check_iommu_bypass(tavor_state_t *state, tavor_cfg_profile_t *cp)
{
	ddi_dma_handle_t	dmahdl;
	ddi_dma_attr_t		dma_attr;
	int			status;

	tavor_dma_attr_init(&dma_attr);

	/* Try mapping for IOMMU bypass (Force Physical) */
	dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;

	/*
	 * Call ddi_dma_alloc_handle().  If this returns DDI_DMA_BADATTR then
	 * it is not possible to use IOMMU bypass with our PCI bridge parent.
	 * For example, certain versions of Tomatillo do not support IOMMU
	 * bypass.  Since the function we are in can only be called if iommu
	 * bypass was requested in the config profile, we configure for bypass
	 * if the ddi_dma_alloc_handle() was successful.  Otherwise (for
	 * DDI_DMA_BADATTR or any other failure status), we configure for
	 * non-bypass (ie: normal) mapping.
	 */
	status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
	    DDI_DMA_SLEEP, NULL, &dmahdl);
	if (status != DDI_SUCCESS) {
		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
		cp->cp_disable_streaming_on_bypass = 0;
		return;
	}

	/* The handle was only a probe; release it right away */
	ddi_dma_free_handle(&dmahdl);

	cp->cp_iommu_bypass = TAVOR_BINDMEM_BYPASS;
	cp->cp_disable_streaming_on_bypass =
	    tavor_disable_streaming_on_bypass;
}
#endif
816