dapl_ia_open.c revision 9517:b4839b0aa7a4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24 */
25
26/*
27 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30
31
32/*
33 *
34 * MODULE: dapl_ia_open.c
35 *
36 * PURPOSE: Interface Adapter management
37 * Description: Interfaces in this file are completely described in
38 *		the DAPL 1.1 API, Chapter 6, section 2
39 *
40 * $Id: dapl_ia_open.c,v 1.30 2003/07/31 14:04:17 jlentini Exp $
41 */
42
#include "dapl.h"
#include "dapl_provider.h"
#include "dapl_evd_util.h"
#include "dapl_hca_util.h"
#include "dapl_ia_util.h"
#include "dapl_adapter_util.h"
#include <sys/systeminfo.h>
#include <libdevinfo.h>
#include <string.h>
51
52
53/*
54 * LOCAL PROTOTYPES
55 */
56#if defined(IBHOSTS_NAMING)
57void dapli_assign_hca_ip_address(
58	DAPL_HCA		*hca_ptr,
59	char 			*device_name);
60#endif /* IBHOSTS_NAMING */
61
62static void dapli_hca_cleanup(DAPL_HCA *hca_ptr, DAT_BOOLEAN dec_ref);
63
64/*
65 * Determine whether the platform supports RO (Relaxed ordering)
66 * Return B_TRUE if it does support RO and B_FALSE if it does not support RO
67 *
68 * udapl_ro_disallowed is an out paramter returning whether or not
69 * relaxed ordering should be disabled (regardless of whether the platform
70 * is capable of supporting relaxed ordering)
71 *
72 */
73static boolean_t
74dapl_ro_disallowed(void)
75{
76	static const char * const non_ro_capable_platforms[] = {
77		"i86pc",
78		"i86xpv",
79		"SUNW,Sun-Fire-V215",
80		"SUNW,Sun-Fire-V245",
81		"SUNW,Sun-Fire-V445",
82		"SUNW,Sun-Fire-T1000",
83		"SUNW,Sun-Fire-T200",
84		"SUNW,Sun-Blade-T6300",
85		"SUNW,Sun-Blade-T6320",
86		"SUNW,SPARC-Enterprise-T1000",
87		"SUNW,SPARC-Enterprise-T2000",
88		"SUNW,SPARC-Enterprise-T5120",
89		"SUNW,SPARC-Enterprise-T5220",
90		NULL
91	};
92	char platform[256 + 1];
93	register int i;
94	register const char *cp;
95	int ret;
96	di_node_t root_node, node;
97	boolean_t ro_disallowed;
98	static const char *ro_disallowed_property =
99	    "pci-relaxed-ordering-disallowed";
100	int bool;
101	int *boolp = &bool;
102
103	ret = sysinfo(SI_PLATFORM, platform, sizeof (platform));
104	if ((ret != -1) && (ret <= sizeof (platform))) {
105		for (i = 0; (cp = non_ro_capable_platforms[i]) != NULL; ++i) {
106			if (strcmp(platform, cp) == 0)
107				return (B_TRUE);
108		}
109	}
110
111	/*
112	 * This function only finds and looks at the FIRST udapl node.
113	 * It is assumed that there can only be one such node.
114	 */
115	if ((root_node = di_init("/", DINFOSUBTREE | DINFOPROP)) == DI_NODE_NIL)
116		return (B_FALSE);
117
118	node = di_drv_first_node("daplt", root_node);
119	if (node != DI_NODE_NIL) {
120		ret = di_prop_lookup_ints(DDI_DEV_T_ANY, node,
121		    ro_disallowed_property, &boolp);
122		switch (ret) {
123		case 0:
124		case 1:
125			ro_disallowed = B_TRUE;
126			break;
127		default:
128			ro_disallowed = B_FALSE;
129			break;
130		}
131
132	}
133	else
134		ro_disallowed = B_FALSE;
135
136	di_fini(root_node);
137
138	return (ro_disallowed);
139}
140
141/*
142 * dapl_ia_open
143 *
144 * DAPL Requirements Version xxx, 6.2.1.1
145 *
146 * Open a provider and return a handle. The handle enables the user
147 * to invoke operations on this provider.
148 *
149 * The dat_ia_open  call is actually part of the DAT registration module.
150 * That function maps the DAT_NAME parameter of dat_ia_open to a DAT_PROVIDER,
151 * and calls this function.
152 *
153 * Input:
154 *	provider
155 *	async_evd_qlen
156 *	async_evd_handle_ptr
157 *
158 * Output:
159 *	async_evd_handle
160 *	ia_handle
161 *
162 * Return Values:
163 * 	DAT_SUCCESS
164 * 	DAT_INSUFFICIENT_RESOURCES
165 * 	DAT_INVALID_PARAMETER
166 * 	DAT_INVALID_HANDLE
167 * 	DAT_NAME_NOT_FOUND	(returned by dat registry if necessary)
168 */
169DAT_RETURN
170dapl_ia_open(
171	IN	const DAT_NAME_PTR	name,
172	IN	DAT_COUNT		async_evd_qlen,
173	INOUT	DAT_EVD_HANDLE		*async_evd_handle_ptr,
174	OUT	DAT_IA_HANDLE		*ia_handle_ptr,
175	IN	boolean_t		ro_aware_client)
176{
177	DAT_RETURN	dat_status;
178	DAT_PROVIDER	*provider;
179	DAPL_HCA	*hca_ptr;
180	DAPL_IA		*ia_ptr;
181	DAPL_EVD	*evd_ptr;
182	boolean_t	ro_disallowed;
183
184	dat_status = DAT_SUCCESS;
185	hca_ptr = NULL;
186	ia_ptr = NULL;
187
188	dapl_dbg_log(DAPL_DBG_TYPE_API,
189	    "dapl_ia_open(%s, %d, %p, %p, %d)\n",
190	    name,
191	    async_evd_qlen,
192	    async_evd_handle_ptr,
193	    ia_handle_ptr,
194	    ro_aware_client);
195
196	dat_status = dapl_provider_list_search(name, &provider);
197	if (DAT_SUCCESS != dat_status) {
198		dapl_dbg_log(DAPL_DBG_TYPE_API,
199		    "dapl_ia_open: dapl_provider_list_search(\"%s\") returned "
200		    "%d\n",
201		    name,
202		    dat_status);
203
204		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG1);
205		goto bail;
206	}
207
208	/* ia_handle_ptr and async_evd_handle_ptr cannot be NULL */
209	if (ia_handle_ptr == NULL) {
210		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG4);
211		goto bail;
212	}
213	if (async_evd_handle_ptr == NULL) {
214		dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
215		goto bail;
216	}
217
218	/* initialize the caller's OUT param */
219	*ia_handle_ptr = DAT_HANDLE_NULL;
220
221	/* get the hca_ptr */
222	hca_ptr = (DAPL_HCA *)provider->extension;
223
224	/*
225	 * Open the HCA if it has not been done before.
226	 */
227	dapl_os_lock(&hca_ptr->lock);
228	if (hca_ptr->ib_hca_handle == IB_INVALID_HANDLE) {
229		/* register with the HW */
230		dat_status = dapls_ib_open_hca(hca_ptr,
231		    &hca_ptr->ib_hca_handle);
232
233		if (dat_status != DAT_SUCCESS) {
234			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
235			    "dapls_ib_open_hca failed %d\n", dat_status);
236			dapl_os_unlock(&hca_ptr->lock);
237			goto bail;
238		}
239
240		/* create a cq domain for this HCA */
241		dat_status = dapls_ib_cqd_create(hca_ptr);
242
243		if (dat_status != DAT_SUCCESS) {
244			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
245			    "ERR: Cannot allocate CQD: err %x\n", dat_status);
246			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
247			dapl_os_unlock(&hca_ptr->lock);
248			goto bail;
249		}
250		/*
251		 * Obtain the IP address associated with this name and HCA.
252		 */
253
254#ifdef IBHOSTS_NAMING
255		dapli_assign_hca_ip_address(hca_ptr, name);
256#endif /* IBHOSTS_NAMING */
257
258		/*
259		 * Obtain IA attributes from the HCA to limit certain
260		 * operations.
261		 * If using DAPL_ATS naming, ib_query_hca will also set the ip
262		 * address.
263		 */
264		dat_status = dapls_ib_query_hca(hca_ptr,
265		    &hca_ptr->ia_attr,
266		    NULL,
267		    &hca_ptr->hca_address, NULL);
268		if (dat_status != DAT_SUCCESS) {
269			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
270			dapl_os_unlock(&hca_ptr->lock);
271			goto bail;
272		}
273	}
274
275	/* is the IA going to use the ConnectX? */
276	if (hca_ptr->hermon_resize_cq != 0) {
277		/*
278		 * We are running with a ConnectX.
279		 * Determine whether platform is RO capable.
280		 * If platform support RO and client does not
281		 * support RO and we are not disabling RO, reject the open.
282		 */
283		ro_disallowed = dapl_ro_disallowed();
284
285		if (! ro_aware_client && ! ro_disallowed) {
286			dapl_dbg_log(DAPL_DBG_TYPE_API,
287			    "dapl_ia_open: failing ro_disallowed %d "
288			    "ro_aware_client %d \n",
289			    ro_disallowed, ro_aware_client);
290
291			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
292			    DAT_INVALID_RO_COOKIE);
293			dapli_hca_cleanup(hca_ptr, DAT_FALSE);
294			dapl_os_unlock(&hca_ptr->lock);
295			goto bail;
296		}
297	} else {
298		/* We are not running with a Connect X */
299		ro_disallowed = B_TRUE;
300	}
301
302
303	/* Take a reference on the hca_handle */
304	dapl_os_atomic_inc(&hca_ptr->handle_ref_count);
305	dapl_os_unlock(&hca_ptr->lock);
306
307	/* Allocate and initialize ia structure */
308	ia_ptr = dapl_ia_alloc(provider, hca_ptr);
309	if (!ia_ptr) {
310		dapl_os_lock(&hca_ptr->lock);
311		dapli_hca_cleanup(hca_ptr, DAT_TRUE);
312		dapl_os_unlock(&hca_ptr->lock);
313		dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
314		    DAT_RESOURCE_MEMORY);
315		goto bail;
316	}
317
318	/*
319	 * Note when we should be disabling relaxed ordering.
320	 * If the property indicates that we should not use relaxed ordering
321	 * we remember that fact.  If the platform is supposed to be
322	 * non relaxed ordering capable, we disable relaxed ordering as
323	 * well, just in case the property or the list indicating that
324	 * this platform is not relaxed ordering capable is mistaken.
325	 */
326	if (ro_disallowed)
327		ia_ptr->dapl_flags |= DAPL_DISABLE_RO;
328
329	/*
330	 * we need an async EVD for this IA
331	 * use the one passed in (if non-NULL) or create one
332	 */
333
334	evd_ptr = (DAPL_EVD *) *async_evd_handle_ptr;
335	if (evd_ptr) {
336		if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD) ||
337		    ! (evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) {
338			dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
339			    DAT_INVALID_HANDLE_EVD_ASYNC);
340			goto bail;
341		}
342		/*
343		 * InfiniBand allows only 1 asychronous event handler per HCA
344		 * (see InfiniBand Spec, release 1.1, vol I, section 11.5.2,
345		 *  page 559).
346		 *
347		 * We only need to make sure that this EVD's CQ belongs to
348		 * the same HCA as is being opened.
349		 */
350
351		if (evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle !=
352		    hca_ptr->ib_hca_handle) {
353			dat_status = DAT_ERROR(DAT_INVALID_HANDLE,
354			    DAT_INVALID_HANDLE_EVD_ASYNC);
355			goto bail;
356		}
357
358		ia_ptr->cleanup_async_error_evd = DAT_FALSE;
359		ia_ptr->async_error_evd = evd_ptr;
360	} else {
361		/*
362		 * Verify we have >0 length, and let the provider check the
363		 * size
364		 */
365		if (async_evd_qlen <= 0) {
366			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
367			    DAT_INVALID_ARG2);
368			goto bail;
369		}
370		dat_status = dapls_evd_internal_create(ia_ptr,
371		    NULL,	/* CNO ptr */
372		    async_evd_qlen,
373		    DAT_EVD_ASYNC_FLAG,
374		    &evd_ptr);
375		if (dat_status != DAT_SUCCESS) {
376			goto bail;
377		}
378
379		dapl_os_atomic_inc(&evd_ptr->evd_ref_count);
380
381		dapl_os_lock(&hca_ptr->lock);
382		if (hca_ptr->async_evd != (DAPL_EVD *) 0) {
383#if 0
384			/*
385			 * The async EVD for this HCA has already been assigned.
386			 * It's an error to try and assign another one.
387			 *
388			 * However, we need to somehow allow multiple IAs
389			 * off of the same HCA.  The right way to do this
390			 * is by dispatching events off the HCA to the
391			 * appropriate IA, but we aren't there yet.  So for
392			 * now we create the EVD but don't connect it to
393			 * anything.
394			 */
395			dapl_os_atomic_dec(&evd_ptr->evd_ref_count);
396			dapl_evd_free(evd_ptr);
397			dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
398			    DAT_INVALID_ARG4);
399			goto bail;
400#endif
401			dapl_os_unlock(&hca_ptr->lock);
402		} else {
403			hca_ptr->async_evd = evd_ptr;
404			dapl_os_unlock(&hca_ptr->lock);
405
406			/*
407			 * Register the handlers associated with the async EVD.
408			 */
409			dat_status = dapls_ia_setup_callbacks(ia_ptr, evd_ptr);
410			if (dat_status != DAT_SUCCESS) {
411				/* Assign the EVD so it gets cleaned up */
412				ia_ptr->cleanup_async_error_evd = DAT_TRUE;
413				ia_ptr->async_error_evd = evd_ptr;
414				goto bail;
415			}
416		}
417
418		ia_ptr->cleanup_async_error_evd = DAT_TRUE;
419		ia_ptr->async_error_evd = evd_ptr;
420	}
421
422	dat_status = DAT_SUCCESS;
423	*ia_handle_ptr = ia_ptr;
424	*async_evd_handle_ptr = evd_ptr;
425
426bail:
427	if (dat_status != DAT_SUCCESS) {
428		if (ia_ptr) {
429			/* This will release the async EVD if needed.  */
430			(void) dapl_ia_close(ia_ptr, DAT_CLOSE_ABRUPT_FLAG);
431		}
432	}
433
434	dapl_dbg_log(DAPL_DBG_TYPE_RTN,
435	    "dapl_ia_open () returns 0x%x\n",
436	    dat_status);
437
438	return (dat_status);
439}
440
441/*
442 * dapli_hca_cleanup
443 *
444 * Clean up partially allocated HCA stuff. Strictly to make cleanup
445 * simple.
446 */
447void
448dapli_hca_cleanup(
449	DAPL_HCA	*hca_ptr,
450	DAT_BOOLEAN	dec_ref)
451{
452	(void) dapls_ib_close_hca(hca_ptr->ib_hca_handle);
453	hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
454	if (dec_ref == DAT_TRUE) {
455		dapl_os_atomic_dec(&hca_ptr->handle_ref_count);
456	}
457}
458
459#if defined(IBHOSTS_NAMING)
460
461char *dapli_get_adapter_num(
462	char 			*device_name);
463
464void dapli_setup_dummy_addr(
465	IN  DAPL_HCA		*hca_ptr,
466	IN  char		*hca_name);
467/*
468 * dapli_assign_hca_ip_address
469 *
470 * Obtain the IP address of the passed in name, which represents a
471 * port on the hca. There are three methods here to obtain the
472 * appropriate IP address, each with their own shortcoming:
473 * 1) IPOIB_NAMING. Requires the implementation of the IPoIB
474 *    interface defined in include/dapl/ipoib_names.h. This is
475 *    not the recommended interface as IPoIB is limited at
476 *    the point we need to obtain an IP address on the
477 *    passive side of a connection. The code supporting this
478 *    implementation has been removed.
479 *
480 * 2) IBHOSTS. An entry exists in DNS and in the /etc/dapl/ibhosts
481 *    file. The immediate drawback here is that we must dictate
482 *    how to name the interface, which is a stated DAPL non-goal.
483 *    In the broader perspective, this method requires us to xmit
484 *    the IP address in the private data of a connection, which has
485 *    other fun problems. This is the default method and is known to
486 *    work, but it has problems.
487 *
488 * 3) Obtain the IP address from the driver, which has registered
489 *    the address with the SA for retrieval.
490 *
491 *
492 * Input:
493 *	hca_ptr			Pointer to HCA structure
494 *	device_name		Name of device as reported by the provider
495 *
496 * Output:
497 * 	none
498 *
499 * Returns:
500 * 	char * to string number
501 */
502void
503dapli_assign_hca_ip_address(
504	DAPL_HCA		*hca_ptr,
505	char 			*device_name)
506{
507	char		*adapter_num;
508#define	NAMELEN	128
509	struct addrinfo	*addr;
510	char 		hostname[NAMELEN];
511	char		*str;
512	int		rc;
513
514	/*
515	 * Obtain the IP address of the adapter. This is a simple
516	 * scheme that creates a name that must appear available to
517	 * DNS, e.g. it must be in the local site DNS or in the local
518	 * /etc/hosts file, etc.
519	 *
520	 *	<hostname>-ib<index>
521	 *
522	 * This scheme obviously doesn't work with adapters from
523	 * multiple vendors, but will suffice in common installations.
524	 */
525
526	rc = gethostname(hostname, NAMELEN);
527	/*
528	 * Strip off domain info if it exists (e.g. mynode.mydomain.com)
529	 */
530	for (str = hostname; *str && *str != '.'; ) {
531		str++;
532	}
533	if (*str == '.') {
534		*str = '\0';
535	}
536	dapl_os_strcat(hostname, "-ib");
537	adapter_num = dapli_get_adapter_num(device_name);
538	dapl_os_strcat(hostname, adapter_num);
539
540	rc = dapls_osd_getaddrinfo(hostname, &addr);
541
542	if (rc != 0) {
543		/* Not registered in DNS, provide a dummy value */
544		dapli_setup_dummy_addr(hca_ptr, hostname);
545	} else {
546		/*
547		 * hca_address is defined as a DAT_SOCK_ADDR6 whereas ai_addr
548		 * is a sockaddr
549		 */
550		(void) dapl_os_memcpy((void *)&hca_ptr->hca_address,
551		    (void *)(addr->ai_addr), sizeof (DAT_SOCK_ADDR6));
552	}
553}
554
555
556/*
557 * dapli_stup_dummy_addr
558 *
559 * Set up a dummy local address for the HCA. Things are not going
560 * to work too well if this happens.
561 * We call this routine if:
562 *  - remote host adapter name is not in DNS
563 *  - IPoIB implementation is not correctly set up
564 *  - Similar nonsense.
565 *
566 * Input:
567 *      hca_ptr
568 *	rhost_name		Name of remote adapter
569 *
570 * Output:
571 * 	none
572 *
573 * Returns:
574 * 	none
575 */
576void
577dapli_setup_dummy_addr(
578	IN  DAPL_HCA		*hca_ptr,
579	IN  char		*rhost_name)
580{
581	struct sockaddr_in	*si;
582
583	/* Not registered in DNS, provide a dummy value */
584	dapl_dbg_log(DAPL_DBG_TYPE_ERR, "WARNING: <%s> not registered in DNS,"
585	    " using dummy IP value\n", rhost_name);
586	si = (struct sockaddr_in *)&hca_ptr->hca_address;
587	si->sin_family = AF_INET;
588	si->sin_addr.s_addr = 0x01020304;
589}
590
591
/*
 * dapli_get_adapter_num
 *
 * Given a device name, return a string of the device number.
 *
 * The number is taken to be everything from the first decimal digit
 * to the end of the name, e.g. "hermon0" yields "0" and "ib12" yields
 * "12".  Names that contain no digit, and a NULL name, yield "0".
 *
 * Input:
 *	device_name		Name of device as reported by the provider
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	char * to string number.  This points either into device_name
 * 	itself or at a static "0" string; it must not be freed.
 */
char *
dapli_get_adapter_num(
	char 		*device_name)
{
	static char	zero[] = "0";
	char		*p;

	if (device_name == NULL) {
		return (zero);
	}

	/*
	 * Optimistically simple algorithm: the device number is assumed
	 * to start at the first digit of the device name string. Devices
	 * without a digit in their name are by default "0".
	 */
	for (p = device_name; *p != '\0'; p++) {
		/*
		 * isdigit() is only defined for values representable as
		 * unsigned char (or EOF); plain char may be negative.
		 */
		if (isdigit((unsigned char)*p)) {
			return (p);
		}
	}

	return (zero);
}
627#endif /* IBHOSTS_NAMING */
628
629
630/*
631 * Local variables:
632 *  c-indent-level: 4
633 *  c-basic-offset: 4
634 *  tab-width: 8
635 * End:
636 */
637