dapl_name_service.c revision 9517:b4839b0aa7a4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24 */
25
26/*
27 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30
31/*
32 *
33 * MODULE: dapl_name_service.c
34 *
35 * PURPOSE: Provide simple, file-based name services in the absence
36 *	    of DNS hooks for a particular transport type. If an
37 *	    InfiniBand implementation supports IPoIB, this should
38 *	    not be used.
39 *
40 * Description: Interfaces in this file are completely described in
41 *		dapl_name_service.h
42 */
43
44/*
45 * Include files for setting up a network name
46 */
47#include "dapl.h"
48#include "dapl_name_service.h"
49
50#include <netinet/in.h>
51#include <sys/sockio.h>
52#include <net/if.h>
53#include <net/if_dl.h>
54#include <net/if_arp.h>
55#include <net/if_types.h>
56#include <arpa/inet.h>
57#include <poll.h>
58#include <ibd/ibd.h>
59
#ifdef IBHOSTS_NAMING
/* Flat file that supplies the host name to GID mappings. */
#define	MAP_FILE		"/etc/dapl/ibhosts"
/* Number of slots in g_gid_map_table. */
#define	MAX_GID_ENTRIES		32

/*
 * IP address to GID table loaded from MAP_FILE.  The scans in this file
 * treat the first entry whose ip_address is zero as the end of the table.
 */
DAPL_GID_MAP	g_gid_map_table[MAX_GID_ENTRIES];

DAT_RETURN	dapli_ns_create_gid_map(void);
DAT_RETURN	dapli_ns_add_address(IN DAPL_GID_MAP *gme);
#endif /* IBHOSTS_NAMING */
68
69/*
70 * dapls_ns_init
71 *
72 * Initialize naming services
73 *
74 * Input:
75 *	none
76 *
77 * Output:
78 * 	none
79 *
80 * Returns:
81 * 	DAT_SUCCESS
82 *	DAT_INVALID_PARAMETER
83 */
84DAT_RETURN
85dapls_ns_init(void)
86{
87	DAT_RETURN	dat_status;
88
89	dat_status = DAT_SUCCESS;
90#ifdef IBHOSTS_NAMING
91	dat_status = dapli_ns_create_gid_map();
92#endif /* IBHOSTS_NAMING */
93
94	return (dat_status);
95}
96
97#ifdef IBHOSTS_NAMING
/*
 * dapli_ns_create_gid_map
 *
 * Read MAP_FILE and load g_gid_map_table with one IP address / GID
 * pair per record.  A record is a host name followed by the GID prefix
 * and the GID guid, both read with the F64x conversion.
 *
 * This implementation is a simple method providing name services
 * when more advanced mechanisms do not exist. The proper way
 * to obtain these mappings is to use a name service such as is
 * provided by IPoIB on InfiniBand.
 *
 * A host name that cannot be resolved, or that resolves to something
 * other than an AF_INET address, is stored under the dummy IP value
 * 0x01020304.
 *
 * Reading stops at end of file, at the first record that does not
 * supply all three fields, or as soon as dapli_ns_add_address() fails
 * to store an entry.  Entries stored before that point are kept.
 *
 * Input:
 *	none
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INTERNAL_ERROR	MAP_FILE could not be opened
 */
DAT_RETURN
dapli_ns_create_gid_map(void)
{
	FILE			*f;
	ib_gid_t		gid;
	char			hostname[128];
	struct addrinfo		*addr;
	struct sockaddr_in	*si;
	DAPL_GID_MAP		gmt;

	f = fopen(MAP_FILE, "r");
	if (f == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ERROR: Must have file <%s> "
		    "for IP/GID mappings\n", MAP_FILE);
		return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
	}

	/*
	 * The field width of 127 leaves room for the terminating NUL in
	 * hostname[128].  Requiring all three conversions keeps a short
	 * or malformed record from being stored with stale GID values.
	 */
	while (fscanf(f, "%127s " F64x " " F64x, hostname,
	    &gid.gid_prefix, &gid.gid_guid) == 3) {
		/* do not copy stack garbage into the table */
		(void) dapl_os_memzero(&gmt, sizeof (gmt));

		if (dapls_osd_getaddrinfo(hostname, &addr) != 0) {
			/*
			 * hostname not registered in DNS,
			 * provide a dummy value
			 */
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "WARNING: <%s> not registered in "
			    "DNS, using dummy IP value\n", hostname);
			gmt.ip_address = 0x01020304;
		} else {
			/*
			 * Load into the ip/gid mapping table
			 */
			si = (struct sockaddr_in *)addr->ai_addr;
			if (AF_INET == addr->ai_addr->sa_family) {
				gmt.ip_address = si->sin_addr.s_addr;
			} else {
				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
				    "WARNING: <%s> Address family "
				    "not supported, using dummy "
				    "IP value\n", hostname);
				gmt.ip_address = 0x01020304;
			}
			dapls_osd_freeaddrinfo(addr);
		}
		gmt.gid.gid_prefix = gid.gid_prefix;
		gmt.gid.gid_guid = gid.gid_guid;

		if (dapli_ns_add_address(&gmt) != DAT_SUCCESS) {
			/*
			 * Nothing more can be stored; keep what has been
			 * loaded so far rather than failing initialization.
			 */
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "WARNING: cannot add <%s> to the IP/GID map, "
			    "ignoring the rest of <%s>\n", hostname, MAP_FILE);
			break;
		}
	}
	(void) fclose(f);
	return (DAT_SUCCESS);
}
177
178/*
179 * dapli_ns_add_address
180 *
181 * Add a table entry to the  gid_map_table.
182 *
183 * Input:
184 *	remote_ia_address	remote IP address
185 *	gid			pointer to output gid
186 *
187 * Output:
188 * 	gid			filled in GID
189 *
190 * Returns:
191 * 	DAT_SUCCESS
192 *	DAT_INSUFFICIENT_RESOURCES
193 *	DAT_INVALID_PARAMETER
194 */
195DAT_RETURN
196dapli_ns_add_address(
197	IN DAPL_GID_MAP	*gme)
198{
199	DAPL_GID_MAP	*gmt;
200	int		count;
201
202	gmt = g_gid_map_table;
203	for (count = 0, gmt = g_gid_map_table; gmt->ip_address; gmt++) {
204		count++;
205	}
206	if (count > MAX_GID_ENTRIES) {
207		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
208	}
209
210	*gmt = *gme;
211	return (DAT_SUCCESS);
212}
213
214/*
215 * dapls_ns_lookup_address
216 *
217 * Look up the provided IA_ADDRESS in the gid_map_table. Return
218 * the gid if found.
219 *
220 * Input:
221 *	remote_ia_address	remote IP address
222 *	gid			pointer to output gid
223 *	timeout			timeout in microseconds
224 *
225 * Output:
226 * 	gid			filled in GID
227 *
228 * Returns:
229 * 	DAT_SUCCESS
230 *	DAT_INSUFFICIENT_RESOURCES
231 *	DAT_INVALID_PARAMETER
232 */
233DAT_RETURN
234dapls_ns_lookup_address(
235	IN  DAPL_IA			*ia_ptr,
236	IN  DAT_IA_ADDRESS_PTR		remote_ia_address,
237	IN  DAT_TIMEOUT			timeout,
238	OUT ib_gid_t			*gid)
239{
240	DAPL_GID_MAP		*gmt;
241	struct sockaddr_in	*si;
242
243	/* unused here */
244	ia_ptr = ia_ptr;
245	si = (struct sockaddr_in *)remote_ia_address;
246
247	for (gmt = g_gid_map_table; gmt->ip_address; gmt++) {
248		if (gmt->ip_address == si->sin_addr.s_addr) {
249			gid->gid_guid = gmt->gid.gid_guid;
250			gid->gid_prefix = gmt->gid.gid_prefix;
251			return (DAT_SUCCESS);
252		}
253	}
254	return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
255}
256#endif /* IBHOSTS_NAMING */
257
/*
 * dapls_inet_ntop
 *
 * Utility function for printing the IP address of a socket address.
 *
 * Input:
 *	addr	socket address; AF_INET and AF_INET6 are formatted
 *	buf	caller supplied buffer that receives the text
 *	len	size of buf in bytes
 *
 * Returns:
 *	For AF_INET and AF_INET6, whatever inet_ntop() returns (buf on
 *	success, NULL on failure).  For a NULL addr or any other family,
 *	buf holding "bad address", truncated to fit and NUL terminated
 *	whenever buf is non-NULL and len is non-zero.
 */
char *
dapls_inet_ntop(struct sockaddr *addr, char *buf, size_t len)
{
	void	*addr_ptr;

	if (addr != NULL && addr->sa_family == AF_INET) {
		/* LINTED: E_BAD_PTR_CAST_ALIGN */
		addr_ptr = (void *)&((struct sockaddr_in *)addr)->sin_addr;
	} else if (addr != NULL && addr->sa_family == AF_INET6) {
		/* LINTED: E_BAD_PTR_CAST_ALIGN */
		addr_ptr = (void *)&((struct sockaddr_in6 *)addr)->sin6_addr;
	} else {
		/*
		 * snprintf always terminates the buffer, so callers never
		 * get back a buffer with indeterminate contents.
		 */
		if (buf != NULL && len > 0) {
			(void) snprintf(buf, len, "bad address");
		}
		return (buf);
	}
	return ((char *)inet_ntop(addr->sa_family, addr_ptr, buf, len));
}
280
281/*
282 * dapls_ns_lookup_address
283 *
284 * translates an IP address into a GID
285 *
286 * Input:
287 * 	ia_ptr			pointer to IA object
288 *	remote_ia_address	remote IP address
289 *	gid			pointer to output gid
290 *	timeout			timeout in microseconds
291 *
292 * Output:
293 * 	gid			filled in GID
294 *
295 * Returns:
296 * 	DAT_SUCCESS
297 *	DAT_INVALID_ADDRESS
298 *	DAT_INVALID_PARAMETER
299 *	DAT_INTERNAL_ERROR
300 */
301
302#define	IBD_NAME	"ibd"
303#define	NS_MAX_RETRIES	60
304
305DAT_RETURN
306dapls_ns_lookup_v4(
307	IN  DAPL_IA			*ia_ptr,
308	IN  struct sockaddr_in		*addr,
309	IN  DAT_TIMEOUT			timeout,
310	OUT ib_gid_t			*gid);
311DAT_RETURN
312dapls_ns_lookup_v6(
313	IN  DAPL_IA			*ia_ptr,
314	IN  struct sockaddr_in6		*addr,
315	IN  DAT_TIMEOUT			timeout,
316	OUT ib_gid_t			*gid);
317
318static int dapls_ns_subnet_match_v4(int s, DAPL_IA *ia_ptr,
319    struct sockaddr_in *addr);
320static int dapls_ns_subnet_match_v6(int s, DAPL_IA *ia_ptr,
321    struct sockaddr_in6 *addr);
322
323static int dapls_ns_send_packet_v6(int s, struct sockaddr_in6 *addr);
324static int dapls_ns_resolve_addr(int af, struct sockaddr *addr,
325    DAT_TIMEOUT timeout);
326
327DAT_RETURN
328dapls_ns_lookup_address(
329	IN  DAPL_IA			*ia_ptr,
330	IN  DAT_IA_ADDRESS_PTR		remote_ia_address,
331	IN  DAT_TIMEOUT			timeout,
332	OUT ib_gid_t			*gid)
333{
334	DAT_RETURN		dat_status;
335	struct sockaddr		*sock = (struct sockaddr *)remote_ia_address;
336
337	if (sock->sa_family == AF_INET) {
338		dat_status = dapls_ns_lookup_v4(ia_ptr,
339		    /* LINTED: E_BAD_PTR_CAST_ALIGN */
340		    (struct sockaddr_in *)sock, timeout, gid);
341	} else if (sock->sa_family == AF_INET6) {
342		dat_status = dapls_ns_lookup_v6(ia_ptr,
343		    /* LINTED: E_BAD_PTR_CAST_ALIGN */
344		    (struct sockaddr_in6 *)sock, timeout, gid);
345	} else {
346		dat_status = DAT_INVALID_PARAMETER;
347	}
348	return (dat_status);
349}
350
351DAT_RETURN
352dapls_ns_lookup_v4(
353	IN  DAPL_IA			*ia_ptr,
354	IN  struct sockaddr_in		*addr,
355	IN  DAT_TIMEOUT			timeout,
356	OUT ib_gid_t			*gid)
357{
358	struct xarpreq		ar;
359	struct sockaddr_in	*sin;
360	uchar_t			*mac;
361	int			s, retries = 0;
362
363	(void) dapl_os_memzero(&ar, sizeof (ar));
364	sin = (struct sockaddr_in *)&ar.xarp_pa;
365	sin->sin_family = AF_INET;
366	sin->sin_addr.s_addr = addr->sin_addr.s_addr;
367	ar.xarp_ha.sdl_family = AF_LINK;
368
369	s = socket(AF_INET, SOCK_DGRAM, 0);
370	if (s < 0) {
371		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
372		    "ns_lookup_v4: socket: %s\n", strerror(errno));
373		return (DAT_INTERNAL_ERROR);
374	}
375	if (dapls_ns_subnet_match_v4(s, ia_ptr, addr) != 0) {
376		(void) close(s);
377		return (DAT_INVALID_ADDRESS);
378	}
379again:;
380	if (ioctl(s, SIOCGXARP, (caddr_t)&ar) < 0) {
381		/*
382		 * if SIOCGXARP failed, we force the ARP
383		 * cache to be filled by connecting to the
384		 * destination IP address.
385		 */
386		if (retries <= NS_MAX_RETRIES &&
387		    dapls_ns_resolve_addr(AF_INET, (struct sockaddr *)addr,
388		    timeout) == 0) {
389			retries++;
390			goto again;
391		}
392		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ns_lookup_v4: giving up\n");
393		(void) close(s);
394		return (DAT_ERROR(DAT_INVALID_ADDRESS,
395		    DAT_INVALID_ADDRESS_UNREACHABLE));
396	}
397	if ((ar.xarp_flags & ATF_COM) == 0 &&
398	    ar.xarp_ha.sdl_type == IFT_IB && retries <= NS_MAX_RETRIES) {
399		/*
400		 * we get here if arp resolution is still incomplete
401		 */
402		retries++;
403		(void) sleep(1);
404		goto again;
405	}
406	(void) close(s);
407
408	mac = (uchar_t *)LLADDR(&ar.xarp_ha);
409	if (ar.xarp_flags & ATF_COM &&
410	    ar.xarp_ha.sdl_type == IFT_IB &&
411	    ar.xarp_ha.sdl_alen >= sizeof (ipoib_mac_t)) {
412		ib_gid_t tmp_gid;
413
414		/* LINTED: E_BAD_PTR_CAST_ALIGN */
415		(void) dapl_os_memcpy(&tmp_gid,
416		    &((ipoib_mac_t *)mac)->ipoib_gidpref, sizeof (ib_gid_t));
417		/*
418		 * gids from the ARP table are in network order, convert
419		 * the gids from network order to host byte order
420		 */
421		gid->gid_prefix = BETOH_64(tmp_gid.gid_prefix);
422		gid->gid_guid = BETOH_64(tmp_gid.gid_guid);
423	} else {
424		int i, len;
425
426		len = ar.xarp_ha.sdl_alen;
427		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
428		    "ns_lookup_v4: failed, non IB address: "
429		    "len = %d, addr = 0x", len);
430		if (len > 0) {
431			for (i = 0; i < len; i++) {
432				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
433				    "%02x", (int)mac[i] & 0xff);
434			}
435		} else {
436			dapl_dbg_log(DAPL_DBG_TYPE_ERR, "0");
437		}
438		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "\n");
439		return (DAT_INVALID_ADDRESS);
440	}
441	return (DAT_SUCCESS);
442}
443
444DAT_RETURN
445dapls_ns_lookup_v6(
446	IN  DAPL_IA			*ia_ptr,
447	IN  struct sockaddr_in6		*addr,
448	IN  DAT_TIMEOUT			timeout,
449	OUT ib_gid_t			*gid)
450{
451	struct lifreq		lifr;
452	uchar_t			*mac;
453	int			s, retries = 0;
454
455	s = socket(AF_INET6, SOCK_DGRAM, 0);
456	if (s < 0) {
457		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
458		    "ns_lookup_v6: socket: %s\n", strerror(errno));
459		return (DAT_INTERNAL_ERROR);
460	}
461	if (dapls_ns_subnet_match_v6(s, ia_ptr, addr) != 0) {
462		(void) close(s);
463		return (DAT_INVALID_ADDRESS);
464	}
465	(void) dapl_os_memzero(&lifr, sizeof (lifr));
466	(void) dapl_os_memcpy(&lifr.lifr_nd.lnr_addr, addr, sizeof (*addr));
467	(void) dapl_os_strcpy(lifr.lifr_name, IBD_NAME);
468	(void) sprintf(&lifr.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
469	    ia_ptr->hca_ptr->hca_ibd_inst);
470
471again:;
472	if (ioctl(s, SIOCLIFGETND, (caddr_t)&lifr) < 0)  {
473		/*
474		 * if SIOCLIFGETND failed, we force the ND
475		 * cache to be filled by connecting to the
476		 * destination IP address.
477		 */
478		if (retries < NS_MAX_RETRIES &&
479		    dapls_ns_send_packet_v6(s, addr) == 0 &&
480		    dapls_ns_resolve_addr(AF_INET6, (struct sockaddr *)addr,
481		    timeout) == 0) {
482			retries++;
483			goto again;
484		}
485		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ns_lookup_v6: giving up\n");
486		(void) close(s);
487		return (DAT_ERROR(DAT_INVALID_ADDRESS,
488		    DAT_INVALID_ADDRESS_UNREACHABLE));
489	}
490	if (lifr.lifr_nd.lnr_hdw_len == 0 && retries <= NS_MAX_RETRIES) {
491		/*
492		 * lnr_hdw_len == 0 means that the ND entry
493		 * is still incomplete. we need to retry the ioctl.
494		 */
495		retries++;
496		(void) sleep(1);
497		goto again;
498	}
499	(void) close(s);
500
501	mac = (uchar_t *)lifr.lifr_nd.lnr_hdw_addr;
502	if (lifr.lifr_nd.lnr_hdw_len >= sizeof (ipoib_mac_t)) {
503		ib_gid_t tmp_gid;
504		/* LINTED: E_BAD_PTR_CAST_ALIGN */
505		(void) dapl_os_memcpy(&tmp_gid,
506		    &((ipoib_mac_t *)mac)->ipoib_gidpref, sizeof (ib_gid_t));
507		/*
508		 * gids from the ND table are in network order, convert
509		 * the gids from network order to host byte order
510		 */
511		gid->gid_prefix = BETOH_64(tmp_gid.gid_prefix);
512		gid->gid_guid = BETOH_64(tmp_gid.gid_guid);
513	} else {
514		int i, len;
515
516		len = lifr.lifr_nd.lnr_hdw_len;
517		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
518		    "ns_lookup_v6: failed, non IB address: "
519		    "len = %d, addr = 0x", len);
520		if (len > 0) {
521			for (i = 0; i < len; i++) {
522				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
523				    "%02x", (int)mac[i] & 0xff);
524			}
525		} else {
526			dapl_dbg_log(DAPL_DBG_TYPE_ERR, "0");
527		}
528		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "\n");
529		return (DAT_INVALID_ADDRESS);
530	}
531	return (DAT_SUCCESS);
532}
533
534static int
535dapls_ns_send_packet_v6(int s, struct sockaddr_in6 *addr)
536{
537	if (sendto(s, NULL, 0, MSG_DONTROUTE, (struct sockaddr *)addr,
538	    sizeof (*addr)) < 0) {
539		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
540		    "ns_send_packet_v6: failed: %s\n", strerror(errno));
541		return (-1);
542	}
543	return (0);
544}
545
546static int
547dapls_ns_subnet_match_v4(int s, DAPL_IA *ia_ptr, struct sockaddr_in *addr)
548{
549	struct lifreq		lifreq;
550	int			retval;
551	uint32_t		netmask, netaddr, netaddr_dest;
552
553	(void) dapl_os_strcpy(lifreq.lifr_name, IBD_NAME);
554	(void) sprintf(&lifreq.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
555	    ia_ptr->hca_ptr->hca_ibd_inst);
556
557	retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
558	if (retval < 0) {
559		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
560		    "ns_subnet_match_v4: cannot get netmask: %s\n",
561		    strerror(errno));
562		return (-1);
563	}
564	netmask = ((struct sockaddr_in *)&lifreq.lifr_addr)->
565	    sin_addr.s_addr;
566
567	/*
568	 * we need to get the interface address here because the
569	 * address in ia_ptr->hca_ptr->hca_address might not
570	 * necessarily be an IPv4 address.
571	 */
572	retval = ioctl(s, SIOCGLIFADDR, (caddr_t)&lifreq);
573	if (retval < 0) {
574		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
575		    "ns_subnet_match_v4: cannot get local addr: %s\n",
576		    strerror(errno));
577		return (-1);
578	}
579	netaddr = ((struct sockaddr_in *)&lifreq.lifr_addr)->
580	    sin_addr.s_addr & netmask;
581	netaddr_dest = addr->sin_addr.s_addr & netmask;
582
583	if (netaddr != netaddr_dest) {
584		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
585		    "ns_subnet_match_v4: netaddrs don't match: "
586		    "local %x, remote %x\n", netaddr, netaddr_dest);
587		return (-1);
588	}
589	return (0);
590}
591
592static int
593dapls_ns_subnet_match_v6(int s, DAPL_IA *ia_ptr, struct sockaddr_in6 *addr)
594{
595	struct lifreq		lifreq;
596	struct sockaddr_in6	netmask_sock;
597	uchar_t			*netmask, *local_addr, *dest_addr;
598	int			i, retval;
599
600	(void) dapl_os_strcpy(lifreq.lifr_name, IBD_NAME);
601	(void) sprintf(&lifreq.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
602	    ia_ptr->hca_ptr->hca_ibd_inst);
603
604	retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
605	if (retval < 0) {
606		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
607		    "ns_subnet_match_v6: cannot get netmask: %s\n",
608		    strerror(errno));
609		return (-1);
610	}
611	(void) dapl_os_memcpy(&netmask_sock, &lifreq.lifr_addr,
612	    sizeof (netmask_sock));
613
614	/*
615	 * we need to get the interface address here because the
616	 * address in ia_ptr->hca_ptr->hca_address might not
617	 * necessarily be an IPv6 address.
618	 */
619	retval = ioctl(s, SIOCGLIFADDR, (caddr_t)&lifreq);
620	if (retval < 0) {
621		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
622		    "ns_subnet_match_v6: cannot get local addr: %s\n",
623		    strerror(errno));
624		return (-1);
625	}
626	netmask = (uchar_t *)&netmask_sock.sin6_addr;
627	local_addr = (uchar_t *)&((struct sockaddr_in6 *)&lifreq.lifr_addr)->
628	    sin6_addr;
629	dest_addr = (uchar_t *)&addr->sin6_addr;
630
631	for (i = 0; i < sizeof (addr->sin6_addr); i++) {
632		if (((local_addr[i] ^ dest_addr[i]) & netmask[i]) != 0) {
633			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
634			    "ns_subnet_match_v6: subnets do not match\n");
635			return (-1);
636		}
637	}
638	return (0);
639}
640
641static int
642dapls_ns_resolve_addr(int af, struct sockaddr *addr, DAT_TIMEOUT timeout)
643{
644	struct sockaddr_storage	sock;
645	struct sockaddr_in	*v4dest;
646	struct sockaddr_in6	*v6dest;
647	struct pollfd		pollfd;
648	int			fd, retval;
649	int			tmo;
650	int			ip_version;
651
652	if (af == AF_INET) {
653		ip_version = 4;
654	} else if (af == AF_INET6) {
655		ip_version = 6;
656	} else {
657		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
658		    "ns_resolve_addr: invalid af %d\n", af);
659		return (-1);
660	}
661	fd = socket(af, SOCK_STREAM, 0);
662	if (fd < 0) {
663		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
664		    "ns_resolve_addr: ipv%d, cannot create socket %s\n",
665		    ip_version, strerror(errno));
666		return (-1);
667	}
668
669	/*
670	 * set socket to non-blocking mode
671	 */
672	retval = fcntl(fd, F_SETFL, O_NONBLOCK);
673	if (retval < 0) {
674		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
675		    "ns_resolve_addr: ipv%d, fcntl failed: %s\n",
676		    ip_version, strerror(errno));
677		(void) close(fd);
678		return (-1);
679	}
680
681	/*
682	 * connect to the discard port (9) at the dest IP
683	 */
684	(void) dapl_os_memzero(&sock, sizeof (sock));
685	if (af == AF_INET) {
686		v4dest = (struct sockaddr_in *)&sock;
687		v4dest->sin_family = AF_INET;
688		v4dest->sin_addr.s_addr =
689		    /* LINTED: E_BAD_PTR_CAST_ALIGN */
690		    ((struct sockaddr_in *)addr)->sin_addr.s_addr;
691		v4dest->sin_port = htons(9);
692
693		retval = connect(fd, (struct sockaddr *)v4dest,
694		    sizeof (struct sockaddr_in));
695	} else {
696		v6dest = (struct sockaddr_in6 *)&sock;
697		v6dest->sin6_family = AF_INET6;
698		/* LINTED: E_BAD_PTR_CAST_ALIGN */
699		(void) dapl_os_memcpy(&v6dest->sin6_addr,
700		    &((struct sockaddr_in6 *)addr)->sin6_addr,
701		    sizeof (struct sockaddr_in6));
702		v6dest->sin6_port = htons(9);
703
704		retval = connect(fd, (struct sockaddr *)v6dest,
705		    sizeof (struct sockaddr_in6));
706	}
707
708	/*
709	 * we can return immediately if connect succeeds
710	 */
711	if (retval == 0) {
712		(void) close(fd);
713		return (0);
714	}
715	/*
716	 * receiving a RST means that the arp/nd entry should
717	 * already be resolved
718	 */
719	if (retval < 0 && errno == ECONNREFUSED) {
720		errno = 0;
721		(void) close(fd);
722		return (0);
723	}
724
725	/*
726	 * for all other cases, we poll on the fd
727	 */
728	pollfd.fd = fd;
729	pollfd.events = POLLIN | POLLOUT;
730	pollfd.revents = 0;
731
732	if (timeout == DAT_TIMEOUT_INFINITE ||
733	    timeout == 0) {
734		/*
735		 * -1 means infinite
736		 */
737		tmo = -1;
738	} else {
739		/*
740		 * convert timeout from usecs to msecs
741		 */
742		tmo = timeout/1000;
743	}
744	retval = poll(&pollfd, 1, tmo);
745	if (retval > 0) {
746		int	so_error = 0, len = sizeof (so_error);
747
748		retval = getsockopt(fd, SOL_SOCKET, SO_ERROR,
749		    &so_error, &len);
750		if (retval == 0) {
751			/*
752			 * we only return 0 if so_error == 0 or
753			 * so_error == ECONNREFUSED. for all other
754			 * cases retval is non-zero.
755			 */
756			if (so_error != 0 && so_error != ECONNREFUSED) {
757				retval = -1;
758				errno = so_error;
759				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
760				    "ns_resolve_addr: ipv%d, so_error: %s\n",
761				    ip_version, strerror(errno));
762			}
763		} else {
764			/*
765			 * if retval != 0, it must be -1. and errno must
766			 * have been set by getsockopt.
767			 */
768			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
769			    "ns_resolve_addr: ipv%d, getsockopt: %s\n",
770			    ip_version, strerror(errno));
771		}
772	} else {
773		if (retval == 0) {
774			errno = ETIMEDOUT;
775		}
776		retval = -1;
777		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
778		    "ns_resolve_addr: ipv%d, poll: %s\n",
779		    ip_version, strerror(errno));
780	}
781	(void) close(fd);
782	return (retval);
783}
784