1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * Module for all network transactions. SLP messages can be multicast,
31 * unicast over UDP, or unicast over TCP; this module provides routines
32 * for all three. TCP transactions are handled by a single dedicated
33 * thread, while multicast and UDP unicast messages are sent by the
34 * calling thread.
35 *
36 * slp_uc_tcp_send:	enqueues a message on the TCP transaction thread's
37 *				queue.
38 * slp_tcp_wait:	blocks until all TCP-enqueued transactions for
39 *				a given SLP handle are complete
40 * slp_uc_udp_send:	unicasts a message using a datagram
41 * slp_mc_send:		multicasts a message
42 */
43
44/*
45 * todo: correct multicast interfaces;
46 */
47
48#include <stdio.h>
49#include <stdlib.h>
50#include <syslog.h>
51#include <sys/types.h>
52#include <sys/socket.h>
53#include <arpa/inet.h>
54#include <errno.h>
55#include <unistd.h>
56#include <time.h>
57#include <string.h>
58#include <slp-internal.h>
59#include <slp_net_utils.h>
60
/*
 * TCP thread particulars
 */
/* SLP_TRUE between start_tcp_thr() succeeding and end_tcp_thr() running */
static SLPBoolean tcp_thr_running = SLP_FALSE;
/* request queue: filled by slp_uc_tcp_send(), drained by tcp_thread() */
static slp_queue_t *tcp_q;
/* socket for the connection tcp_thread() is currently working on */
static int tcp_sockfd;
/* held by start_tcp_thr() and end_tcp_thr() while they change the above */
static mutex_t start_lock = DEFAULTMUTEX;
68
/* Used to pass arguments to the TCP thread, via 'tcp_q' */
struct tcp_rqst {
	slp_handle_impl_t *hp;		/* handle the transaction belongs to */
	slp_target_t *target;		/* target list to walk with failover */
	const char *scopes;		/* scopes string used for the header */
	SLPBoolean free_target;		/* tcp_thread frees 'target' when set */
	unsigned short xid;		/* if non-zero, XID forced on header */
};
77
/* Used to keep track of broadcast interfaces */
struct bc_ifs {
	struct sockaddr_in *sin;	/* array of broadcast destinations */
	int num_ifs;			/* number of entries in 'sin' */
};
83
/*
 * Private utility routines
 */
/* TCP thread management */
static SLPError start_tcp_thr();
static void tcp_thread();
/* message construction */
static SLPError make_header(slp_handle_impl_t *, char *, const char *);
static void udp_make_msghdr(struct sockaddr_in *, struct iovec *, int,
			    struct msghdr *);
/* multicast / broadcast target setup and transmission */
static SLPError make_mc_target(slp_handle_impl_t *,
				struct sockaddr_in *, char *,
				struct pollfd **, nfds_t *, struct bc_ifs *);
static SLPError make_bc_target(slp_handle_impl_t *, struct in_addr *,
				int, struct bc_ifs *);
static SLPError mc_sendmsg(struct pollfd *, struct msghdr *,
				struct bc_ifs *);
static SLPError bc_sendmsg(struct pollfd *, struct msghdr *, struct bc_ifs *);
/* multicast reply collection */
static void mc_recvmsg(struct pollfd *, nfds_t, slp_handle_impl_t *,
			const char *, char *, void **, unsigned long long,
			unsigned long long, unsigned long long *,
			int *, int *, int);
static void free_pfds(struct pollfd *, nfds_t);
static void tcp_handoff(slp_handle_impl_t *, const char *,
			struct sockaddr_in *, unsigned short);
/* timing helpers */
static unsigned long long now_millis();
static int wait_for_response(unsigned long long, int *,
				unsigned long long, unsigned long long *,
				struct pollfd [], nfds_t);
/* previous-responder list bookkeeping */
static int add2pr_list(slp_msg_t *, struct sockaddr_in *, void **);
static void free_pr_node(void *, VISIT, int, void *);
113
114/*
115 * Unicasts a message using TCP. 'target' is a targets list
116 * containing DAs corresponding to 'scopes'. 'free_target' directs
117 * tcp_thread to free the target list when finished; this is useful
118 * when a target needs to be synthesised by another message thread
119 * (such as slp_mc_send for tcp_handoffs). If this message is a
120 * retransmission due to a large reply, 'xid' should be the same as for
121 * the original message.
122 *
123 * This call returns as soon as the message has been enqueued on 'tcp_q'.
124 * Callers interested in knowing when the transaction has completed
125 * should call slp_tcp_wait with the same SLP handle.
126 */
127void slp_uc_tcp_send(slp_handle_impl_t *hp, slp_target_t *target,
128			const char *scopes, SLPBoolean free_target,
129			unsigned short xid) {
130	struct tcp_rqst *rqst;
131
132	/* initialize TCP vars in handle, if necessary */
133	if (!hp->tcp_lock) {
134		if (!(hp->tcp_lock = malloc(sizeof (*(hp->tcp_lock))))) {
135			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
136				"out of memory");
137			return;
138		}
139		(void) mutex_init(hp->tcp_lock, NULL, NULL);
140	}
141	if (!hp->tcp_wait) {
142		if (!(hp->tcp_wait = malloc(sizeof (*(hp->tcp_wait))))) {
143			slp_err(LOG_CRIT, 0, "slp_uc_tcp_send",
144				"out of memory");
145			return;
146		}
147		(void) cond_init(hp->tcp_wait, NULL, NULL);
148	}
149	(void) mutex_lock(hp->tcp_lock);
150	(hp->tcp_ref_cnt)++;
151	(void) mutex_unlock(hp->tcp_lock);
152
153	/* start TCP thread, if not already running */
154	if (!tcp_thr_running)
155		if (start_tcp_thr() != SLP_OK)
156			return;
157
158	/* create and enqueue the request */
159	if (!(rqst = malloc(sizeof (*rqst)))) {
160		slp_err(LOG_CRIT, 0, "slp_uc_tcp_send", "out of memory");
161		return;
162	}
163	rqst->hp = hp;
164	rqst->target = target;
165	rqst->scopes = scopes;
166	rqst->free_target = free_target;
167	rqst->xid = xid;
168	(void) slp_enqueue(tcp_q, rqst);
169}
170
171/*
172 * Wait for TCP to complete, if a transaction corresponding to this
173 * SLP handle is pending. If none are pending, returns immediately.
174 */
175void slp_tcp_wait(slp_handle_impl_t *hp) {
176	(void) mutex_lock(hp->tcp_lock);
177	while (hp->tcp_ref_cnt > 0)
178		(void) cond_wait(hp->tcp_wait, hp->tcp_lock);
179	(void) mutex_unlock(hp->tcp_lock);
180}
181
182/*
183 * Unicasts a message using datagrams. 'target' should contain a
184 * list of DAs corresponding to 'scopes'.
185 *
186 * This call does not return until the transaction has completed. It
187 * may handoff a message to the TCP thread if necessary, but will not
188 * wait for that transaction to complete. Hence callers should always
189 * invoke slp_tcp_wait before cleaning up resources.
190 */
191void slp_uc_udp_send(slp_handle_impl_t *hp, slp_target_t *target,
192			const char *scopes) {
193	slp_target_t *ctarg;
194	struct sockaddr_in *sin;
195	struct msghdr msg[1];
196	char header[SLP_DEFAULT_SENDMTU];
197	int sockfd;
198	size_t mtu;
199	SLPBoolean use_tcp;
200	struct pollfd pfd[1];
201	unsigned long long now, sent;
202	char *reply = NULL;
203
204	use_tcp = SLP_FALSE;
205	/* build the header and iovec */
206	if (make_header(hp, header, scopes) != SLP_OK)
207		return;
208
209	mtu = slp_get_mtu();
210
211	/* walk targets list until we either succeed or run out of targets */
212	for (ctarg = target; ctarg; ctarg = slp_next_failover(ctarg)) {
213		char *state;
214		const char *timeouts;
215		int timeout;
216
217		sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
218
219		/* make the socket, msghdr and reply buf */
220		if ((sockfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
221			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
222				"could not create socket: %s",
223				strerror(errno));
224			return;
225		}
226		pfd[0].fd = sockfd;
227		pfd[0].events = POLLRDNORM;
228
229		udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
230		if (!reply && !(reply = malloc(mtu))) {
231			(void) close(sockfd);
232			slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
233				"out of memory");
234			return;
235		}
236
237		/* timeout loop */
238		timeouts = SLPGetProperty(SLP_CONFIG_DATAGRAMTIMEOUTS);
239		state = (char *)timeouts;
240		for (timeout = slp_get_next_onlist(&state);
241			timeout != -1 &&
242			!hp->cancel;
243			timeout = slp_get_next_onlist(&state)) {
244			int pollerr;
245
246			if (sendmsg(sockfd, msg, 0) < 0) {
247				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
248					"sendmsg failed: %s", strerror(errno));
249				continue; /* try again */
250			}
251			sent = now_millis();
252
253			pollerr = wait_for_response(
254				0, &timeout, sent, &now, pfd, 1);
255
256			if (pollerr == 0)
257				/* timeout */
258				continue;
259			if (pollerr < 0)
260				break;
261
262			/* only using one fd, so no need to scan pfd */
263			if (recvfrom(sockfd, reply, mtu, 0, NULL, NULL) < 0) {
264				/* if reply overflows, hand off to TCP */
265				if (errno == ENOMEM) {
266					free(reply); reply = NULL;
267					use_tcp = SLP_TRUE;
268					break;
269				}
270				slp_err(LOG_CRIT, 0, "slp_uc_udp_send",
271					"recvfrom failed: %s",
272					strerror(errno));
273			} else {
274				/* success -- but check error code */
275				slp_proto_err errcode = slp_get_errcode(reply);
276				switch (errcode) {
277				case SLP_MSG_PARSE_ERROR:
278				case SLP_VER_NOT_SUPPORTED:
279				case SLP_SICK_DA:
280				case SLP_DA_BUSY_NOW:
281				case SLP_OPTION_NOT_UNDERSTOOD:
282				case SLP_RQST_NOT_SUPPORTED: {
283				    char addrbuf[INET6_ADDRSTRLEN], *cname;
284
285				    cname = slp_ntop(addrbuf, INET6_ADDRSTRLEN,
286					(const void *) &(sin->sin_addr));
287				    cname = cname ? cname : "[invalid addr]";
288
289				    /* drop it */
290				    slp_err(LOG_INFO, 0,
291				"DA %s returned error code %d; dropping reply",
292							cname, errcode);
293				    free(reply); reply = NULL;
294				}
295				}
296			}
297			break;
298		}
299		if (timeout != -1)
300			/* success or cancel */
301			break;
302		/* else failure */
303		slp_mark_target_failed(ctarg);
304	}
305	(void) close(sockfd);
306	if (!ctarg || hp->cancel) {
307		/* failed all attempts or canceled by consumer */
308		if (reply) free(reply);
309		return;
310	}
311	/* success or tcp handoff */
312	if (reply) {
313		if (slp_get_overflow(reply))
314			use_tcp = SLP_TRUE;
315		else
316			slp_mark_target_used(ctarg);
317		(void) slp_enqueue(hp->q, reply);
318	}
319	if (use_tcp)
320		slp_uc_tcp_send(
321			hp, ctarg, scopes, SLP_FALSE, slp_get_xid(header));
322}
323
324/*
325 * Multicasts (or broadcasts) a message, using multicast convergance
326 * to collect results. Large replies will cause the message to be handed
327 * off to the TCP thread.
328 *
329 * This call does not return until the transaction is complete. It does
330 * not, however, wait until pending TCP transactions are complete, so
331 * callers should always invoke slp_tcp_wait before cleaning up any
332 * resources.
333 */
334void slp_mc_send(slp_handle_impl_t *hp, const char *scopes) {
335	char header[SLP_DEFAULT_SENDMTU], *state;
336	const char *timeouts;
337	struct sockaddr_in sin[1];
338	struct msghdr msg[1];
339	int maxwait, timeout, noresults, anyresults;
340	unsigned long long final_to, now, sent;
341	struct pollfd *pfd;
342	nfds_t nfds;
343	void *collator = NULL;
344	struct bc_ifs bcifs;
345
346	/* build the header and iovec */
347	if (make_header(hp, header, scopes) != SLP_OK)
348		return;
349
350	(void) memset(sin, 0, sizeof (sin));
351	if (make_mc_target(hp, sin, header, &pfd, &nfds, &bcifs) != SLP_OK)
352		return;
353	udp_make_msghdr(sin, hp->msg.iov, hp->msg.iovlen, msg);
354
355	maxwait = slp_get_mcmaxwait();
356	maxwait = maxwait ? maxwait : SLP_DEFAULT_MAXWAIT;
357
358	/* set the final timeout */
359	now = now_millis();
360	final_to = now + maxwait;
361
362	/* timeout prep and loop */
363	timeouts = SLPGetProperty(SLP_CONFIG_MULTICASTTIMEOUTS);
364	state = (char *)timeouts;
365	noresults = anyresults = 0;
366
367	for (timeout = slp_get_next_onlist(&state);
368		timeout != -1 &&
369		now < final_to &&
370		noresults < 2 &&
371		!hp->cancel;
372		timeout = slp_get_next_onlist(&state)) {
373
374		/* send msg */
375		if (mc_sendmsg(pfd, msg, &bcifs) != SLP_OK) {
376			continue; /* try again */
377		}
378		sent = now_millis();
379
380		/* receive results */
381		mc_recvmsg(pfd, nfds, hp, scopes, header, &collator, final_to,
382			sent, &now, &noresults, &anyresults, timeout);
383
384		if (!anyresults)
385			noresults++;
386		anyresults = 0;
387	}
388	/* clean up PR list collator */
389	if (collator)
390		slp_twalk(collator, free_pr_node, 0, NULL);
391
392	/* close all fds in pfd */
393	free_pfds(pfd, nfds);
394
395	/* free broadcast addrs, if used */
396	if (bcifs.sin) free(bcifs.sin);
397}
398
399/*
400 * Private net helper routines
401 */
402
403/*
404 * Starts the tcp_thread and allocates any necessary resources.
405 */
406static SLPError start_tcp_thr() {
407	SLPError err;
408	int terr;
409
410	(void) mutex_lock(&start_lock);
411	/* make sure someone else hasn't already intialized the thread */
412	if (tcp_thr_running) {
413		(void) mutex_unlock(&start_lock);
414		return (SLP_OK);
415	}
416
417	/* create the tcp queue */
418	if (!(tcp_q = slp_new_queue(&err))) {
419		(void) mutex_unlock(&start_lock);
420		return (err);
421	}
422
423	/* start the tcp thread */
424	if ((terr = thr_create(0, NULL, (void *(*)(void *)) tcp_thread,
425				NULL, 0, NULL)) != 0) {
426	    slp_err(LOG_CRIT, 0, "start_tcp_thr",
427		    "could not start thread: %s", strerror(terr));
428	    (void) mutex_unlock(&start_lock);
429	    return (SLP_INTERNAL_SYSTEM_ERROR);
430	}
431
432	tcp_thr_running = SLP_TRUE;
433	(void) mutex_unlock(&start_lock);
434	return (SLP_OK);
435}
436
/*
 * Called by the tcp thread to shut itself down. The queue must be
 * empty (and should be, since the tcp thread will only shut itself
 * down if nothing has been put in its queue for the timeout period).
 *
 * Does not return to its caller: the calling thread exits via
 * thr_exit once the shared state has been reset.
 */
static void end_tcp_thr() {
	/* same lock start_tcp_thr takes, so start and stop cannot overlap */
	(void) mutex_lock(&start_lock);

	/* mark the thread as gone, then release its request queue */
	tcp_thr_running = SLP_FALSE;
	slp_destroy_queue(tcp_q);

	(void) mutex_unlock(&start_lock);
	thr_exit(NULL);
}
451
452/*
453 * The thread of control for the TCP thread. This sits in a loop, waiting
454 * on 'tcp_q' for new messages. If no message appear after 30 seconds,
455 * this thread cleans up resources and shuts itself down.
456 */
457static void tcp_thread() {
458	struct tcp_rqst *rqst;
459	char *reply, header[SLP_DEFAULT_SENDMTU];
460	timestruc_t to[1];
461	to->tv_nsec = 0;
462
463	for (;;) {
464		slp_target_t *ctarg, *targets;
465		slp_handle_impl_t *hp;
466		const char *scopes;
467		struct sockaddr_in *sin;
468		SLPBoolean free_target, etimed;
469		unsigned short xid;
470
471		/* set idle shutdown timeout */
472		to->tv_sec = time(NULL) + 30;
473		/* get the next request from the tcp queue */
474		if (!(rqst = slp_dequeue_timed(tcp_q, to, &etimed))) {
475			if (!etimed)
476				continue;
477			else
478				end_tcp_thr();
479		}
480
481		hp = rqst->hp;
482		scopes = rqst->scopes;
483		targets = rqst->target;
484		free_target = rqst->free_target;
485		xid = rqst->xid;
486		free(rqst);
487		reply = NULL;
488
489		/* Check if this handle has been cancelled */
490		if (hp->cancel)
491			goto transaction_complete;
492
493		/* build the header and iovec */
494		if (make_header(hp, header, scopes) != SLP_OK) {
495			if (free_target) slp_free_target(targets);
496			continue;
497		}
498		if (xid)
499			slp_set_xid(header, xid);
500
501	/* walk targets list until we either succeed or run out of targets */
502		for (ctarg = targets;
503			ctarg && !hp->cancel;
504			ctarg = slp_next_failover(ctarg)) {
505
506			sin = (struct sockaddr_in *)slp_get_target_sin(ctarg);
507
508			/* create the socket */
509			if ((tcp_sockfd = socket(AF_INET, SOCK_STREAM, 0))
510			    < 0) {
511				slp_err(LOG_CRIT, 0, "tcp_thread",
512					"could not create socket: %s",
513					strerror(errno));
514				ctarg = NULL;
515				break;
516			}
517
518			/* connect to target */
519			if (connect(tcp_sockfd, (struct sockaddr *)sin,
520				    sizeof (*sin)) < 0) {
521				slp_err(LOG_INFO, 0, "tcp_thread",
522					"could not connect, error = %s",
523					strerror(errno));
524				goto failed;
525			}
526
527			/* send the message and read the reply */
528			if (writev(tcp_sockfd, hp->msg.iov, hp->msg.iovlen)
529			    == -1) {
530				slp_err(LOG_INFO, 0, "tcp_thread",
531					"could not send, error = %s",
532					strerror(errno));
533				goto failed;
534			}
535
536			/* if success, break out of failover loop */
537			if ((slp_tcp_read(tcp_sockfd, &reply)) == SLP_OK) {
538				(void) close(tcp_sockfd);
539				break;
540			}
541
542		/* else if timed out, mark target failed and try next one */
543failed:
544			(void) close(tcp_sockfd);
545			slp_mark_target_failed(ctarg);
546		}
547
548		if (hp->cancel) {
549			if (reply) {
550				free(reply);
551			}
552		} else if (ctarg) {
553			/* success */
554			(void) slp_enqueue(hp->q, reply);
555			slp_mark_target_used(ctarg);
556		}
557
558	/* If all TCP transactions on this handle are complete, send notice */
559transaction_complete:
560		(void) mutex_lock(hp->tcp_lock);
561		if (--(hp->tcp_ref_cnt) == 0)
562			(void) cond_signal(hp->tcp_wait);
563		(void) mutex_unlock(hp->tcp_lock);
564
565		if (free_target)
566			slp_free_target(targets);
567	}
568}
569
570/*
571 * Performs a full read for TCP replies, dynamically allocating a
572 * buffer large enough to hold the reply.
573 */
574SLPError slp_tcp_read(int sockfd, char **reply) {
575	char lenbuf[5], *p;
576	size_t nleft;
577	ssize_t nread;
578	unsigned int len;
579
580	/* find out how long the reply is */
581	nleft = 5;
582	p = lenbuf;
583	while (nleft != 0) {
584		if ((nread = read(sockfd, p, 5)) < 0) {
585			if (errno == EINTR)
586				nread = 0;
587			else
588				return (SLP_NETWORK_ERROR);
589		} else if (nread == 0)
590			/* shouldn't hit EOF here */
591			return (SLP_NETWORK_ERROR);
592		nleft -= nread;
593		p += nread;
594	}
595
596	len = slp_get_length(lenbuf);
597
598	/* allocate space for the reply, and copy in what we've already read */
599	/* This buffer gets freed by a msg-specific unpacking routine later */
600	if (!(*reply = malloc(len))) {
601		slp_err(LOG_CRIT, 0, "tcp_read", "out of memory");
602		return (SLP_MEMORY_ALLOC_FAILED);
603	}
604	(void) memcpy(*reply, lenbuf, 5);
605
606	/* read the rest of the message */
607	nleft = len - 5;
608	p = *reply + 5;
609	while (nleft != 0) {
610		if ((nread = read(sockfd, p, nleft)) < 0) {
611			if (errno == EINTR)
612				nread = 0;
613			else {
614				free(*reply);
615				return (SLP_NETWORK_ERROR);
616			}
617		} else if (nread == 0)
618			/*
619			 * shouldn't hit EOF here, but perhaps we've
620			 * gotten something useful, so return OK.
621			 */
622			return (SLP_OK);
623
624		nleft -= nread;
625		p += nread;
626	}
627
628	return (SLP_OK);
629}
630
631/*
632 * Lays in a SLP header for this message into the scatter / gather
633 * array 'iov'. 'header' is the buffer used to contain the header,
634 * and must contain enough space. 'scopes' should contain a string
635 * with the scopes to be used for this message.
636 */
637static SLPError make_header(slp_handle_impl_t *hp, char *header,
638			    const char *scopes) {
639	SLPError err;
640	size_t msgLen, off;
641	int i;
642	size_t mtu;
643	unsigned short slen = (unsigned short)strlen(scopes);
644
645	mtu = slp_get_mtu();
646	msgLen = slp_hdrlang_length(hp);
647	hp->msg.iov[0].iov_base = header;
648	hp->msg.iov[0].iov_len = msgLen;	/* now the length of the hdr */
649
650	/* use the remaining buffer in header for the prlist */
651	hp->msg.prlist->iov_base = header + msgLen;
652
653	for (i = 1; i < hp->msg.iovlen; i++) {
654		msgLen += hp->msg.iov[i].iov_len;
655	}
656	msgLen += slen;
657
658	off = 0;
659	if ((err = slp_add_header(hp->locale, header, mtu,
660					hp->fid, msgLen, &off)) != SLP_OK)
661		return (err);
662
663	/* start out with empty prlist */
664	hp->msg.prlist->iov_len = 0;
665
666	/* store the scope string len into the space provided by the caller */
667	off = 0;
668	if ((err = slp_add_sht((char *)hp->msg.scopeslen.iov_base,
669				2, slen, &off)) != SLP_OK) {
670		return (err);
671	}
672	hp->msg.scopes->iov_base = (caddr_t)scopes;
673	hp->msg.scopes->iov_len = slen;
674
675	return (SLP_OK);
676}
677
678/*
679 * Populates a struct msghdr suitable for use with sendmsg.
680 */
681static void udp_make_msghdr(struct sockaddr_in *sin, struct iovec *iov,
682			    int iovlen, struct msghdr *msg) {
683	msg->msg_name = (caddr_t)sin;
684	msg->msg_namelen = 16;
685	msg->msg_iov = iov;
686	msg->msg_iovlen = iovlen;
687	msg->msg_accrights = NULL;
688	msg->msg_accrightslen = 0;
689}
690
691/*
692 * Sets the address on 'sin', sets the flag in the message header,
693 * and creates an array of pollfds for all interfaces we need to
694 * use. If we need to use only broadcast, and net.slp.interfaces
695 * is set, fills bcifs with an array of subnet broadcast addresses
696 * to which we should send. Returns err != SLP_OK only on catastrophic
697 * error.
698 */
699static SLPError make_mc_target(slp_handle_impl_t *hp,
700				struct sockaddr_in *sin, char *header,
701				struct pollfd **fds, nfds_t *nfds,
702				struct bc_ifs *bcifs) {
703
704	unsigned char ttl = slp_get_multicastTTL();
705	char *ifs_string;
706	SLPBoolean have_valid_if = SLP_FALSE;
707	SLPBoolean use_broadcast = slp_get_usebroadcast();
708	int fd, i, num_givenifs;
709	struct in_addr *given_ifs = NULL;
710	nfds_t nfd_i;
711
712	sin->sin_port = htons(SLP_PORT);
713	sin->sin_family = AF_INET;
714	slp_set_mcast(header);
715
716	/* Get the desired multicast interfaces, if set */
717	bcifs->sin = NULL;
718	*fds = NULL;
719	if ((ifs_string = (char *)SLPGetProperty(
720		SLP_CONFIG_INTERFACES)) != NULL && *ifs_string) {
721
722		char *p, *tstate;
723
724		/* count the number of IFs given */
725		p = strchr(ifs_string, ',');
726		for (num_givenifs = 1; p; num_givenifs++) {
727			p = strchr(p + 1, ',');
728		}
729
730		/* copy the given IFs into an array for easier processing */
731		if (!(given_ifs = calloc(num_givenifs, sizeof (*given_ifs)))) {
732			slp_err(LOG_CRIT, 0, "make_mc_target",
733						"out of memory");
734			return (SLP_MEMORY_ALLOC_FAILED);
735		}
736
737		i = 0;
738		/* strtok_r will destructively modify, so make a copy first */
739		if (!(ifs_string = strdup(ifs_string))) {
740			slp_err(LOG_CRIT, 0, "make_mc_target",
741						"out of memory");
742			free(given_ifs);
743			return (SLP_MEMORY_ALLOC_FAILED);
744		}
745		for (
746			p = strtok_r(ifs_string, ",", &tstate);
747			p;
748			p = strtok_r(NULL, ",", &tstate)) {
749
750			if (slp_pton(p, &(given_ifs[i])) < 1) {
751				/* skip */
752				num_givenifs--;
753				continue;
754			}
755			i++;
756		}
757		*nfds = num_givenifs;
758		free(ifs_string);
759
760		/* allocate a pollfd array for all interfaces */
761		if (!(*fds = calloc(num_givenifs, sizeof (**fds)))) {
762			slp_err(LOG_CRIT, 0, "make_mc_target",
763						"out of memory");
764			free(ifs_string);
765			free(given_ifs);
766			return (SLP_MEMORY_ALLOC_FAILED);
767		}
768
769		/* lay the given interfaces into the pollfd array */
770		for (i = 0; i < num_givenifs; i++) {
771
772			/* create a socket to bind to this interface */
773			if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
774				slp_err(LOG_CRIT, 0, "make_mc_target",
775						"could not create socket: %s",
776						strerror(errno));
777				free_pfds(*fds, *nfds);
778				return (SLP_INTERNAL_SYSTEM_ERROR);
779			}
780
781			/* fill in the pollfd structure */
782			(*fds)[i].fd = fd;
783			(*fds)[i].events |= POLLRDNORM;
784
785			if (use_broadcast) {
786				struct sockaddr_in bcsin[1];
787
788				(void) memcpy(
789					&(bcsin->sin_addr), &(given_ifs[i]),
790					sizeof (bcsin->sin_addr));
791				bcsin->sin_family = AF_INET;
792				bcsin->sin_port = 0;
793
794				/* bind fd to interface */
795				if (bind(fd, (struct sockaddr *)bcsin,
796						sizeof (*bcsin)) == 0) {
797					continue;
798				}
799				/* else fallthru to default (multicast) */
800				slp_err(LOG_INFO, 0, "make_mc_target",
801				"could not set broadcast interface: %s",
802					strerror(errno));
803			}
804			/* else use multicast */
805			if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
806					&(given_ifs[i]), sizeof (given_ifs[i]))
807					< 0) {
808
809					slp_err(LOG_INFO, 0, "make_mc_target",
810				"could not set multicast interface: %s",
811							strerror(errno));
812					continue;
813			}
814
815			have_valid_if = SLP_TRUE;
816		}
817
818		if (use_broadcast) {
819		    SLPError err;
820
821		    if ((err = make_bc_target(
822					hp, given_ifs, num_givenifs, bcifs))
823			!= SLP_OK) {
824
825			if (err == SLP_MEMORY_ALLOC_FAILED) {
826			    /* the only thing which is really a showstopper */
827			    return (err);
828			}
829
830			/* else no valid interfaces */
831			have_valid_if = SLP_FALSE;
832		    }
833		}
834		free(given_ifs);
835	}
836
837	if (!have_valid_if) {
838		if (*fds && !have_valid_if) {
839			/* couldn't process net.slp.interfaces property */
840			free(*fds);
841		}
842
843		/* bind to default interface */
844		if (!(*fds = calloc(1, sizeof (**fds)))) {
845			slp_err(LOG_CRIT, 0, "make_mc_target",
846						"out of memory");
847			return (SLP_MEMORY_ALLOC_FAILED);
848		}
849
850		if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
851			slp_err(LOG_CRIT, 0, "make_mc_target",
852						"could not create socket: %s",
853						strerror(errno));
854			free(*fds);
855			return (SLP_INTERNAL_SYSTEM_ERROR);
856		}
857
858		(**fds).fd = fd;
859		(**fds).events |= POLLRDNORM;
860		*nfds = 1;
861	}
862
863	/* set required options on all configured fds */
864	for (nfd_i = 0; nfd_i < *nfds; nfd_i++) {
865		if (use_broadcast) {
866			const int on = 1;
867			if (setsockopt((*fds)[nfd_i].fd, SOL_SOCKET,
868					SO_BROADCAST,
869					(void *) &on, sizeof (on)) < 0) {
870				slp_err(LOG_CRIT, 0, "make_mc_target",
871					"could not enable broadcast: %s",
872					strerror(errno));
873			}
874		} else {
875			if (setsockopt((*fds)[nfd_i].fd, IPPROTO_IP,
876					IP_MULTICAST_TTL, &ttl, 1) < 0) {
877				slp_err(LOG_CRIT, 0, "make_mc_target",
878					    "could not set multicast TTL: %s",
879					    strerror(errno));
880			}
881		}
882	}
883
884	if (use_broadcast) {
885	    sin->sin_addr.s_addr = INADDR_BROADCAST;
886	} else {
887		sin->sin_addr.s_addr = SLP_MULTICAST_ADDRESS;
888	}
889
890	return (SLP_OK);
891}
892
893/*
894 * Obtains the subnet broadcast address for each interface specified
895 * in net.slp.interfaces, and fill bcifs->sin with an array of these
896 * addresses.
897 */
898static SLPError make_bc_target(slp_handle_impl_t *hp,
899				struct in_addr *given_ifs,
900				int num_givenifs, struct bc_ifs *bcifs) {
901	SLPError err;
902	int i;
903
904	if ((err = slp_broadcast_addrs(hp, given_ifs, num_givenifs,
905					&(bcifs->sin), &(bcifs->num_ifs)))
906	    != SLP_OK) {
907	    return (err);
908	}
909
910	/* set SLP port on each sockaddr_in */
911	for (i = 0; i < bcifs->num_ifs; i++) {
912		bcifs->sin[i].sin_port = htons(SLP_PORT);
913	}
914
915	return (SLP_OK);
916}
917
918/*
919 * Sends msg on 1st fd in fds for multicast, or on all interfaces
920 * specified in net.slp.interfaces for broadcast. Returns SLP_OK if
921 * msg was sent successfully on at least one interface; otherwise
922 * returns SLP_NETWORK_ERROR if msg was not sent on any interfaces.
923 */
924static SLPError mc_sendmsg(struct pollfd *fds,
925				struct msghdr *msg, struct bc_ifs *bcifs) {
926
927	if (slp_get_usebroadcast()) {
928	    char *ifs = (char *)SLPGetProperty(SLP_CONFIG_INTERFACES);
929
930	    /* hand off to broadcast-specific send function */
931	    if (ifs && *ifs && bc_sendmsg(fds, msg, bcifs) == SLP_OK) {
932		return (SLP_OK);
933	    }
934
935		/*
936		 * else  no ifs given, or bc_sendmsg failed, so send on
937		 * general broadcast addr (255.255.255.255). This will
938		 * cause the message to be sent on all interfaces. The
939		 * address will have been set in make_mc_target.
940		 */
941	}
942
943	/*
944	 * Send only on one interface -- let routing take care of
945	 * sending the message everywhere it needs to go. Sending
946	 * on more than one interface can cause nasty routing loops.
947	 * Note that this approach doesn't work with partitioned
948	 * networks.
949	 */
950	if (sendmsg(fds[0].fd, msg, 0) < 0) {
951		slp_err(LOG_CRIT, 0, "mc_sendmsg",
952			"sendmsg failed: %s", strerror(errno));
953		return (SLP_NETWORK_ERROR);
954	}
955
956	return (SLP_OK);
957}
958
959/*
960 * Send msg to each subnet broadcast address in bcifs->sin. Note
961 * that we can send on any fd (regardless of which interface to which
962 * it is bound), since the kernel will take care of routing for us.
963 * Returns err != SLP_OK only if no message was sent on any interface.
964 */
965static SLPError bc_sendmsg(struct pollfd *fds, struct msghdr *msg,
966				struct bc_ifs *bcifs) {
967	int i;
968	SLPBoolean sent_one = SLP_FALSE;
969
970	for (i = 0; i < bcifs->num_ifs; i++) {
971		msg->msg_name = (caddr_t)&(bcifs->sin[i]);
972
973		if (sendmsg(fds[0].fd, msg, 0) < 0) {
974			slp_err(LOG_CRIT, 0, "bc_sendmsg",
975				"sendmsg failed: %s", strerror(errno));
976			continue;
977		}
978		sent_one = SLP_TRUE;
979	}
980	return (sent_one ? SLP_OK : SLP_NETWORK_ERROR);
981}
982
983/*
984 * This is where the bulk of the multicast convergance algorithm resides.
985 * mc_recvmsg() waits for data to be ready on any fd in pfd, iterates
986 * through pfd and reads data from ready fd's. It also checks timeouts
987 * and user-cancels.
988 *
989 * Parameters:
990 *   pfd	IN	an array of pollfd structs containing fds to poll
991 *   nfds	IN	number of elements in pfd
992 *   hp		IN	SLPHandle from originating call
993 *   scopes	IN	scopes to use for this message
994 *   header	IN	the SLP message header for this message
995 *   collator	IN/OUT	btree collator for PR list
996 *   final_to	IN	final timeout
997 *   sent	IN	time when message was sent
998 *   now	IN/OUT	set to current time at beginning of convergance
999 *   noresults	OUT	set to 0 if any results are received
1000 *   anyresults	OUT	set to true if any results are received
1001 *   timeout	IN	time for this convergence iteration
1002 *
1003 * Returns only if an error has occured, or if either this retransmit
1004 * timeout or the final timeout has expired, or if hp->cancel becomes true.
1005 */
1006static void mc_recvmsg(struct pollfd *pfd, nfds_t nfds, slp_handle_impl_t *hp,
1007			const char *scopes, char *header, void **collator,
1008			unsigned long long final_to,
1009			unsigned long long sent,
1010			unsigned long long *now,
1011			int *noresults, int *anyresults, int timeout) {
1012	char *reply = NULL;
1013	nfds_t i;
1014	struct sockaddr_in responder;
1015	int pollerr;
1016	socklen_t addrlen = sizeof (responder);
1017	size_t mtu = slp_get_mtu();
1018
1019	for (; !hp->cancel; ) {
1020	    /* wait until we can read something */
1021	    pollerr = wait_for_response(
1022				final_to, &timeout, sent, now, pfd, nfds);
1023	    if (pollerr == 0)
1024		/* timeout */
1025		goto cleanup;
1026	    if (pollerr < 0)
1027		/* error */
1028		goto cleanup;
1029
1030	    /* iterate through all fds to find one with data to read */
1031	    for (i = 0; !hp->cancel && i < nfds; i++) {
1032
1033		if (pfd[i].fd < 0 ||
1034		    !(pfd[i].revents & (POLLRDNORM | POLLERR))) {
1035
1036		    /* unused fd or unwanted event */
1037		    continue;
1038		}
1039
1040		/* alloc reply buffer */
1041		if (!reply && !(reply = malloc(mtu))) {
1042		    slp_err(LOG_CRIT, 0, "mc_revcmsg", "out of memory");
1043		    return;
1044	    }
1045		if (recvfrom(pfd[i].fd, reply, mtu, 0,
1046				(struct sockaddr *)&responder,
1047				(int *)&addrlen) < 0) {
1048
1049		    /* if reply overflows, hand off to TCP */
1050		    if (errno == ENOMEM) {
1051			free(reply); reply = NULL;
1052			tcp_handoff(hp, scopes,
1053					&responder, slp_get_xid(header));
1054			continue;
1055		    }
1056
1057		    /* else something nasty happened */
1058		    slp_err(LOG_CRIT, 0, "mc_recvmsg",
1059					"recvfrom failed: %s",
1060					strerror(errno));
1061		    continue;
1062		} else {
1063		    /* success */
1064		    if (slp_get_overflow(reply)) {
1065			tcp_handoff(hp, scopes,
1066					&responder, slp_get_xid(header));
1067		    }
1068			/*
1069			 * Add to the PR list. If this responder has already
1070			 * answered, it doesn't count.
1071			 */
1072		    if (add2pr_list(&(hp->msg), &responder, collator)) {
1073			(void) slp_enqueue(hp->q, reply);
1074			*noresults = 0;
1075			*anyresults = 1;
1076			reply = NULL;
1077		    }
1078
1079		    /* if we've exceeded maxwait, break out */
1080		    *now = now_millis();
1081		    if (*now > final_to)
1082			goto cleanup;
1083
1084		} /* end successful receive */
1085
1086	    } /* end fd iteration */
1087
1088	    /* reset poll's timeout */
1089	    timeout = timeout - (int)(*now - sent);
1090	    if (timeout <= 0) {
1091		goto cleanup;
1092	    }
1093
1094	} /* end main poll loop */
1095
1096cleanup:
1097	if (reply) {
1098	    free(reply);
1099	}
1100}
1101
1102/*
1103 * Closes any open sockets and frees the pollfd array.
1104 */
static void free_pfds(struct pollfd *pfds, nfds_t nfds) {
	/*
	 * Walk the array, closing every descriptor that is strictly
	 * positive, then release the array itself.
	 *
	 * NOTE(review): descriptor 0 is skipped along with negative values,
	 * presumably because unused slots may be zero-filled -- confirm
	 * against the code that allocates the array.
	 */
	nfds_t idx = 0;

	while (idx < nfds) {
		int fd = pfds[idx++].fd;

		if (fd > 0) {
			(void) close(fd);
		}
	}

	free(pfds);
}
1118
1119/*
1120 * Hands off a message to the TCP thread, fabricating a new target
1121 * from 'sin'. 'xid' will be used to create the XID for the TCP message.
1122 */
1123static void tcp_handoff(slp_handle_impl_t *hp, const char *scopes,
1124			struct sockaddr_in *sin, unsigned short xid) {
1125	slp_target_t *target;
1126
1127	target = slp_fabricate_target(sin);
1128	slp_uc_tcp_send(hp, target, scopes, SLP_TRUE, xid);
1129}
1130
1131/*
1132 * Returns the current time in milliseconds.
1133 */
static unsigned long long now_millis() {
	struct timeval stamp;

	/* wall-clock time of day; the return value is deliberately ignored */
	(void) gettimeofday(&stamp, NULL);

	/* whole seconds scaled to ms, plus the sub-second part in ms */
	return ((unsigned long long) stamp.tv_sec * 1000 +
		stamp.tv_usec / 1000);
}
1143
1144/*
1145 * A wrapper around poll which waits until a reply comes in. This will
1146 * wait no longer than 'timeout' before returning. poll can return
 * even if no data is on the pipe or timeout has occurred, so the
 * additional parameters are used to break out of the wait loop if
1149 * we have exceeded the timeout value. 'final_to' is ignored if it is 0.
1150 *
1151 * returns:	< 0 on error
1152 *		0 on timeout
1153 *		> 0 on success (i.e. ready to read data).
1154 * side effect: 'now' is set to the time when poll found data on the pipe.
1155 */
static int wait_for_response(
	unsigned long long final_to,
	int *timeout,
	unsigned long long sent,
	unsigned long long *now,
	struct pollfd pfd[], nfds_t nfds) {

	/*
	 * Polls 'pfd' for at most *timeout milliseconds, retrying when poll
	 * is interrupted (EINTR/EAGAIN). On each retry *timeout is rewritten
	 * with the time still left, where elapsed time is measured from
	 * 'sent'. Returns poll's positive result when data is ready, 0 on
	 * timeout, and poll's negative result on any other error. *now is
	 * refreshed after every poll call.
	 */

	/*
	 * The budget as handed in. Each retry recomputes the remaining time
	 * from this value; subtracting the elapsed time from the already
	 * reduced *timeout would count the elapsed time more than once.
	 */
	const int budget = *timeout;

	for (;;) {
		int pollerr, err, elapsed;

		pollerr = poll(pfd, nfds, *timeout);
		/* capture errno before any other call can disturb it */
		err = errno;
		*now = now_millis();

		/* > 0: ready to read; 0: timeout */
		if (pollerr >= 0) {
			return (pollerr);
		}

		/* a genuine error: report it and give up */
		if (err != EAGAIN && err != EINTR) {
			slp_err(LOG_INFO, 0, "wait for response",
				"poll error: %s",
				strerror(err));
			return (pollerr);
		}

		/* poll was interrupted: retry unless we are out of time */
		elapsed = (int)(*now - sent);
		if ((final_to != 0 && *now > final_to) || elapsed > budget) {
			return (0);
		}
		*timeout = budget - elapsed;
	}
}
1200
1201/*
1202 * Adds the cname of the host whose address is in 'sin' to this message's
1203 * previous responder list. The message is contained in 'msg'.
1204 * 'collator' contains the complete previous responder list, so that
1205 * even if the PR list in the message overflows and must be truncated,
1206 * the function can still correctly determine if we have heard from this
1207 * host before.
1208 *
1209 * returns:	1 if this is the first time we've heard from this host
 *		0 if this is a duplicate reply
1211 */
1212static int add2pr_list(
1213	slp_msg_t *msg,
1214	struct sockaddr_in *sin,
1215	void **collator) {
1216
1217	char **res, *cname, *p, *header;
1218	size_t mtu;
1219	size_t len, off, namelen;
1220	unsigned short prlen;
1221
1222	/* Attempt to resolve the responder's IP address to its host name */
1223	if (!(cname = slp_gethostbyaddr((char *)&(sin->sin_addr),
1224					sizeof (sin->sin_addr))))
1225		return (0);
1226
1227	res = slp_tsearch(
1228		cname, collator,
1229		(int (*)(const void *, const void *)) strcasecmp);
1230	if (*res != cname) {
1231		/* duplicate */
1232		slp_err(LOG_INFO, 0, "add2pr_list",
1233			"drop PR ignored by host: %s",
1234			cname);
1235		free(cname);
1236		return (0);
1237	}
1238
1239	/* new responder: add to the msg PR list if there is room */
1240	mtu = slp_get_mtu();
1241
1242	header = msg->iov[0].iov_base;
1243	len = slp_get_length(header);
1244
1245	namelen = strlen(cname);
1246	if ((namelen + 2 + len) >= mtu)
1247		return (1);	/* no room */
1248
1249	/* else  there is enough room */
1250	prlen = (unsigned short)msg->prlist->iov_len;
1251	p = msg->prlist->iov_base + prlen;
1252	*p = 0;
1253
1254	if (prlen) {
1255		namelen++;	/* add the ',' */
1256		(void) strcat(p, ",");
1257	}
1258	(void) strcat(p, cname);
1259
1260	/* update msg and pr list length */
1261	len += namelen;
1262	slp_set_length(header, len);
1263	prlen += (unsigned short)namelen;
1264	off = 0;
1265	(void) slp_add_sht(msg->prlistlen.iov_base, 2, prlen, &off);
1266	msg->prlist->iov_len += namelen;
1267
1268	return (1);
1269}
1270
1271/*
1272 * The iterator function used while traversing the previous responder
1273 * tree. Just frees resources.
1274 */
1275/*ARGSUSED2*/
static void free_pr_node(void *node, VISIT order, int level, void *cookie) {
	/* the node is read as a pointer to the stored host-name string */
	char **slot = node;

	switch (order) {
	case endorder:
	case leaf:
		/* free the name first, then the node that held it */
		free(*slot);
		free(node);
		break;
	default:
		/* any other visit order: leave the node untouched */
		break;
	}
}
1283