1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Network SNDR/ncall-ip server - based on nfsd
29 */
#include <sys/types.h>
#include <rpc/types.h>
#include <errno.h>
#include <limits.h>
#include <netdb.h>
#include <sys/socket.h>
#include <netconfig.h>
#include <stropts.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <netdir.h>
#include <rpc/rpc_com.h>
#include <rpc/rpc.h>
#include <tiuser.h>
#include <netinet/tcp.h>
#include <netinet/in.h>
#include <syslog.h>
#include <locale.h>
#include <langinfo.h>
#include <libintl.h>
#include <libgen.h>
#include <deflt.h>
#include <sys/resource.h>
56
57#include <sys/nsctl/nsctl.h>
58
59#ifdef	__NCALL__
60
61#include <sys/ncall/ncall.h>
62#include <sys/ncall/ncall_ip.h>
63#include <sys/nsctl/libncall.h>
64
65#define	RDC_POOL_CREATE	NC_IOC_POOL_CREATE
66#define	RDC_POOL_RUN	NC_IOC_POOL_RUN
67#define	RDC_POOL_WAIT	NC_IOC_POOL_WAIT
68#define	RDC_PROGRAM	NCALL_PROGRAM
69#define	RDC_SERVICE	"ncall"
70#undef RDC_SVCPOOL_ID	/* We are overloading this value */
71#define	RDC_SVCPOOL_ID	NCALL_SVCPOOL_ID
72#define	RDC_SVC_NAME	"NCALL"
73#define	RDC_VERS_MIN	NCALL_VERS_MIN
74#define	RDC_VERS_MAX	NCALL_VERS_MAX
75
76#else	/* !__NCALL__ */
77
78#include <sys/nsctl/rdc_ioctl.h>
79#include <sys/nsctl/rdc_io.h>
80#include <sys/nsctl/librdc.h>
81
82#define	RDC_SERVICE	"rdc"
83#define	RDC_SVC_NAME	"RDC"
84
85#endif	/* __NCALL__ */
86
87#define	RDCADMIN	"/etc/default/sndr"
88
89#include <nsctl.h>
90
/*
 * One pending connection indication.  Entries are linked into a circular,
 * doubly-linked list by conn_get() and unlinked by discon_get() and
 * cots_listen_event().
 */
struct conn_ind {
	/* next entry on the ring; points back at this entry when it is alone */
	struct conn_ind *conn_next;
	/* previous entry on the ring */
	struct conn_ind *conn_prev;
	/* call structure obtained from t_alloc() and filled in by t_listen() */
	struct t_call   *conn_call;
};
96
/*
 * Per-descriptor transport state.  conn_polled[i] describes the descriptor
 * held in poll_array[i].
 */
struct conn_entry {
	/* set to 1 by conn_close_oldest() once t_sndrel() has succeeded */
	bool_t			closing;
	/* structure copy of the netconfig entry the descriptor was opened on */
	struct netconfig	nc;
};
101
/* Name used in diagnostics; set in main() from basename(av[0]). */
static char *progname;
/* Transport state for each polled descriptor, parallel to poll_array. */
static struct conn_entry *conn_polled;
static int num_conns;			/* Current number of connections */
static struct pollfd *poll_array;	/* array of poll descriptors for poll */
static size_t num_fds = 0;		/* number of transport fds opened */

/* Forward declarations. */
static void poll_for_action();
static void remove_from_poll_list(int);
static int do_poll_cots_action(int, int);
static int do_poll_clts_action(int, int);
static void add_to_poll_list(int, struct netconfig *);
static int bind_to_provider(char *, char *, struct netbuf **,
    struct netconfig **);
static int set_addrmask(int, struct netconfig *, struct netbuf *);
static void conn_close_oldest(void);
static boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static void cots_listen_event(int, int);
static int discon_get(int, struct netconfig *, struct conn_ind **);
static int nofile_increase(int);
static int is_listen_fd_index(int);
/* The kernel RPC pool helpers are not built for SunOS 5.6, 5.7 or 5.8. */
#if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
static int sndrsvcpool(int);
static int svcwait(int id);
#endif
125
126
/*
 * RPC protocol block.  Useful for passing registration information:
 * do_one() registers every version from versmin to versmax (inclusive)
 * of 'program' with rpcbind.
 */
struct protob {
	char *serv;		/* ASCII service name, e.g. "RDC" */
	int versmin;		/* minimum version no. to be registered */
	int versmax;		/* maximum version no. to be registered */
	int program;		/* program no. to be registered */
	struct protob *next;	/* next entry on list */
};
137
138
139
/*
 * Value of num_fds recorded by main() once the initial transports have
 * been set up; conn_close_oldest() starts its search at this index.
 */
static size_t end_listen_fds;
/* Incremented by -d; when non-zero main() does not fork or daemonize. */
static int debugflg = 0;
/* Connection limit (-c or SNDR_THREADS=); -1 means no limit was given. */
static int max_conns_allowed = -1;
/* Listen backlog (-l or SNDR_LISTEN_BACKLOG=). */
static int listen_backlog = 10;
/* Single transport provider to use (-t or SNDR_TRANSPORT=), if any. */
static char *trans_provider = (char *)NULL;
static int rdcsvc(int, struct netbuf, struct netconfig *);

/* used by cots_listen_event() */
static int (*Mysvc)(int, struct netbuf, struct netconfig *) = rdcsvc;
149
/*
 * Determine valid semantics for rdc: true when the netconfig entry uses
 * connectionless, connection-oriented, or connection-oriented with orderly
 * release TPI semantics.  The argument is parenthesized so that any
 * pointer-valued expression may be passed.
 */
#define	OK_TPI_TYPE(_nconf)	\
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)

/*
 * Assemble a uint32_t from the four bytes starting at 'a', most
 * significant byte first.  'a' is parenthesized before the cast so that
 * expressions such as (p + 1) address the intended bytes regardless of
 * the pointer's type.  'a' is evaluated four times, so it must not have
 * side effects.
 */
#define	BE32_TO_U32(a)		\
	((((uint32_t)((uchar_t *)(a))[0] & 0xFF) << (uint32_t)24) |\
	(((uint32_t)((uchar_t *)(a))[1] & 0xFF) << (uint32_t)16) |\
	(((uint32_t)((uchar_t *)(a))[2] & 0xFF) << (uint32_t)8)  |\
	((uint32_t)((uchar_t *)(a))[3] & 0xFF))
163
/*
 * NULL-terminated list of transport devices that main() tries, in order,
 * when neither -a nor an explicit transport provider was given.
 */
#ifdef DEBUG
/*
 * Only support UDP in DEBUG mode for now
 */
static	char *defaultproviders[] = { "/dev/tcp", "/dev/tcp6", "/dev/udp",
		"/dev/udp6", NULL };
#else
static	char *defaultproviders[] = { "/dev/tcp6", "/dev/tcp", NULL };
#endif
173
/*
 * Number of elements to add to the poll array on each allocation.
 */
#define	POLL_ARRAY_INC_SIZE	64
/* NOTE(review): presumably the step used by nofile_increase() - confirm. */
#define	NOFILE_INC_SIZE		64

/* Path of the control device that open_rdc() opens. */
#ifdef	__NCALL__
const char *rdc_devr = "/dev/ncallip";
#else
const char *rdc_devr = "/dev/rdc";
#endif
185
186static int rdc_fdr;
187static int
188
189open_rdc(void)
190{
191	int fd = open(rdc_devr, O_RDONLY);
192
193	if (fd < 0)
194		return (-1);
195
196	return (rdc_fdr = fd);
197}
198
199static int
200sndrsys(int type, void *arg)
201{
202	int ret = -1;
203	if (!rdc_fdr && open_rdc() < 0) { /* open failed */
204		syslog(LOG_ERR, "open_rdc() failed: %m\n");
205	} else {
206		if ((ret = ioctl(rdc_fdr, type, arg)) < 0) {
207			syslog(LOG_ERR, "ioctl(rdc_ioctl) failed: %m\n");
208		}
209	}
210	return (ret);
211}
212
213int
214rdc_transport_open(struct netconfig *nconf)
215{
216	int fd;
217	struct strioctl	strioc;
218
219	if ((nconf == (struct netconfig *)NULL) ||
220	    (nconf->nc_device == (char *)NULL)) {
221		syslog(LOG_ERR, "No netconfig device");
222		return (-1);
223	}
224
225	/*
226	 * Open the transport device.
227	 */
228	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
229	if (fd == -1)  {
230		if (t_errno == TSYSERR && errno == EMFILE &&
231				(nofile_increase(0) == 0)) {
232			/* Try again with a higher NOFILE limit. */
233			fd = t_open(nconf->nc_device, O_RDWR,
234				(struct t_info *)NULL);
235		}
236		if (fd == -1) {
237			if (t_errno == TSYSERR) {
238				syslog(LOG_ERR, "t_open failed: %m");
239			} else {
240				syslog(LOG_ERR, "t_open failed: %s",
241				    t_errlist[t_errno]);
242			}
243			return (-1);
244		}
245	}
246
247	/*
248	 * Pop timod because the RPC module must be as close as possible
249	 * to the transport.
250	 */
251	if (ioctl(fd, I_POP, 0) < 0) {
252		syslog(LOG_ERR, "I_POP of timod failed: %m");
253		if (t_close(fd) == -1) {
254			if (t_errno == TSYSERR) {
255				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
256			} else {
257				syslog(LOG_ERR, "t_close failed on %d: %s",
258				    fd, t_errlist[t_errno]);
259			}
260		}
261		return (-1);
262	}
263
264	if (nconf->nc_semantics == NC_TPI_CLTS) {
265		/*
266		 * Push rpcmod to filter data traffic to KRPC.
267		 */
268		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
269			syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
270			(void) t_close(fd);
271			return (-1);
272		}
273	} else {
274		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
275			syslog(LOG_ERR, "I_PUSH of CONS rpcmod failed: %m");
276			if (t_close(fd) == -1) {
277				if (t_errno == TSYSERR) {
278					syslog(LOG_ERR,
279						"t_close failed on %d: %m", fd);
280				} else {
281					syslog(LOG_ERR,
282						"t_close failed on %d: %s",
283						fd, t_errlist[t_errno]);
284				}
285			}
286			return (-1);
287		}
288
289		strioc.ic_cmd = RPC_SERVER;
290		strioc.ic_dp = (char *)0;
291		strioc.ic_len = 0;
292		strioc.ic_timout = -1;
293		/* Tell CONS rpcmod to act like a server stream. */
294		if (ioctl(fd, I_STR, &strioc) < 0) {
295			syslog(LOG_ERR, "CONS rpcmod set-up ioctl failed: %m");
296			if (t_close(fd) == -1) {
297				if (t_errno == TSYSERR) {
298					syslog(LOG_ERR,
299						"t_close failed on %d: %m", fd);
300				} else {
301					syslog(LOG_ERR,
302						"t_close failed on %d: %s",
303						fd, t_errlist[t_errno]);
304				}
305			}
306			return (-1);
307		}
308	}
309
310	/*
311	 * Re-push timod so that we will still be doing TLI
312	 * operations on the descriptor.
313	 */
314	if (ioctl(fd, I_PUSH, "timod") < 0) {
315		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
316		if (t_close(fd) == -1) {
317			if (t_errno == TSYSERR) {
318				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
319			} else {
320				syslog(LOG_ERR, "t_close failed on %d: %s",
321				    fd, t_errlist[t_errno]);
322			}
323		}
324		return (-1);
325	}
326
327	return (fd);
328}
329
330
331void
332rdcd_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
333{
334	int error;
335
336	/*
337	 * Save the error code across syslog(), just in case syslog()
338	 * gets its own error and, therefore, overwrites errno.
339	 */
340	error = errno;
341	if (t_errno == TSYSERR) {
342		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
343		    tli_name, fd, nconf->nc_proto);
344	} else {
345		syslog(LOG_ERR,
346		    "%s(file descriptor %d/transport %s) TLI error %d",
347		    tli_name, fd, nconf->nc_proto, t_errno);
348	}
349	errno = error;
350}
351
352/*
353 * Called to set up service over a particular transport
354 */
355void
356do_one(char *provider, char *proto, struct protob *protobp0,
357	int (*svc)(int, struct netbuf, struct netconfig *))
358{
359	struct netbuf *retaddr;
360	struct netconfig *retnconf;
361	struct netbuf addrmask;
362	int vers;
363	int sock;
364
365	if (provider) {
366		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
367		    &retnconf);
368	} else {
369		(void) syslog(LOG_ERR,
370	"Cannot establish %s service over %s: transport setup problem.",
371		    protobp0->serv, provider ? provider : proto);
372		return;
373	}
374
375	if (sock == -1) {
376		if ((Is_ipv6present() &&
377		(strcmp(provider, "/dev/tcp6") == 0)) ||
378		(!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
379			(void) syslog(LOG_ERR,
380			    "Cannot establish %s service over %s: transport "
381				"setup problem.",
382				protobp0->serv, provider ? provider : proto);
383		return;
384	}
385
386	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
387		(void) syslog(LOG_ERR,
388		    "Cannot set address mask for %s", retnconf->nc_netid);
389		return;
390	}
391
392
393	/*
394	 * Register all versions of the programs in the protocol block list
395	 */
396	for (vers = protobp0->versmin; vers <= protobp0->versmax; vers++) {
397		(void) rpcb_unset(protobp0->program, vers, retnconf);
398		(void) rpcb_set(protobp0->program, vers, retnconf, retaddr);
399	}
400
401	if (retnconf->nc_semantics == NC_TPI_CLTS) {
402		/* Don't drop core if supporting module(s) aren't loaded. */
403		(void) signal(SIGSYS, SIG_IGN);
404
405		/*
406		 * svc() doesn't block, it returns success or failure.
407		 */
408		if ((*svc)(sock, addrmask, retnconf) < 0) {
409			(void) syslog(LOG_ERR,
410"Cannot establish %s service over <file desc. %d, protocol %s> : %m. Exiting",
411				protobp0->serv, sock, retnconf->nc_proto);
412			exit(1);
413		}
414	}
415	/*
416	 * We successfully set up the server over this transport.
417	 * Add this descriptor to the one being polled on.
418	 */
419	add_to_poll_list(sock, retnconf);
420}
421
422/*
423 * Set up the SNDR/ncall-ip service over all the available transports.
424 * Returns -1 for failure, 0 for success.
425 */
426int
427do_all(struct protob *protobp,
428	int (*svc)(int, struct netbuf, struct netconfig *))
429{
430	struct netconfig *nconf;
431	NCONF_HANDLE *nc;
432
433	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
434		syslog(LOG_ERR, "setnetconfig failed: %m");
435		return (-1);
436	}
437	while (nconf = getnetconfig(nc)) {
438		if ((nconf->nc_flag & NC_VISIBLE) &&
439		    strcmp(nconf->nc_protofmly, "loopback") != 0 &&
440		    OK_TPI_TYPE(nconf))
441			do_one(nconf->nc_device, nconf->nc_proto,
442				protobp, svc);
443	}
444	(void) endnetconfig(nc);
445	return (0);
446}
447
448/*
449 * Read the /etc/default/sndr configuration file to determine if the
450 * client has been configured for number of threads, backlog or transport
451 * provider.
452 */
453
454static void
455read_default(void)
456{
457	char *defval, *tmp_str;
458	int errno;
459	int tmp;
460
461	/* Fail silently if error in opening the default rdc config file */
462	if ((defopen(RDCADMIN)) == 0) {
463		if ((defval = defread("SNDR_THREADS=")) != NULL) {
464			errno = 0;
465			tmp = strtol(defval, (char **)NULL, 10);
466			if (errno == 0) {
467				max_conns_allowed = tmp;
468			}
469		}
470		if ((defval = defread("SNDR_LISTEN_BACKLOG=")) != NULL) {
471			errno = 0;
472			tmp = strtol(defval, (char **)NULL, 10);
473			if (errno == 0) {
474				listen_backlog = tmp;
475			}
476		}
477		if ((defval = defread("SNDR_TRANSPORT=")) != NULL) {
478			errno = 0;
479			tmp_str = strdup(defval);
480			if (errno == 0) {
481				trans_provider = tmp_str;
482			}
483		}
484		/* close defaults file */
485		(void) defopen(NULL);
486	}
487}
488#ifdef lint
489int
490sndrd_lintmain(int ac, char **av)
491#else
492int
493main(int ac, char **av)
494#endif
495{
496	const char *dir = "/";
497	int allflag = 0;
498	int pid;
499	int i, rc;
500	struct protob *protobp0, *protobp;
501	char **providerp;
502	char *required;
503#if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
504	int maxservers;
505#endif
506
507	(void) setlocale(LC_ALL, "");
508#ifdef	__NCALL__
509	(void) textdomain("ncall");
510#else
511	(void) textdomain("rdc");
512#endif
513
514	progname = basename(av[0]);
515
516#ifdef	__NCALL__
517	rc = ncall_check_release(&required);
518#else
519	rc = rdc_check_release(&required);
520#endif
521	if (rc < 0) {
522		(void) fprintf(stderr,
523		    gettext("%s: unable to determine the current "
524		    "Solaris release: %s\n"), progname, strerror(errno));
525		exit(1);
526	} else if (rc == FALSE) {
527		(void) fprintf(stderr,
528		    gettext("%s: incorrect Solaris release (requires %s)\n"),
529		    progname, required);
530		exit(1);
531	}
532
533	openlog(progname, LOG_PID|LOG_CONS, LOG_DAEMON);
534	read_default();
535
536	/*
537	 * Usage: <progname> [-c <number of threads>] [-t protocol] \
538	 *		[-d] [-l <listen backlog>]
539	 */
540	while ((i = getopt(ac, av, "ac:t:dl:")) != EOF) {
541		switch (i) {
542			case 'a':
543				allflag = 1;
544				break;
545			case 'c':
546				max_conns_allowed = atoi(optarg);
547				if (max_conns_allowed <= 0)
548					max_conns_allowed = 16;
549				break;
550
551			case 'd':
552				debugflg++;
553				break;
554
555			case 't':
556				trans_provider = optarg;
557				break;
558
559			case 'l':
560				listen_backlog = atoi(optarg);
561				if (listen_backlog < 0)
562					listen_backlog = 32;
563				break;
564
565			default:
566				syslog(LOG_ERR,
567				    "Usage: %s [-c <number of threads>] "
568				    "[-d] [-t protocol] "
569				    "[-l <listen backlog>]\n", progname);
570				exit(1);
571				break;
572		}
573	}
574
575	if (chroot(dir) < 0) {
576		syslog(LOG_ERR, "chroot failed: %m");
577		exit(1);
578	}
579
580	if (chdir(dir) < 0) {
581		syslog(LOG_ERR, "chdir failed: %m");
582		exit(1);
583	}
584
585	if (!debugflg) {
586		pid = fork();
587		if (pid < 0) {
588			syslog(LOG_ERR, "Fork failed\n");
589			exit(1);
590		}
591		if (pid != 0)
592			exit(0);
593
594		/*
595		 * Close existing file descriptors, open "/dev/null" as
596		 * standard input, output, and error, and detach from
597		 * controlling terminal.
598		 */
599#if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
600		/* use closefrom(3C) from PSARC/2000/193 when possible */
601		closefrom(0);
602#else
603		for (i = 0; i < _NFILE; i++)
604			(void) close(i);
605#endif
606		(void) open("/dev/null", O_RDONLY);
607		(void) open("/dev/null", O_WRONLY);
608		(void) dup(1);
609		(void) setsid();
610
611		/*
612		 * ignore all signals apart from SIGTERM.
613		 */
614		for (i = 1; i < _sys_nsig; i++)
615			(void) sigset(i, SIG_IGN);
616
617		(void) sigset(SIGTERM, SIG_DFL);
618	}
619
620#if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
621	/*
622	 * Set up kernel RPC thread pool for the SNDR/ncall-ip server.
623	 */
624	maxservers = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
625	if (sndrsvcpool(maxservers)) {
626		(void) syslog(LOG_ERR,
627		    "Can't set up kernel %s service: %m. Exiting", progname);
628		exit(1);
629	}
630
631	/*
632	 * Set up blocked thread to do LWP creation on behalf of the kernel.
633	 */
634	if (svcwait(RDC_SVCPOOL_ID)) {
635		(void) syslog(LOG_ERR,
636		    "Can't set up %s pool creator: %m, Exiting", progname);
637		exit(1);
638	}
639#endif
640
641	/*
642	 * Build a protocol block list for registration.
643	 */
644	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
645	protobp->serv = RDC_SVC_NAME;
646	protobp->versmin = RDC_VERS_MIN;
647	protobp->versmax = RDC_VERS_MAX;
648	protobp->program = RDC_PROGRAM;
649	protobp->next = (struct protob *)NULL;
650
651	if (allflag) {
652		if (do_all(protobp0, rdcsvc) == -1)
653			exit(1);
654	} else if (trans_provider)
655		do_one(trans_provider, NULL, protobp0, rdcsvc);
656	else {
657		for (providerp = defaultproviders;
658		    *providerp != NULL; providerp++) {
659			trans_provider = *providerp;
660			do_one(trans_provider, NULL, protobp0, rdcsvc);
661		}
662	}
663
664done:
665	free(protobp);
666
667	end_listen_fds = num_fds;
668	/*
669	 * Poll for non-data control events on the transport descriptors.
670	 */
671	poll_for_action();
672
673	syslog(LOG_ERR, "%s fatal server error\n", progname);
674
675	return (-1);
676}
677
678static int
679reuseaddr(int fd)
680{
681	struct t_optmgmt req, resp;
682	struct opthdr *opt;
683	char reqbuf[128];
684	int *ip;
685
686	/* LINTED pointer alignment */
687	opt = (struct opthdr *)reqbuf;
688	opt->level = SOL_SOCKET;
689	opt->name = SO_REUSEADDR;
690	opt->len = sizeof (int);
691
692	/* LINTED pointer alignment */
693	ip = (int *)&reqbuf[sizeof (struct opthdr)];
694	*ip = 1;
695
696	req.flags = T_NEGOTIATE;
697	req.opt.len = sizeof (struct opthdr) + opt->len;
698	req.opt.buf = (char *)opt;
699
700	resp.flags = 0;
701	resp.opt.buf = reqbuf;
702	resp.opt.maxlen = sizeof (reqbuf);
703
704	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
705		if (t_errno == TSYSERR) {
706			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %m\n");
707		} else {
708			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %s\n",
709			    t_errlist[t_errno]);
710		}
711		return (-1);
712	}
713	return (0);
714}
715
716/*
717 * poll on the open transport descriptors for events and errors.
718 */
719void
720poll_for_action(void)
721{
722	int nfds;
723	int i;
724
725	/*
726	 * Keep polling until all transports have been closed. When this
727	 * happens, we return.
728	 */
729	while ((int)num_fds > 0) {
730		nfds = poll(poll_array, num_fds, INFTIM);
731		switch (nfds) {
732		case 0:
733			continue;
734
735		case -1:
736			/*
737			 * Some errors from poll could be
738			 * due to temporary conditions, and we try to
739			 * be robust in the face of them. Other
740			 * errors (should never happen in theory)
741			 * are fatal (eg. EINVAL, EFAULT).
742			 */
743			switch (errno) {
744			case EINTR:
745			    continue;
746
747			case EAGAIN:
748			case ENOMEM:
749				(void) sleep(10);
750				continue;
751
752			default:
753				(void) syslog(LOG_ERR,
754				    "poll failed: %m. Exiting");
755				exit(1);
756			}
757		default:
758			break;
759		}
760
761		/*
762		 * Go through the poll list looking for events.
763		 */
764		for (i = 0; i < num_fds && nfds > 0; i++) {
765			if (poll_array[i].revents) {
766				nfds--;
767				/*
768				 * We have a message, so try to read it.
769				 * Record the error return in errno,
770				 * so that syslog(LOG_ERR, "...%m")
771				 * dumps the corresponding error string.
772				 */
773				if (conn_polled[i].nc.nc_semantics ==
774				    NC_TPI_CLTS) {
775					errno = do_poll_clts_action(
776					    poll_array[i].fd, i);
777				} else {
778					errno = do_poll_cots_action(
779					    poll_array[i].fd, i);
780				}
781
782				if (errno == 0)
783					continue;
784				/*
785				 * Most returned error codes mean that there is
786				 * fatal condition which we can only deal with
787				 * by closing the transport.
788				 */
789				if (errno != EAGAIN && errno != ENOMEM) {
790					(void) syslog(LOG_ERR,
791					    "Error (%m) reading descriptor %d"
792					    "/transport %s. Closing it.",
793					    poll_array[i].fd,
794					    conn_polled[i].nc.nc_proto);
795					(void) t_close(poll_array[i].fd);
796					remove_from_poll_list(poll_array[i].fd);
797				} else if (errno == ENOMEM)
798					(void) sleep(5);
799			}
800		}
801	}
802
803	(void) syslog(LOG_ERR,
804	    "All transports have been closed with errors. Exiting.");
805}
806
807/*
808 * Allocate poll/transport array entries for this descriptor.
809 */
810static void
811add_to_poll_list(int fd, struct netconfig *nconf)
812{
813	static int poll_array_size = 0;
814
815	/*
816	 * If the arrays are full, allocate new ones.
817	 */
818	if (num_fds == poll_array_size) {
819		struct pollfd *tpa;
820		struct conn_entry *tnp;
821
822		if (poll_array_size != 0) {
823			tpa = poll_array;
824			tnp = conn_polled;
825		} else
826			tpa = (struct pollfd *)0;
827
828		poll_array_size += POLL_ARRAY_INC_SIZE;
829
830		/*
831		 * Allocate new arrays.
832		 */
833		poll_array = (struct pollfd *)
834		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
835		conn_polled = (struct conn_entry *)
836		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
837		if (poll_array == (struct pollfd *)NULL ||
838		    conn_polled == (struct conn_entry *)NULL) {
839			syslog(LOG_ERR, "malloc failed for poll array");
840			exit(1);
841		}
842
843		/*
844		 * Copy the data of the old ones into new arrays, and
845		 * free the old ones.
846		 * num_fds is guaranteed to be less than
847		 * poll_array_size, so this memcpy is safe.
848		 */
849		if (tpa) {
850			(void) memcpy((void *)poll_array, (void *)tpa,
851				num_fds * sizeof (struct pollfd));
852			(void) memcpy((void *)conn_polled, (void *)tnp,
853				num_fds * sizeof (struct conn_entry));
854			free((void *)tpa);
855			free((void *)tnp);
856		}
857	}
858
859	/*
860	 * Set the descriptor and event list. All possible events are
861	 * polled for.
862	 */
863	poll_array[num_fds].fd = fd;
864	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
865
866	/*
867	 * Copy the transport data over too.
868	 */
869	conn_polled[num_fds].nc = *nconf;	/* structure copy */
870	conn_polled[num_fds].closing = 0;
871
872	/*
873	 * Set the descriptor to non-blocking. Avoids a race
874	 * between data arriving on the stream and then having it
875	 * flushed before we can read it.
876	 */
877	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
878		(void) syslog(LOG_ERR,
879		    "fcntl(file desc. %d/transport %s, F_SETFL, "
880		    "O_NONBLOCK): %m. Exiting",
881		    num_fds, nconf->nc_proto);
882		exit(1);
883	}
884
885	/*
886	 * Count this descriptor.
887	 */
888	++num_fds;
889}
890
891static void
892remove_from_poll_list(int fd)
893{
894	int i;
895	int num_to_copy;
896
897	for (i = 0; i < num_fds; i++) {
898		if (poll_array[i].fd == fd) {
899			--num_fds;
900			num_to_copy = num_fds - i;
901			(void) memcpy((void *)&poll_array[i],
902			    (void *)&poll_array[i+1],
903			    num_to_copy * sizeof (struct pollfd));
904			(void) memset((void *)&poll_array[num_fds], 0,
905			    sizeof (struct pollfd));
906			(void) memcpy((void *)&conn_polled[i],
907			    (void *)&conn_polled[i+1],
908			    num_to_copy * sizeof (struct conn_entry));
909			(void) memset((void *)&conn_polled[num_fds], 0,
910			    sizeof (struct conn_entry));
911			return;
912		}
913	}
914	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
915
916}
917
918static void
919conn_close_oldest(void)
920{
921	int fd;
922	int i1;
923
924	/*
925	 * Find the oldest connection that is not already in the
926	 * process of shutting down.
927	 */
928	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
929		if (i1 >= num_fds)
930			return;
931		if (conn_polled[i1].closing == 0)
932			break;
933	}
934#ifdef DEBUG
935	(void) printf("too many connections (%d), releasing oldest (%d)\n",
936	    num_conns, poll_array[i1].fd);
937#else
938	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
939	    num_conns, poll_array[i1].fd);
940#endif
941	fd = poll_array[i1].fd;
942	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
943		/*
944		 * For politeness, send a T_DISCON_REQ to the transport
945		 * provider.  We close the stream anyway.
946		 */
947		(void) t_snddis(fd, (struct t_call *)0);
948		num_conns--;
949		remove_from_poll_list(fd);
950		(void) t_close(fd);
951	} else {
952		/*
953		 * For orderly release, we do not close the stream
954		 * until the T_ORDREL_IND arrives to complete
955		 * the handshake.
956		 */
957		if (t_sndrel(fd) == 0)
958			conn_polled[i1].closing = 1;
959	}
960}
961
962static boolean_t
963conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
964{
965	struct conn_ind	*conn;
966	struct conn_ind	*next_conn;
967
968	conn = (struct conn_ind *)malloc(sizeof (*conn));
969	if (conn == NULL) {
970		syslog(LOG_ERR, "malloc for listen indication failed");
971		return (FALSE);
972	}
973
974	/* LINTED pointer alignment */
975	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
976	if (conn->conn_call == NULL) {
977		free((char *)conn);
978		rdcd_log_tli_error("t_alloc", fd, nconf);
979		return (FALSE);
980	}
981
982	if (t_listen(fd, conn->conn_call) == -1) {
983		rdcd_log_tli_error("t_listen", fd, nconf);
984		(void) t_free((char *)conn->conn_call, T_CALL);
985		free((char *)conn);
986		return (FALSE);
987	}
988
989	if (conn->conn_call->udata.len > 0) {
990		syslog(LOG_WARNING,
991		    "rejecting inbound connection(%s) with %d bytes "
992		    "of connect data",
993		    nconf->nc_proto, conn->conn_call->udata.len);
994
995		conn->conn_call->udata.len = 0;
996		(void) t_snddis(fd, conn->conn_call);
997		(void) t_free((char *)conn->conn_call, T_CALL);
998		free((char *)conn);
999		return (FALSE);
1000	}
1001
1002	if ((next_conn = *connp) != NULL) {
1003		next_conn->conn_prev->conn_next = conn;
1004		conn->conn_next = next_conn;
1005		conn->conn_prev = next_conn->conn_prev;
1006		next_conn->conn_prev = conn;
1007	} else {
1008		conn->conn_next = conn;
1009		conn->conn_prev = conn;
1010		*connp = conn;
1011	}
1012	return (TRUE);
1013}
1014
1015static int
1016discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1017{
1018	struct conn_ind	*conn;
1019	struct t_discon	discon;
1020
1021	discon.udata.buf = (char *)0;
1022	discon.udata.maxlen = 0;
1023	if (t_rcvdis(fd, &discon) == -1) {
1024		rdcd_log_tli_error("t_rcvdis", fd, nconf);
1025		return (-1);
1026	}
1027
1028	conn = *connp;
1029	if (conn == NULL)
1030		return (0);
1031
1032	do {
1033		if (conn->conn_call->sequence == discon.sequence) {
1034			if (conn->conn_next == conn)
1035				*connp = (struct conn_ind *)0;
1036			else {
1037				if (conn == *connp) {
1038					*connp = conn->conn_next;
1039				}
1040				conn->conn_next->conn_prev = conn->conn_prev;
1041				conn->conn_prev->conn_next = conn->conn_next;
1042			}
1043			free((char *)conn);
1044			break;
1045		}
1046		conn = conn->conn_next;
1047	} while (conn != *connp);
1048
1049	return (0);
1050}
1051
1052static void
1053cots_listen_event(int fd, int conn_index)
1054{
1055	struct t_call *call;
1056	struct conn_ind	*conn;
1057	struct conn_ind	*conn_head;
1058	int event;
1059	struct netconfig *nconf = &conn_polled[conn_index].nc;
1060	int new_fd;
1061	struct netbuf addrmask;
1062	int ret = 0;
1063
1064	conn_head = (struct conn_ind *)0;
1065	(void) conn_get(fd, nconf, &conn_head);
1066
1067	while ((conn = conn_head) != NULL) {
1068		conn_head = conn->conn_next;
1069		if (conn_head == conn)
1070			conn_head = (struct conn_ind *)0;
1071		else {
1072			conn_head->conn_prev = conn->conn_prev;
1073			conn->conn_prev->conn_next = conn_head;
1074		}
1075		call = conn->conn_call;
1076		free((char *)conn);
1077
1078		/*
1079		 * If we have already accepted the maximum number of
1080		 * connections allowed on the command line, then drop
1081		 * the oldest connection (for any protocol) before
1082		 * accepting the new connection.  Unless explicitly
1083		 * set on the command line, max_conns_allowed is -1.
1084		 */
1085		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1086			conn_close_oldest();
1087
1088		/*
1089		 * Create a new transport endpoint for the same proto as
1090		 * the listener.
1091		 */
1092		new_fd = rdc_transport_open(nconf);
1093		if (new_fd == -1) {
1094			call->udata.len = 0;
1095			(void) t_snddis(fd, call);
1096			(void) t_free((char *)call, T_CALL);
1097			syslog(LOG_ERR, "Cannot establish transport over %s",
1098			    nconf->nc_device);
1099			continue;
1100		}
1101
1102		/* Bind to a generic address/port for the accepting stream. */
1103		if (t_bind(new_fd, (struct t_bind *)NULL,
1104		    (struct t_bind *)NULL) == -1) {
1105			rdcd_log_tli_error("t_bind", new_fd, nconf);
1106			call->udata.len = 0;
1107			(void) t_snddis(fd, call);
1108			(void) t_free((char *)call, T_CALL);
1109			(void) t_close(new_fd);
1110			continue;
1111		}
1112
1113		while (t_accept(fd, new_fd, call) == -1) {
1114			if (t_errno != TLOOK) {
1115				rdcd_log_tli_error("t_accept", fd, nconf);
1116				call->udata.len = 0;
1117				(void) t_snddis(fd, call);
1118				(void) t_free((char *)call, T_CALL);
1119				(void) t_close(new_fd);
1120				goto do_next_conn;
1121			}
1122			while (event = t_look(fd)) {
1123				switch (event) {
1124				case T_LISTEN:
1125#ifdef DEBUG
1126					(void) printf(
1127"cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1128#endif
1129					(void) conn_get(fd, nconf, &conn_head);
1130					continue;
1131
1132				case T_DISCONNECT:
1133#ifdef DEBUG
1134					(void) printf(
1135	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1136						nconf->nc_proto);
1137#endif
1138					(void) discon_get(fd, nconf,
1139					    &conn_head);
1140					continue;
1141
1142				default:
1143					syslog(LOG_ERR,
1144					    "unexpected event 0x%x during "
1145					    "accept processing (%s)",
1146					    event, nconf->nc_proto);
1147					call->udata.len = 0;
1148					(void) t_snddis(fd, call);
1149					(void) t_free((char *)call, T_CALL);
1150					(void) t_close(new_fd);
1151					goto do_next_conn;
1152				}
1153			}
1154		}
1155
1156		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1157			(void) syslog(LOG_ERR, "Cannot set address mask for %s",
1158			    nconf->nc_netid);
1159			return;
1160		}
1161
1162		/* Tell KRPC about the new stream. */
1163		ret = (*Mysvc)(new_fd, addrmask, nconf);
1164		if (ret < 0) {
1165			syslog(LOG_ERR,
1166			    "unable to register with kernel rpc: %m");
1167			free(addrmask.buf);
1168			(void) t_snddis(new_fd, (struct t_call *)0);
1169			(void) t_free((char *)call, T_CALL);
1170			(void) t_close(new_fd);
1171			goto do_next_conn;
1172		}
1173
1174		free(addrmask.buf);
1175		(void) t_free((char *)call, T_CALL);
1176
1177		/*
1178		 * Poll on the new descriptor so that we get disconnect
1179		 * and orderly release indications.
1180		 */
1181		num_conns++;
1182		add_to_poll_list(new_fd, nconf);
1183
1184		/* Reset nconf in case it has been moved. */
1185		nconf = &conn_polled[conn_index].nc;
1186do_next_conn:;
1187	}
1188}
1189
1190static int
1191do_poll_cots_action(int fd, int conn_index)
1192{
1193	char buf[256];
1194	int event;
1195	int i1;
1196	int flags;
1197	struct conn_entry *connent = &conn_polled[conn_index];
1198	struct netconfig *nconf = &(connent->nc);
1199	const char *errorstr;
1200
1201	while (event = t_look(fd)) {
1202		switch (event) {
1203		case T_LISTEN:
1204#ifdef DEBUG
1205	(void) printf("do_poll_cots_action(%s, %d): T_LISTEN event\n",
1206	    nconf->nc_proto, fd);
1207#endif
1208			cots_listen_event(fd, conn_index);
1209			break;
1210
1211		case T_DATA:
1212#ifdef DEBUG
1213	(void) printf("do_poll_cots_action(%d, %s): T_DATA event\n",
1214		fd, nconf->nc_proto);
1215#endif
1216			/*
1217			 * Receive a private notification from CONS rpcmod.
1218			 */
1219			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1220			if (i1 == -1) {
1221				syslog(LOG_ERR, "t_rcv failed");
1222				break;
1223			}
1224			if (i1 < sizeof (int))
1225				break;
1226			i1 = BE32_TO_U32(buf);
1227			if (i1 == 1 || i1 == 2) {
1228				/*
1229				 * This connection has been idle for too long,
1230				 * so release it as politely as we can.  If we
1231				 * have already initiated an orderly release
1232				 * and we get notified that the stream is
1233				 * still idle, pull the plug.  This prevents
1234				 * hung connections from continuing to consume
1235				 * resources.
1236				 */
1237#ifdef DEBUG
1238(void) printf("do_poll_cots_action(%s, %d): ", nconf->nc_proto, fd);
1239(void) printf("initiating orderly release of idle connection\n");
1240#endif
1241				if (nconf->nc_semantics == NC_TPI_COTS ||
1242				    connent->closing != 0) {
1243					(void) t_snddis(fd, (struct t_call *)0);
1244					goto fdclose;
1245				}
1246				/*
1247				 * For NC_TPI_COTS_ORD, the stream is closed
1248				 * and removed from the poll list when the
1249				 * T_ORDREL is received from the provider.  We
1250				 * don't wait for it here because it may take
1251				 * a while for the transport to shut down.
1252				 */
1253				if (t_sndrel(fd) == -1) {
1254					syslog(LOG_ERR,
1255					"unable to send orderly release %m");
1256				}
1257				connent->closing = 1;
1258			} else
1259				syslog(LOG_ERR,
1260				    "unexpected event from CONS rpcmod %d", i1);
1261			break;
1262
1263		case T_ORDREL:
1264#ifdef DEBUG
1265	(void) printf("do_poll_cots_action(%s, %d): T_ORDREL event\n",
1266		nconf->nc_proto, fd);
1267#endif
1268			/* Perform an orderly release. */
1269			if (t_rcvrel(fd) == 0) {
1270				/* T_ORDREL on listen fd's should be ignored */
1271				if (!is_listen_fd_index(fd)) {
1272					(void) t_sndrel(fd);
1273					goto fdclose;
1274				}
1275				break;
1276
1277			} else if (t_errno == TLOOK) {
1278				break;
1279			} else {
1280				rdcd_log_tli_error("t_rcvrel", fd, nconf);
1281				/*
1282				 * check to make sure we do not close
1283				 * listen fd
1284				 */
1285				if (!is_listen_fd_index(fd))
1286					break;
1287				else
1288					goto fdclose;
1289			}
1290
1291		case T_DISCONNECT:
1292#ifdef DEBUG
1293(void) printf("do_poll_cots_action(%s, %d): T_DISCONNECT event\n",
1294nconf->nc_proto, fd);
1295#endif
1296			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1297				rdcd_log_tli_error("t_rcvdis", fd, nconf);
1298
1299			/*
1300			 * T_DISCONNECT on listen fd's should be ignored.
1301			 */
1302			if (!is_listen_fd_index(fd))
1303				break;
1304			else
1305				goto fdclose;
1306
1307		case T_ERROR:
1308		default:
1309			if (event == T_ERROR || t_errno == TSYSERR) {
1310			    if ((errorstr = strerror(errno)) == NULL) {
1311				(void) snprintf(buf, sizeof (buf),
1312				    "Unknown error num %d", errno);
1313				errorstr = (const char *)buf;
1314			    }
1315			} else if (event == -1)
1316				errorstr = t_strerror(t_errno);
1317			else
1318				errorstr = "";
1319#ifdef DEBUG
1320			syslog(LOG_ERR,
1321			    "unexpected TLI event (0x%x) on "
1322			    "connection-oriented transport(%s, %d):%s",
1323			    event, nconf->nc_proto, fd, errorstr);
1324#endif
1325
1326fdclose:
1327			num_conns--;
1328			remove_from_poll_list(fd);
1329			(void) t_close(fd);
1330			return (0);
1331		}
1332	}
1333
1334	return (0);
1335}
1336
1337
1338/*
1339 * Called to read and interpret the event on a connectionless descriptor.
1340 * Returns 0 if successful, or a UNIX error code if failure.
1341 */
1342static int
1343do_poll_clts_action(int fd, int conn_index)
1344{
1345	int error;
1346	int ret;
1347	int flags;
1348	struct netconfig *nconf = &conn_polled[conn_index].nc;
1349	static struct t_unitdata *unitdata = NULL;
1350	static struct t_uderr *uderr = NULL;
1351	static int oldfd = -1;
1352	struct nd_hostservlist *host = NULL;
1353	struct strbuf ctl[1], data[1];
1354	/*
1355	 * We just need to have some space to consume the
1356	 * message in the event we can't use the TLI interface to do the
1357	 * job.
1358	 *
1359	 * We flush the message using getmsg(). For the control part
1360	 * we allocate enough for any TPI header plus 32 bytes for address
1361	 * and options. For the data part, there is nothing magic about
1362	 * the size of the array, but 256 bytes is probably better than
1363	 * 1 byte, and we don't expect any data portion anyway.
1364	 *
1365	 * If the array sizes are too small, we handle this because getmsg()
1366	 * (called to consume the message) will return MOREDATA|MORECTL.
1367	 * Thus we just call getmsg() until it's read the message.
1368	 */
1369	char ctlbuf[sizeof (union T_primitives) + 32];
1370	char databuf[256];
1371
1372	/*
1373	 * If this is the same descriptor as the last time
1374	 * do_poll_clts_action was called, we can save some
1375	 * de-allocation and allocation.
1376	 */
1377	if (oldfd != fd) {
1378		oldfd = fd;
1379
1380		if (unitdata) {
1381			(void) t_free((char *)unitdata, T_UNITDATA);
1382			unitdata = NULL;
1383		}
1384		if (uderr) {
1385			(void) t_free((char *)uderr, T_UDERROR);
1386			uderr = NULL;
1387		}
1388	}
1389
1390	/*
1391	 * Allocate a unitdata structure for receiving the event.
1392	 */
1393	if (unitdata == NULL) {
1394		/* LINTED pointer alignment */
1395		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
1396		if (unitdata == NULL) {
1397			if (t_errno == TSYSERR) {
1398				/*
1399				 * Save the error code across
1400				 * syslog(), just in case
1401				 * syslog() gets its own error
1402				 * and therefore overwrites errno.
1403				 */
1404				error = errno;
1405				(void) syslog(LOG_ERR,
1406	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
1407					fd, nconf->nc_proto);
1408				return (error);
1409			}
1410			(void) syslog(LOG_ERR,
1411"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
1412					fd, nconf->nc_proto, t_errno);
1413			goto flush_it;
1414		}
1415	}
1416
1417try_again:
1418	flags = 0;
1419
1420	/*
1421	 * The idea is we wait for T_UNITDATA_IND's. Of course,
1422	 * we don't get any, because rpcmod filters them out.
1423	 * However, we need to call t_rcvudata() to let TLI
1424	 * tell us we have a T_UDERROR_IND.
1425	 *
1426	 * algorithm is:
1427	 * 	t_rcvudata(), expecting TLOOK.
1428	 * 	t_look(), expecting T_UDERR.
1429	 * 	t_rcvuderr(), expecting success (0).
1430	 * 	expand destination address into ASCII,
1431	 *	and dump it.
1432	 */
1433
1434	ret = t_rcvudata(fd, unitdata, &flags);
1435	if (ret == 0 || t_errno == TBUFOVFLW) {
1436		(void) syslog(LOG_WARNING,
1437"t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
1438			fd, nconf->nc_proto, unitdata->udata.len);
1439
1440		/*
1441		 * Even though we don't expect any data, in case we do,
1442		 * keep reading until there is no more.
1443		 */
1444		if (flags & T_MORE)
1445			goto try_again;
1446
1447		return (0);
1448	}
1449
1450	switch (t_errno) {
1451	case TNODATA:
1452		return (0);
1453	case TSYSERR:
1454		/*
1455		 * System errors are returned to caller.
1456		 * Save the error code across
1457		 * syslog(), just in case
1458		 * syslog() gets its own error
1459		 * and therefore overwrites errno.
1460		 */
1461		error = errno;
1462		(void) syslog(LOG_ERR,
1463			"t_rcvudata(file descriptor %d/transport %s) %m",
1464			fd, nconf->nc_proto);
1465		return (error);
1466	case TLOOK:
1467		break;
1468	default:
1469		(void) syslog(LOG_ERR,
1470		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1471			fd, nconf->nc_proto, t_errno);
1472		goto flush_it;
1473	}
1474
1475	ret = t_look(fd);
1476	switch (ret) {
1477	case 0:
1478		return (0);
1479	case -1:
1480		/*
1481		 * System errors are returned to caller.
1482		 */
1483		if (t_errno == TSYSERR) {
1484			/*
1485			 * Save the error code across
1486			 * syslog(), just in case
1487			 * syslog() gets its own error
1488			 * and therefore overwrites errno.
1489			 */
1490			error = errno;
1491			(void) syslog(LOG_ERR,
1492				"t_look(file descriptor %d/transport %s) %m",
1493				fd, nconf->nc_proto);
1494			return (error);
1495		}
1496		(void) syslog(LOG_ERR,
1497			"t_look(file descriptor %d/transport %s) TLI error %d",
1498			fd, nconf->nc_proto, t_errno);
1499		goto flush_it;
1500	case T_UDERR:
1501		break;
1502	default:
1503		(void) syslog(LOG_WARNING,
1504	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1505			fd, nconf->nc_proto, ret, T_UDERR);
1506	}
1507
1508	if (uderr == NULL) {
1509		/* LINTED pointer alignment */
1510		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1511		if (uderr == NULL) {
1512			if (t_errno == TSYSERR) {
1513				/*
1514				 * Save the error code across
1515				 * syslog(), just in case
1516				 * syslog() gets its own error
1517				 * and therefore overwrites errno.
1518				 */
1519				error = errno;
1520				(void) syslog(LOG_ERR,
1521	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1522					fd, nconf->nc_proto);
1523				return (error);
1524			}
1525			(void) syslog(LOG_ERR,
1526"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1527				fd, nconf->nc_proto, t_errno);
1528			goto flush_it;
1529		}
1530	}
1531
1532	ret = t_rcvuderr(fd, uderr);
1533	if (ret == 0) {
1534
1535		/*
1536		 * Save the datagram error in errno, so that the
1537		 * %m argument to syslog picks up the error string.
1538		 */
1539		errno = uderr->error;
1540
1541		/*
1542		 * Log the datagram error, then log the host that
1543		 * probably triggerred. Cannot log both in the
1544		 * same transaction because of packet size limitations
1545		 * in /dev/log.
1546		 */
1547		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1548		    "%s response over <file descriptor %d/transport %s> "
1549		    "generated error: %m",
1550		    progname, fd, nconf->nc_proto);
1551
1552		/*
1553		 * Try to map the client's address back to a
1554		 * name.
1555		 */
1556		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1557		if (ret != -1 && host && host->h_cnt > 0 &&
1558		    host->h_hostservs) {
1559		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1560		    "Bad %s response was sent to client with "
1561		    "host name: %s; service port: %s",
1562		    progname, host->h_hostservs->h_host,
1563		    host->h_hostservs->h_serv);
1564		} else {
1565			int i, j;
1566			char *buf;
1567			char *hex = "0123456789abcdef";
1568
1569			/*
1570			 * Mapping failed, print the whole thing
1571			 * in ASCII hex.
1572			 */
1573			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1574			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1575				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1576				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1577			}
1578			buf[j] = '\0';
1579			(void) syslog((errno == ECONNREFUSED) ?
1580			    LOG_DEBUG : LOG_WARNING,
1581			    "Bad %s response was sent to client with "
1582			    "transport address: 0x%s",
1583			    progname, buf);
1584			free((void *)buf);
1585		}
1586
1587		if (ret == 0 && host != NULL)
1588			netdir_free((void *)host, ND_HOSTSERVLIST);
1589		return (0);
1590	}
1591
1592	switch (t_errno) {
1593	case TNOUDERR:
1594		goto flush_it;
1595	case TSYSERR:
1596		/*
1597		 * System errors are returned to caller.
1598		 * Save the error code across
1599		 * syslog(), just in case
1600		 * syslog() gets its own error
1601		 * and therefore overwrites errno.
1602		 */
1603		error = errno;
1604		(void) syslog(LOG_ERR,
1605			"t_rcvuderr(file descriptor %d/transport %s) %m",
1606			fd, nconf->nc_proto);
1607		return (error);
1608	default:
1609		(void) syslog(LOG_ERR,
1610		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1611			fd, nconf->nc_proto, t_errno);
1612		goto flush_it;
1613	}
1614
1615flush_it:
1616	/*
1617	 * If we get here, then we could not cope with whatever message
1618	 * we attempted to read, so flush it. If we did read a message,
1619	 * and one isn't present, that is all right, because fd is in
1620	 * nonblocking mode.
1621	 */
1622	(void) syslog(LOG_ERR,
1623	"Flushing one input message from <file descriptor %d/transport %s>",
1624		fd, nconf->nc_proto);
1625
1626	/*
1627	 * Read and discard the message. Do this this until there is
1628	 * no more control/data in the message or until we get an error.
1629	 */
1630	do {
1631		ctl->maxlen = sizeof (ctlbuf);
1632		ctl->buf = ctlbuf;
1633		data->maxlen = sizeof (databuf);
1634		data->buf = databuf;
1635		flags = 0;
1636		ret = getmsg(fd, ctl, data, &flags);
1637		if (ret == -1)
1638			return (errno);
1639	} while (ret != 0);
1640
1641	return (0);
1642}
1643
1644/*
1645 * Establish service thread.
1646 */
1647static int
1648rdcsvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
1649{
1650#ifdef	__NCALL__
1651	struct ncall_svc_args nsa;
1652#else	/* !__NCALL__ */
1653	struct rdc_svc_args nsa;
1654	_rdc_ioctl_t rdc_args = { 0, };
1655#endif	/* __NCALL__ */
1656
1657	nsa.fd = fd;
1658	nsa.nthr = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
1659	(void) strncpy(nsa.netid, nconf->nc_netid, sizeof (nsa.netid));
1660	nsa.addrmask.len = addrmask.len;
1661	nsa.addrmask.maxlen = addrmask.maxlen;
1662	nsa.addrmask.buf = addrmask.buf;
1663
1664#ifdef	__NCALL__
1665	return (sndrsys(NC_IOC_SERVER, &nsa));
1666#else	/* !__NCALL__ */
1667	rdc_args.arg0 = (long)&nsa;
1668	return (sndrsys(RDC_ENABLE_SVR, &rdc_args));
1669#endif	/* __NCALL__ */
1670}
1671
1672
1673
1674static int
1675nofile_increase(int limit)
1676{
1677	struct rlimit rl;
1678
1679	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
1680		syslog(LOG_ERR,
1681		    "nofile_increase() getrlimit of NOFILE failed: %m");
1682		return (-1);
1683	}
1684
1685	if (limit > 0)
1686		rl.rlim_cur = limit;
1687	else
1688		rl.rlim_cur += NOFILE_INC_SIZE;
1689
1690	if (rl.rlim_cur > rl.rlim_max && rl.rlim_max != RLIM_INFINITY)
1691		rl.rlim_max = rl.rlim_cur;
1692
1693	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
1694		syslog(LOG_ERR,
1695		    "nofile_increase() setrlimit of NOFILE to %d failed: %m",
1696		    rl.rlim_cur);
1697		return (-1);
1698	}
1699
1700	return (0);
1701}
1702
1703int
1704rdcd_bindit(struct netconfig *nconf, struct netbuf **addr,
1705    struct nd_hostserv *hs, int backlog)
1706{
1707	int fd;
1708	struct t_bind *ntb;
1709	struct t_bind tb;
1710	struct nd_addrlist *addrlist;
1711	struct t_optmgmt req, resp;
1712	struct opthdr *opt;
1713	char reqbuf[128];
1714
1715	if ((fd = rdc_transport_open(nconf)) == -1) {
1716		syslog(LOG_ERR, "cannot establish transport service over %s",
1717		    nconf->nc_device);
1718		return (-1);
1719	}
1720
1721	addrlist = (struct nd_addrlist *)NULL;
1722	if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
1723		if (strncmp(nconf->nc_netid, "udp", 3) != 0) {
1724			syslog(LOG_ERR, "Cannot get address for transport "
1725			    "%s host %s service %s",
1726			    nconf->nc_netid, hs->h_host, hs->h_serv);
1727		}
1728		(void) t_close(fd);
1729		return (-1);
1730	}
1731
1732	if (strcmp(nconf->nc_proto, "tcp") == 0) {
1733		/*
1734		 * If we're running over TCP, then set the
1735		 * SO_REUSEADDR option so that we can bind
1736		 * to our preferred address even if previously
1737		 * left connections exist in FIN_WAIT states.
1738		 * This is somewhat bogus, but otherwise you have
1739		 * to wait 2 minutes to restart after killing it.
1740		 */
1741		if (reuseaddr(fd) == -1) {
1742			syslog(LOG_WARNING,
1743			    "couldn't set SO_REUSEADDR option on transport");
1744		}
1745	}
1746
1747	if (nconf->nc_semantics == NC_TPI_CLTS)
1748		tb.qlen = 0;
1749	else
1750		tb.qlen = backlog;
1751
1752	/* LINTED pointer alignment */
1753	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
1754	if (ntb == (struct t_bind *)NULL) {
1755		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
1756		(void) t_close(fd);
1757		netdir_free((void *)addrlist, ND_ADDRLIST);
1758		return (-1);
1759	}
1760
1761	tb.addr = *(addrlist->n_addrs);		/* structure copy */
1762
1763	if (t_bind(fd, &tb, ntb) == -1) {
1764		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
1765		(void) t_free((char *)ntb, T_BIND);
1766		netdir_free((void *)addrlist, ND_ADDRLIST);
1767		(void) t_close(fd);
1768		return (-1);
1769	}
1770
1771	/* make sure we bound to the right address */
1772	if (tb.addr.len != ntb->addr.len ||
1773	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0) {
1774		syslog(LOG_ERR, "t_bind to wrong address");
1775		(void) t_free((char *)ntb, T_BIND);
1776		netdir_free((void *)addrlist, ND_ADDRLIST);
1777		(void) t_close(fd);
1778		return (-1);
1779	}
1780
1781	*addr = &ntb->addr;
1782	netdir_free((void *)addrlist, ND_ADDRLIST);
1783
1784	if (strcmp(nconf->nc_proto, "tcp") == 0 ||
1785	    strcmp(nconf->nc_proto, "tcp6") == 0) {
1786		/*
1787		 * Disable the Nagle algorithm on TCP connections.
1788		 * Connections accepted from this listener will
1789		 * inherit the listener options.
1790		 */
1791
1792		/* LINTED pointer alignment */
1793		opt = (struct opthdr *)reqbuf;
1794		opt->level = IPPROTO_TCP;
1795		opt->name = TCP_NODELAY;
1796		opt->len = sizeof (int);
1797
1798		/* LINTED pointer alignment */
1799		*(int *)((char *)opt + sizeof (*opt)) = 1;
1800
1801		req.flags = T_NEGOTIATE;
1802		req.opt.len = sizeof (*opt) + opt->len;
1803		req.opt.buf = (char *)opt;
1804		resp.flags = 0;
1805		resp.opt.buf = reqbuf;
1806		resp.opt.maxlen = sizeof (reqbuf);
1807
1808		if (t_optmgmt(fd, &req, &resp) < 0 ||
1809		    resp.flags != T_SUCCESS) {
1810			syslog(LOG_ERR,
1811	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
1812				nconf->nc_proto, t_errno);
1813		}
1814	}
1815
1816	return (fd);
1817}
1818
1819
1820/* ARGSUSED */
1821static int
1822bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1823		struct netconfig **retnconf)
1824{
1825	struct netconfig *nconf;
1826	NCONF_HANDLE *nc;
1827	struct nd_hostserv hs;
1828
1829	hs.h_host = HOST_SELF;
1830	hs.h_serv = RDC_SERVICE;	/* serv_name_to_port_name(serv); */
1831
1832	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1833		syslog(LOG_ERR, "setnetconfig failed: %m");
1834		return (-1);
1835	}
1836	while (nconf = getnetconfig(nc)) {
1837		if (OK_TPI_TYPE(nconf) &&
1838		    strcmp(nconf->nc_device, provider) == 0) {
1839			*retnconf = nconf;
1840			return (rdcd_bindit(nconf, addr, &hs, listen_backlog));
1841		}
1842	}
1843	(void) endnetconfig(nc);
1844	if ((Is_ipv6present() && (strcmp(provider, "/dev/tcp6") == 0)) ||
1845	    (!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
1846		syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1847		    provider);
1848	return (-1);
1849}
1850
1851
1852/*
1853 * For listen fd's index is always less than end_listen_fds.
1854 * It's value is equal to the number of open file descriptors after the
1855 * last listen end point was opened but before any connection was accepted.
1856 */
1857static int
1858is_listen_fd_index(int index)
1859{
1860	return (index < end_listen_fds);
1861}
1862
1863
1864/*
1865 * Create an address mask appropriate for the transport.
1866 * The mask is used to obtain the host-specific part of
1867 * a network address when comparing addresses.
1868 * For an internet address the host-specific part is just
1869 * the 32 bit IP address and this part of the mask is set
1870 * to all-ones. The port number part of the mask is zeroes.
1871 */
1872static int
1873set_addrmask(int fd, struct netconfig *nconf, struct netbuf *mask)
1874{
1875	struct t_info info;
1876
1877	/*
1878	 * Find the size of the address we need to mask.
1879	 */
1880	if (t_getinfo(fd, &info) < 0) {
1881		t_error("t_getinfo");
1882		return (-1);
1883	}
1884	mask->len = mask->maxlen = info.addr;
1885	if (info.addr <= 0) {
1886		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1887			info.addr);
1888		return (-1);
1889	}
1890
1891	mask->buf = (char *)malloc(mask->len);
1892	if (mask->buf == NULL) {
1893		syslog(LOG_ERR, "set_addrmask: no memory");
1894		return (-1);
1895	}
1896	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1897
1898	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1899		/*
1900		 * Set the mask so that the port is ignored.
1901		 */
1902		/* LINTED pointer alignment */
1903		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1904		    (in_addr_t)~0;
1905		/* LINTED pointer alignment */
1906		((struct sockaddr_in *)mask->buf)->sin_family = (sa_family_t)~0;
1907	}
1908#ifdef NC_INET6
1909	else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1910		/* LINTED pointer alignment */
1911		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1912		    (uchar_t)~0, sizeof (struct in6_addr));
1913		/* LINTED pointer alignment */
1914		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1915		    (sa_family_t)~0;
1916	}
1917#endif
1918	else {
1919		/*
1920		 * Set all mask bits.
1921		 */
1922		(void) memset(mask->buf, (uchar_t)~0, mask->len);
1923	}
1924	return (0);
1925}
1926
1927#if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
1928
1929static int
1930sndrsvcpool(int maxservers)
1931{
1932	struct svcpool_args npa;
1933
1934	npa.id = RDC_SVCPOOL_ID;
1935	npa.maxthreads = maxservers;
1936	npa.redline = 0;
1937	npa.qsize = 0;
1938	npa.timeout = 0;
1939	npa.stksize = 0;
1940	npa.max_same_xprt = 0;
1941	return (sndrsys(RDC_POOL_CREATE, &npa));
1942}
1943
1944
1945/*
1946 * The following stolen from cmd/fs.d/nfs/lib/thrpool.c
1947 */
1948
1949#include <thread.h>
1950
1951/*
1952 * Thread to call into the kernel and do work on behalf of SNDR/ncall-ip.
1953 */
1954static void *
1955svcstart(void *arg)
1956{
1957	int id = (int)arg;
1958	int err;
1959
1960	while ((err = sndrsys(RDC_POOL_RUN, &id)) != 0) {
1961		/*
1962		 * Interrupted by a signal while in the kernel.
1963		 * this process is still alive, try again.
1964		 */
1965		if (err == EINTR)
1966			continue;
1967		else
1968			break;
1969	}
1970
1971	/*
1972	 * If we weren't interrupted by a signal, but did
1973	 * return from the kernel, this thread's work is done,
1974	 * and it should exit.
1975	 */
1976	thr_exit(NULL);
1977	return (NULL);
1978}
1979
1980/*
1981 * User-space "creator" thread. This thread blocks in the kernel
1982 * until new worker threads need to be created for the service
1983 * pool. On return to userspace, if there is no error, create a
1984 * new thread for the service pool.
1985 */
1986static void *
1987svcblock(void *arg)
1988{
1989	int id = (int)arg;
1990
1991	/* CONSTCOND */
1992	while (1) {
1993		thread_t tid;
1994		int err;
1995
1996		/*
1997		 * Call into the kernel, and hang out there
1998		 * until a thread needs to be created.
1999		 */
2000		if (err = sndrsys(RDC_POOL_WAIT, &id)) {
2001			if (err == ECANCELED || err == EBUSY)
2002				/*
2003				 * If we get back ECANCELED, the service
2004				 * pool is exiting, and we may as well
2005				 * clean up this thread. If EBUSY is
2006				 * returned, there's already a thread
2007				 * looping on this pool, so we should
2008				 * give up.
2009				 */
2010				break;
2011			else
2012				continue;
2013		}
2014
2015		(void) thr_create(NULL, NULL, svcstart, (void *)id,
2016		    THR_BOUND | THR_DETACHED, &tid);
2017	}
2018
2019	thr_exit(NULL);
2020	return (NULL);
2021}
2022
2023static int
2024svcwait(int id)
2025{
2026	thread_t tid;
2027
2028	/*
2029	 * Create a bound thread to wait for kernel LWPs that
2030	 * need to be created.
2031	 */
2032	if (thr_create(NULL, NULL, svcblock, (void *)id,
2033	    THR_BOUND | THR_DETACHED, &tid))
2034		return (1);
2035
2036	return (0);
2037}
2038#endif /* Solaris 9+ */
2039