svc_run.c revision 1219:f89f56c2d9ac
1184610Salfred/*
2184610Salfred * CDDL HEADER START
3184610Salfred *
4184610Salfred * The contents of this file are subject to the terms of the
5184610Salfred * Common Development and Distribution License, Version 1.0 only
6184610Salfred * (the "License").  You may not use this file except in compliance
7184610Salfred * with the License.
8184610Salfred *
9184610Salfred * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10184610Salfred * or http://www.opensolaris.org/os/licensing.
11184610Salfred * See the License for the specific language governing permissions
12184610Salfred * and limitations under the License.
13184610Salfred *
14184610Salfred * When distributing Covered Code, include this CDDL HEADER in each
15184610Salfred * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16184610Salfred * If applicable, add the following below this CDDL HEADER, with the
17184610Salfred * fields enclosed by brackets "[]" replaced with your own identifying
18184610Salfred * information: Portions Copyright [yyyy] [name of copyright owner]
19184610Salfred *
20184610Salfred * CDDL HEADER END
21184610Salfred */
22184610Salfred
23184610Salfred/*
24184610Salfred * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25184610Salfred * Use is subject to license terms.
26184610Salfred */
27184610Salfred
28184610Salfred/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29184610Salfred/* All Rights Reserved */
30184610Salfred/*
31184610Salfred * Portions of this source code were derived from Berkeley
32184610Salfred * 4.3 BSD under license from the Regents of the University of
33184610Salfred * California.
34184610Salfred */
35184610Salfred
36184610Salfred#pragma ident	"%Z%%M%	%I%	%E% SMI"
37184610Salfred
38184610Salfred/*
39184610Salfred * This is the rpc server side idle loop
40184610Salfred * Wait for input, call server program.
41184610Salfred */
42184610Salfred#include "mt.h"
43184610Salfred#include "rpc_mt.h"
44184610Salfred#include <stdlib.h>
45184610Salfred#include <unistd.h>
46184610Salfred#include <signal.h>
47184610Salfred#include <rpc/rpc.h>
48184610Salfred#include <errno.h>
49184610Salfred#include <sys/poll.h>
50184610Salfred#include <sys/types.h>
51184610Salfred#include <syslog.h>
52184610Salfred#include <thread.h>
53184610Salfred#include <assert.h>
54184610Salfred#include <libintl.h>
55194677Sthompsa#include <values.h>
56194677Sthompsa
57194677Sthompsaextern const char __nsl_dom[];
58194677Sthompsa
59194677Sthompsaextern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
60194677Sthompsaextern bool_t __is_a_userfd(int P_fd);
61194677Sthompsaextern void   __destroy_userfd();
62194677Sthompsaextern void clear_pollfd(int);
63194677Sthompsaextern void set_pollfd(int /* fd */, short /* events */);
64194677Sthompsaextern void svc_getreq_poll();
65194677Sthompsaextern void (*__proc_cleanup_cb)();
66194677Sthompsa
67194677Sthompsastatic void start_threads();
68194677Sthompsastatic void create_pipe();
69194677Sthompsastatic void clear_pipe();
70194677Sthompsastatic int select_next_pollfd();
71194677Sthompsastatic SVCXPRT *make_xprt_copy();
72194677Sthompsastatic void _svc_run_mt();
73194677Sthompsastatic void _svc_run();
74194677Sthompsa
75188942Sthompsaint _svc_prog_dispatch();
76194677Sthompsastatic void _svc_done_private();
77184610Salfred
78188942Sthompsaextern rwlock_t svc_fd_lock;
79188942Sthompsaextern mutex_t	svc_door_mutex;
80188942Sthompsaextern cond_t	svc_door_waitcv;
81188942Sthompsaextern int	svc_ndoorfds;
82184610Salfredextern void	__svc_cleanup_door_xprts();
83188942Sthompsaextern void	__svc_free_xprtlist();
84188942Sthompsaextern void	__svc_getreq_user(struct pollfd *);
85188942Sthompsa
/*
 * Maximum fragment size allowed for connection oriented connections.
 * Zero means that no maximum size limit has been requested.
 */
int __rpc_connmaxrec = 0;

/* Inter-record timeout, in seconds, for non-blocked connection RPC */
int __rpc_irtimeout = 35;

/*
 * Request exclusive access to tcp and udp non-priv ports bound with a
 * wildcard addr.  Defaults to FALSE (no exclusive bind requested).
 */
bool_t __rpc_tp_exclbind = FALSE;
100184610Salfred
101184610Salfred/*
102184610Salfred * XXX - eventually, all mutexes and their initializations static
103184610Salfred */
104184610Salfred
/*
 * Variables used for MT
 */
int svc_mt_mode;		/* multi-threading mode */

int svc_pipe[2];	/* pipe for breaking out of poll: read(0), write(1) */

/* BEGIN PROTECTED BY svc_mutex */

static int svc_thr_max = 16;	/* default maximum number of threads allowed */

static int svc_thr_total;	/* current number of threads */

static int svc_thr_active;	/* current number of threads active */

/* circular array of file descriptors with pending data */

#define	CIRCULAR_BUFSIZE	1024

/*
 * The array holds CIRCULAR_BUFSIZE + 1 slots; the read and write indices
 * wrap back to 0 once they exceed CIRCULAR_BUFSIZE.
 */
static int svc_pending_fds[CIRCULAR_BUFSIZE+1];	/* fds with pending data */

static int svc_next_pending;			/* next one to be processed */

static int svc_last_pending;			/* last one in list */

static int svc_total_pending;			/* total in list */

static int svc_thr_total_creates;	/* total created - stats */

static int svc_thr_total_create_errors;	/* total create errors - stats */

static int svc_waiters;		/* number of waiting threads */

/* END PROTECTED BY svc_mutex */

/* BEGIN PROTECTED BY svc_fd_lock: */

int svc_nfds;		/* total number of active file descriptors */

int svc_nfds_set;	/* total number of fd bits set in svc_fdset */

int svc_max_fd = 0;	/* largest active file descriptor */

int svc_npollfds;	/* total number of active pollfds */

int svc_npollfds_set;	/* total number of pollfd set in svc_pollfd */

int svc_max_pollfd;	/* largest active pollfd so far */

int svc_pollfd_allocd;  /* number of pollfd structures allocated */

/* END PROTECTED BY svc_fd_lock: */

/* BEGIN PROTECTED BY svc_thr_mutex */

/* svc_pollset grows in steps of POLLSET_EXTEND entries (see alloc_pollset) */
#define	POLLSET_EXTEND	256
static int svc_pollset_allocd;	/* number of entries allocated in svc_pollset */
static struct pollfd *svc_pollset;
				/*
				 * array of file descriptors currently active
				 */
static int svc_polled;		/* no of fds polled in last poll() - input */

static int svc_pollfds;		/* no of active fds in last poll() - output */

static int svc_next_pollfd;	/* next fd to process in svc_pollset */

bool_t svc_polling;		/* true if a thread is polling */

/* END PROTECTED BY svc_thr_mutex */

/* BEGIN PROTECTED BY svc_exit_mutex */

/* TRUE until svc_run() clears it; svc_exit() sets it again */
static bool_t svc_exit_done = TRUE;

/* END PROTECTED BY svc_exit_mutex */
181184610Salfred
182184610Salfred/*
183184610Salfred * Warlock section
184184610Salfred */
185184610Salfred
186184610Salfred/* VARIABLES PROTECTED BY svc_mutex:
187184610Salfred	svc_thr_total, svc_thr_active, svc_pending_fds, svc_next_pending,
188184610Salfred	svc_last_pending, svc_total_pending, svc_thr_total_creates,
189184610Salfred	svc_thr_total_create_errors,
190184610Salfred	svcxprt_list_t::next, svcxprt_ext_t::my_xlist,
191184610Salfred	svc_thr_max, svc_waiters
192184610Salfred */
193184610Salfred
194184610Salfred/* VARIABLES PROTECTED BY svc_fd_lock:
195184610Salfred	svc_xports, svc_fdset, svc_nfds, svc_nfds_set, svc_max_fd,
196200087Sthompsa	svc_pollfd, svc_npollfds, svc_npollfds_set, svc_max_pollfd
197200305Sthompsa */
198200087Sthompsa
199200305Sthompsa/* VARIABLES PROTECTED BY svc_thr_mutex:
200184610Salfred	svc_pollset, svc_pollfds, svc_next_pollfd, svc_polling
201184610Salfred	svc_pollset_allocd, svc_polled
202184610Salfred */
203184610Salfred
204184610Salfred/* VARIABLES PROTECTED BY svc_exit_mutex:
205184610Salfred	svc_exit_done
206184610Salfred */
207184610Salfred
208184610Salfred/* VARIABLES READABLE WITHOUT LOCK:
209184610Salfred	svc_thr_total, svc_thr_active, svc_thr_total_creates,
210184610Salfred	svc_thr_total_create_errors,
211184610Salfred	svc_xports, svc_nfds, svc_nfds_set, svc_max_fd,
212184610Salfred	svc_npollfds, svc_npollfds_set, svc_max_pollfd,
213184610Salfred	svc_pollfds, svc_next_pollfd, svc_exit_done, svc_polling,
214195958Salfred	svc_thr_max, svc_waiters
215195958Salfred */
216184610Salfred
217184610Salfred/* VARIABLES PROTECTED BY "program_logic":
218184610Salfred	rpc_msg::, svc_req::, svcxprt_ext_t::flags, svc_mt_mode,
219184610Salfred	svcxprt_ext_t::parent
220184610Salfred */
221184610Salfred
222184610Salfred/* LOCK ORDER:
223184610Salfred	svc_exit_mutex, svc_thr_mutex, svc_mutex, svc_fd_lock
224184610Salfred */
225184610Salfred
226184610Salfred
227184610Salfredvoid
228184610Salfredsvc_run(void)
229184610Salfred{
230184610Salfred	/* NO OTHER THREADS ARE RUNNING */
231184610Salfred
232184610Salfred	svc_exit_done = FALSE;
233184610Salfred
234184610Salfred	while ((svc_npollfds > 0 || svc_ndoorfds > 0) && !svc_exit_done) {
235184610Salfred		if (svc_npollfds > 0) {
236184610Salfred			switch (svc_mt_mode) {
237184610Salfred			case RPC_SVC_MT_NONE:
238184610Salfred				_svc_run();
239184610Salfred				break;
240184610Salfred			default:
241184610Salfred				_svc_run_mt();
242184610Salfred				break;
243184610Salfred			}
244184610Salfred			continue;
245184610Salfred		}
246184610Salfred
247184610Salfred		(void) mutex_lock(&svc_door_mutex);
248197554Sthompsa		if (svc_ndoorfds > 0)
249197554Sthompsa			(void) cond_wait(&svc_door_waitcv, &svc_door_mutex);
250197554Sthompsa		(void) mutex_unlock(&svc_door_mutex);
251197554Sthompsa	}
252197554Sthompsa}
253197554Sthompsa
254197554Sthompsa
255197554Sthompsa/*
256197554Sthompsa *	This function causes svc_run() to exit by destroying all
257197554Sthompsa *	service handles.
258197554Sthompsa */
259197554Sthompsavoid
260197554Sthompsasvc_exit(void)
261197554Sthompsa{
262197554Sthompsa	SVCXPRT	*xprt;
263197554Sthompsa	int fd;
264197554Sthompsa	char dummy;
265197554Sthompsa
266197554Sthompsa	/* NO LOCKS HELD */
267197554Sthompsa
268197554Sthompsa	(void) mutex_lock(&svc_exit_mutex);
269197554Sthompsa	if (svc_exit_done) {
270197554Sthompsa		(void) mutex_unlock(&svc_exit_mutex);
271197554Sthompsa		return;
272197554Sthompsa	}
273197554Sthompsa	svc_exit_done = TRUE;
274197554Sthompsa	for (fd = 0; fd < svc_max_pollfd; fd++) {
275197554Sthompsa		xprt = svc_xports[fd];
276197554Sthompsa		if (xprt) {
277197554Sthompsa			SVC_DESTROY(xprt);
278197554Sthompsa		}
279197554Sthompsa	}
280197554Sthompsa	__svc_free_xprtlist();
281197554Sthompsa	__svc_cleanup_door_xprts();
282197554Sthompsa	(void) mutex_unlock(&svc_exit_mutex);
283197554Sthompsa
284197554Sthompsa	if (svc_mt_mode != RPC_SVC_MT_NONE) {
285197554Sthompsa		(void) mutex_lock(&svc_mutex);
286197554Sthompsa		(void) cond_broadcast(&svc_thr_fdwait);
287197554Sthompsa		(void) mutex_unlock(&svc_mutex);
288197554Sthompsa
289197554Sthompsa		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
290197554Sthompsa	}
291197554Sthompsa
292184610Salfred	(void) mutex_lock(&svc_door_mutex);
293184610Salfred	(void) cond_signal(&svc_door_waitcv);	/* wake up door dispatching */
294184610Salfred	(void) mutex_unlock(&svc_door_mutex);
295184610Salfred
296184610Salfred	/* destroy reactor information if any */
297184610Salfred	__destroy_userfd();
298184610Salfred}
299187170Sthompsa
300187170Sthompsa
301187170Sthompsa/*
302187170Sthompsa * this funtion is called with svc_fd_lock and svc_thr_mutex
303187170Sthompsa */
304184610Salfred
305194228Sthompsastatic int
306184610Salfredalloc_pollset(int npollfds)
307187170Sthompsa{
308184610Salfred	if (npollfds > svc_pollset_allocd) {
309184610Salfred		pollfd_t *tmp;
310184610Salfred		do {
311184610Salfred			svc_pollset_allocd += POLLSET_EXTEND;
312184610Salfred		} while (npollfds > svc_pollset_allocd);
313184610Salfred		tmp = realloc(svc_pollset,
314184610Salfred				sizeof (pollfd_t) * svc_pollset_allocd);
315184610Salfred		if (tmp == NULL) {
316184610Salfred			syslog(LOG_ERR, "alloc_pollset: out of memory");
317184610Salfred			return (-1);
318184610Salfred		}
319184610Salfred		svc_pollset = tmp;
320184610Salfred	}
321184610Salfred	return (0);
322184610Salfred}
323184610Salfred
324184610Salfredstatic void
325184610Salfred_svc_run(void)
326184610Salfred{
327187186Sthompsa	sigset_t set, oldset;
328187186Sthompsa	int npollfds;
329199057Sthompsa	int i;
330184610Salfred
331184610Salfred	/*
332184610Salfred	 * Block SIGALRM while doing work.  Unblock it while doing poll().
333184610Salfred	 * This is so that services like rpc.rstatd can cause the poll()
334184610Salfred	 * to be interrupted due to alarm() but that we don't end up in
335184610Salfred	 * an MT-unsafe signal handler at an inopportune time.
336184610Salfred	 */
337184610Salfred	(void) sigemptyset(&set);
338184610Salfred	(void) sigaddset(&set, SIGALRM);
339184610Salfred	(void) sigprocmask(SIG_BLOCK, &set, &oldset);
340184610Salfred	while (!svc_exit_done) {
341184610Salfred		/*
342184610Salfred		 * Check whether there is any server fd on which we may want
343184610Salfred		 * to wait.
344184610Salfred		 */
345184610Salfred		(void) rw_rdlock(&svc_fd_lock);
346184610Salfred		if (alloc_pollset(svc_npollfds) == -1)
347184610Salfred			break;
348184610Salfred		npollfds = __rpc_compress_pollfd(svc_max_pollfd,
349184610Salfred			svc_pollfd, svc_pollset);
350184610Salfred		(void) rw_unlock(&svc_fd_lock);
351184610Salfred		if (npollfds == 0)
352184610Salfred			break;	/* None waiting, hence return */
353184610Salfred
354184610Salfred		(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
355184610Salfred		i = poll(svc_pollset, npollfds, -1);
356184610Salfred		(void) sigprocmask(SIG_BLOCK, &set, &oldset);
357184610Salfred		switch (i) {
358184610Salfred		case -1:
359184610Salfred			/*
360184610Salfred			 * We ignore all errors, continuing with the assumption
361184610Salfred			 * that it was set by the signal handlers (or any
362184610Salfred			 * other outside event) and not caused by poll().
363184610Salfred			 */
364184610Salfred		case 0:
365184610Salfred			continue;
366184610Salfred		default:
367184610Salfred			svc_getreq_poll(svc_pollset, i);
368184610Salfred		}
369184610Salfred	}
370184610Salfred	(void) sigprocmask(SIG_SETMASK, &oldset, NULL);
371184610Salfred}
372184610Salfred
373184610Salfred/*
374184610Salfred * In _svc_run_mt, myfd is linked with mypollfd
375184610Salfred * svc_pollset[mypollfd].fd == myfd
376184610Salfred * However, in some cases, the link can not be made, thus we define the
377184610Salfred * following values for these special cases
378184610Salfred */
enum {
	/* no svc_pollset entry is associated with the selected fd */
	INVALID_POLLFD = -200,
	/* the fd was taken from svc_pending_fds, not from svc_pollset */
	FD_FROM_PENDING = -199
};
383184610Salfred
384184610Salfredstatic void
385184610Salfred_svc_run_mt(void)
386184610Salfred{
387184610Salfred	int npollfds;
388184610Salfred	int n_polled, dispatch;
389184610Salfred
390184610Salfred	static bool_t first_time = TRUE;
391184610Salfred	bool_t main_thread = FALSE;
392184610Salfred	int n_new;
393184610Salfred	int myfd, mypollfd;
394184610Salfred	SVCXPRT *parent_xprt, *xprt;
395184610Salfred
396184610Salfred	/*
397184610Salfred	 * Server is multi-threaded.  Do "first time" initializations.
398184610Salfred	 * Since only one thread exists in the beginning, there's no
399184610Salfred	 * need for mutex protection for first time initializations.
400184610Salfred	 */
401184610Salfred	if (first_time) {
402184610Salfred		first_time = FALSE;
403184610Salfred		main_thread = TRUE;
404184610Salfred		svc_thr_total = 1;	/* this thread */
405184610Salfred		svc_next_pending = svc_last_pending = 0;
406184610Salfred
407184610Salfred		/*
408184610Salfred		 * Create a pipe for waking up the poll, if new
409190183Sthompsa		 * descriptors have been added to svc_fdset.
410184610Salfred		 */
411184610Salfred		create_pipe();
412190183Sthompsa	}
413184610Salfred
414184610Salfred	/* OTHER THREADS ARE RUNNING */
415184610Salfred
416184610Salfred	if (svc_exit_done)
417184610Salfred		return;
418184610Salfred
419198501Sthompsa	for (;;) {
420197554Sthompsa		/*
421197554Sthompsa		 * svc_thr_mutex prevents more than one thread from
422197554Sthompsa		 * trying to select a descriptor to process further.
423197554Sthompsa		 * svc_thr_mutex is unlocked after a thread selects
424197554Sthompsa		 * a descriptor on which to receive data.  If there are
425197554Sthompsa		 * no such descriptors, the thread will poll with
426197554Sthompsa		 * svc_thr_mutex locked, after unlocking all other
427197554Sthompsa		 * locks.  This prevents more than one thread from
428197554Sthompsa		 * trying to poll at the same time.
429197554Sthompsa		 */
430197554Sthompsa		(void) mutex_lock(&svc_thr_mutex);
431197554Sthompsa		(void) mutex_lock(&svc_mutex);
432197554Sthompsacontinue_with_locks:
433197554Sthompsa		myfd = -1;
434197554Sthompsa		mypollfd = INVALID_POLLFD;
435197554Sthompsa
436197554Sthompsa		/*
437197554Sthompsa		 * Check if there are any descriptors with data pending.
438197554Sthompsa		 */
439197554Sthompsa		if (svc_total_pending > 0) {
440197554Sthompsa			myfd = svc_pending_fds[svc_next_pending++];
441197554Sthompsa			mypollfd = FD_FROM_PENDING;
442197554Sthompsa			if (svc_next_pending > CIRCULAR_BUFSIZE)
443197554Sthompsa				svc_next_pending = 0;
444197554Sthompsa			svc_total_pending--;
445197554Sthompsa		}
446199058Sthompsa
447199058Sthompsa		/*
448199058Sthompsa		 * Get the next active file descriptor to process.
449199058Sthompsa		 */
450199058Sthompsa		if (myfd == -1 && svc_pollfds == 0) {
451199058Sthompsa			/*
452199058Sthompsa			 * svc_pollset is empty; do polling
453199058Sthompsa			 */
454199058Sthompsa			svc_polling = TRUE;
455199058Sthompsa
456199058Sthompsa			/*
457199058Sthompsa			 * if there are no file descriptors, return
458199058Sthompsa			 */
459184610Salfred			(void) rw_rdlock(&svc_fd_lock);
460184610Salfred			if (svc_npollfds == 0 ||
461184610Salfred					alloc_pollset(svc_npollfds + 1) == -1) {
462184610Salfred				(void) rw_unlock(&svc_fd_lock);
463184610Salfred				svc_polling = FALSE;
464184610Salfred				svc_thr_total--;
465184610Salfred				(void) mutex_unlock(&svc_mutex);
466184610Salfred				(void) mutex_unlock(&svc_thr_mutex);
467184610Salfred				if (!main_thread) {
468184610Salfred					thr_exit(NULL);
469184610Salfred					/* NOTREACHED */
470184610Salfred				}
471184610Salfred				break;
472184610Salfred			}
473184610Salfred
474184610Salfred			npollfds = __rpc_compress_pollfd(svc_max_pollfd,
475184610Salfred					svc_pollfd, svc_pollset);
476184610Salfred			(void) rw_unlock(&svc_fd_lock);
477184610Salfred
478184610Salfred			if (npollfds == 0) {
479184610Salfred				/*
480184610Salfred				 * There are file descriptors, but none of them
481184610Salfred				 * are available for polling.  If this is the
482184610Salfred				 * main thread, or if no thread is waiting,
483184610Salfred				 * wait on condition variable, otherwise exit.
484184610Salfred				 */
485184610Salfred				svc_polling = FALSE;
486184610Salfred				(void) mutex_unlock(&svc_thr_mutex);
487184610Salfred				if ((!main_thread) && svc_waiters > 0) {
488184610Salfred					svc_thr_total--;
489184610Salfred					(void) mutex_unlock(&svc_mutex);
490184610Salfred					thr_exit(NULL);
491184610Salfred					/* NOTREACHED */
492184610Salfred				}
493184610Salfred
494184610Salfred				while (svc_npollfds_set == 0 &&
495184610Salfred					svc_pollfds == 0 &&
496184610Salfred					svc_total_pending == 0 &&
497184610Salfred							!svc_exit_done) {
498184610Salfred					svc_waiters++;
499184610Salfred					(void) cond_wait(&svc_thr_fdwait,
500184610Salfred								&svc_mutex);
501184610Salfred					svc_waiters--;
502184610Salfred				}
503184610Salfred
504184610Salfred				/*
505184610Salfred				 * Check exit flag.  If this is not the main
506184610Salfred				 * thread, exit.
507184610Salfred				 */
508184610Salfred				if (svc_exit_done) {
509184610Salfred					svc_thr_total--;
510184610Salfred					(void) mutex_unlock(&svc_mutex);
511184610Salfred					if (!main_thread)
512184610Salfred						thr_exit(NULL);
513184610Salfred					break;
514184610Salfred				}
515184610Salfred
516184610Salfred				(void) mutex_unlock(&svc_mutex);
517184610Salfred				continue;
518184610Salfred			}
519194228Sthompsa
520184610Salfred			/*
521184610Salfred			 * We're ready to poll.  Always set svc_pipe[0]
522184610Salfred			 * as the last one, since the poll will occasionally
523184610Salfred			 * need to be interrupted.  Release svc_mutex for
524198501Sthompsa			 * the duration of the poll, but hold on to
525198501Sthompsa			 * svc_thr_mutex, as we don't want any other thread
526198501Sthompsa			 * to do the same.
527198501Sthompsa			 */
528198501Sthompsa			svc_pollset[npollfds].fd = svc_pipe[0];
529198501Sthompsa			svc_pollset[npollfds].events = MASKVAL;
530198501Sthompsa
531198501Sthompsa			do {
532198501Sthompsa				int i, j;
533198501Sthompsa
534198501Sthompsa				(void) mutex_unlock(&svc_mutex);
535198501Sthompsa				n_polled = poll(svc_pollset, npollfds + 1, -1);
536198501Sthompsa				(void) mutex_lock(&svc_mutex);
537198501Sthompsa				if (n_polled <= 0)
538198501Sthompsa					continue;
539198501Sthompsa
540198501Sthompsa				/*
541198501Sthompsa				 * Check if information returned indicates one
542198501Sthompsa				 * or more closed fd's; find and remove any such
543198501Sthompsa				 * information
544198501Sthompsa				 */
545198501Sthompsa				for (i = 0; i <= npollfds; i++) {
546198501Sthompsa					if (svc_pollset[i].revents & POLLNVAL) {
547198501Sthompsa						/* Overwrite svc_pollset[i] */
548198501Sthompsa						for (j = i; j < npollfds; j++)
549198501Sthompsa							svc_pollset[j] =
550198501Sthompsa							    svc_pollset[j + 1];
551198501Sthompsa						(void) memset(&svc_pollset[j],
552198501Sthompsa						    0, sizeof (struct pollfd));
553198501Sthompsa						npollfds--;
554198501Sthompsa						n_polled--;
555198501Sthompsa						i--;
556198501Sthompsa					}
557198501Sthompsa				}
558198501Sthompsa			} while (n_polled <= 0);
559198501Sthompsa			svc_polling = FALSE;
560198501Sthompsa
561198501Sthompsa			/*
562198501Sthompsa			 * If there's data in the pipe, clear it.
563198501Sthompsa			 */
564198501Sthompsa			if (svc_pollset[npollfds].revents) {
565198501Sthompsa				clear_pipe();
566198501Sthompsa				n_polled--;
567198501Sthompsa				svc_pollset[npollfds].revents = 0;
568198501Sthompsa			}
569184610Salfred			svc_polled = npollfds;
570184610Salfred			svc_pollfds = n_polled;
571184610Salfred			svc_next_pollfd = 0;
572184610Salfred
573184610Salfred			/*
574184610Salfred			 * Check exit flag.
575184610Salfred			 */
576184610Salfred			if (svc_exit_done) {
577184610Salfred				svc_thr_total--;
578184610Salfred				(void) mutex_unlock(&svc_mutex);
579184610Salfred				(void) mutex_unlock(&svc_thr_mutex);
580184610Salfred				if (!main_thread) {
581184610Salfred					thr_exit(NULL);
582184610Salfred					/* NOTREACHED */
583184610Salfred				}
584184610Salfred				break;
585184610Salfred			}
586184610Salfred
587184610Salfred			/*
588184610Salfred			 * If no descriptor is active, continue.
589184610Salfred			 */
590184610Salfred			if (svc_pollfds == 0)
591188942Sthompsa				goto continue_with_locks;
592		}
593
594		/*
595		 * If a file descriptor has already not been selected,
596		 * choose a file descriptor.
597		 * svc_pollfds and svc_next_pollfd are updated.
598		 */
599		if (myfd == -1) {
600			if (select_next_pollfd(&myfd, &mypollfd) == -1)
601				goto continue_with_locks;
602		}
603
604		/*
605		 * Check to see if new threads need to be started.
606		 * Count of threads that could be gainfully employed is
607		 * obtained as follows:
608		 *	- count 1 for poller
609		 *	- count 1 for this request
610		 *	- count active file descriptors (svc_pollfds)
611		 *	- count pending file descriptors
612		 *
613		 * (svc_thr_total - svc_thr_active) are already available.
614		 * This thread is one of the available threads.
615		 *
616		 * Number of new threads should not exceed
617		 *	(svc_thr_max - svc_thr_total).
618		 */
619		if (svc_thr_total < svc_thr_max &&
620			    svc_mt_mode == RPC_SVC_MT_AUTO && !svc_exit_done) {
621			n_new = 1 + 1 + svc_pollfds + svc_total_pending -
622					(svc_thr_total - svc_thr_active);
623			if (n_new > (svc_thr_max - svc_thr_total))
624				n_new = svc_thr_max - svc_thr_total;
625			if (n_new > 0)
626				start_threads(n_new);
627		}
628
629		/*
630		 * Get parent xprt.  It is possible for the parent service
631		 * handle to be destroyed by now, due to a race condition.
632		 * Check for this, and if so, log a warning and go on.
633		 */
634		parent_xprt = svc_xports[myfd];
635		if (parent_xprt == NULL) {
636			/* Check if it is not a user FD */
637			if (__is_a_userfd(myfd) == TRUE)
638				__svc_getreq_user(&(svc_pollset[mypollfd]));
639			goto continue_with_locks;
640		}
641/* LINTED pointer alignment */
642		if (svc_defunct(parent_xprt) || svc_failed(parent_xprt))
643			goto continue_with_locks;
644
645		/*
646		 * Make a copy of parent xprt, update svc_fdset.
647		 */
648		if ((xprt = make_xprt_copy(parent_xprt)) == NULL)
649			goto continue_with_locks;
650
651		/*
652		 * Keep track of active threads in automatic mode.
653		 */
654		if (svc_mt_mode == RPC_SVC_MT_AUTO)
655			svc_thr_active++;
656
657		/*
658		 * Release mutexes so other threads can get going.
659		 */
660		(void) mutex_unlock(&svc_mutex);
661		(void) mutex_unlock(&svc_thr_mutex);
662
663		/*
664		 * Process request.
665		 */
666		{
667			struct rpc_msg *msg;
668			struct svc_req *r;
669			char *cred_area;
670
671/* LINTED pointer alignment */
672			msg = SVCEXT(xprt)->msg;
673/* LINTED pointer alignment */
674			r = SVCEXT(xprt)->req;
675/* LINTED pointer alignment */
676			cred_area = SVCEXT(xprt)->cred_area;
677
678
679			msg->rm_call.cb_cred.oa_base = cred_area;
680			msg->rm_call.cb_verf.oa_base =
681						&(cred_area[MAX_AUTH_BYTES]);
682			r->rq_clntcred = &(cred_area[2 * MAX_AUTH_BYTES]);
683
684			/*
685			 * receive RPC message
686			 */
687			if ((dispatch = SVC_RECV(xprt, msg))) {
688				if (svc_mt_mode != RPC_SVC_MT_NONE)
689/* LINTED pointer alignment */
690					svc_flags(xprt) |= SVC_ARGS_CHECK;
691				dispatch = _svc_prog_dispatch(xprt, msg, r);
692
693				/*
694				 * Call cleanup procedure if set.
695				 */
696				if (__proc_cleanup_cb != NULL)
697					(*__proc_cleanup_cb)(xprt);
698			} else
699				svc_args_done(xprt);
700
701			/*
702			 * Finish up, if automatic mode, or not dispatched.
703			 */
704			if (svc_mt_mode == RPC_SVC_MT_AUTO || !dispatch) {
705/* LINTED pointer alignment */
706				if (svc_flags(xprt) & SVC_ARGS_CHECK)
707					svc_args_done(xprt);
708				(void) mutex_lock(&svc_mutex);
709				_svc_done_private(xprt);
710				if (svc_mt_mode == RPC_SVC_MT_AUTO) {
711					/*
712					 * not active any more
713					 */
714					svc_thr_active--;
715
716					/*
717					 * If not main thread, exit unless
718					 * there's some immediate work.
719					 */
720					if (!main_thread &&
721						    svc_pollfds <= 0 &&
722						    svc_total_pending <= 0 &&
723						    (svc_polling ||
724							svc_waiters > 0)) {
725						svc_thr_total--;
726						if (svc_thr_total ==
727						    svc_waiters) {
728							(void) cond_broadcast(
729							    &svc_thr_fdwait);
730						}
731						(void) mutex_unlock(&svc_mutex);
732						thr_exit(NULL);
733						/* NOTREACHED */
734					}
735				}
736				(void) mutex_unlock(&svc_mutex);
737			}
738		}
739
740	}
741}
742
743
744/*
745 * start_threads() - Start specified number of threads.
746 */
747static void
748start_threads(int num_threads)
749{
750	int		i;
751
752	assert(MUTEX_HELD(&svc_mutex));
753
754	for (i = 0; i < num_threads; i++) {
755		if (thr_create(NULL, 0, (void *(*)(void *))_svc_run_mt, NULL,
756		    THR_DETACHED, NULL) == 0) {
757			svc_thr_total++;
758			svc_thr_total_creates++;
759		} else {
760			svc_thr_total_create_errors++;
761		}
762	}
763}
764
765
766/*
767 * create_pipe() - create pipe for breaking out of poll.
768 */
769static void
770create_pipe(void)
771{
772	if (pipe(svc_pipe) == -1) {
773		syslog(LOG_ERR, dgettext(__nsl_dom,
774				"RPC: svc could not create pipe - exiting"));
775		exit(1);
776	}
777	if (fcntl(svc_pipe[0], F_SETFL, O_NONBLOCK) == -1) {
778		syslog(LOG_ERR, dgettext(__nsl_dom,
779					"RPC: svc pipe error - exiting"));
780		exit(1);
781	}
782	if (fcntl(svc_pipe[1], F_SETFL, O_NONBLOCK) == -1) {
783		syslog(LOG_ERR, dgettext(__nsl_dom,
784					"RPC: svc pipe error - exiting"));
785		exit(1);
786	}
787}
788
789
790/*
791 * clear_pipe() - Empty data in pipe.
792 */
793static void
794clear_pipe(void)
795{
796	char	buf[16];
797	int	i;
798
799	do {
800		i = read(svc_pipe[0], buf, sizeof (buf));
801	} while (i == sizeof (buf));
802}
803
804
805/*
806 * select_next_pollfd() - Select the next active fd in svc_pollset.
807 */
808static int
809select_next_pollfd(int *fd, int *pollfdIndex)
810{
811	int i;
812
813	assert(MUTEX_HELD(&svc_thr_mutex));
814	assert(MUTEX_HELD(&svc_mutex));
815
816	for (i = svc_next_pollfd; svc_pollfds > 0 && i < svc_polled;
817							i++) {
818		if (svc_pollset[i].revents) {
819			svc_pollfds--;
820			/*
821			 * No more special case for POLLNVAL, because it may
822			 * be linked with a user file descriptot callback
823			 */
824			svc_next_pollfd = i + 1;
825
826			*fd = svc_pollset[i].fd;
827			*pollfdIndex = i;
828
829			return (0);
830		}
831	}
832	svc_next_pollfd = svc_pollfds = 0;
833	*fd = -1;
834	*pollfdIndex = INVALID_POLLFD;
835	return (-1);
836}
837
838
839/*
840 * make_xprt_copy() - make a copy of the parent xprt.
841 * Clear fd bit in svc_fdset.
842 */
843static SVCXPRT *
844make_xprt_copy(SVCXPRT *parent)
845{
846/* LINTED pointer alignment */
847	SVCXPRT_LIST	*xlist = SVCEXT(parent)->my_xlist;
848	SVCXPRT_LIST	*xret;
849	SVCXPRT		*xprt;
850	int		fd = parent->xp_fd;
851
852	assert(MUTEX_HELD(&svc_mutex));
853
854	xret = xlist->next;
855	if (xret) {
856		xlist->next = xret->next;
857		xret->next = NULL;
858		xprt = xret->xprt;
859/* LINTED pointer alignment */
860		svc_flags(xprt) = svc_flags(parent);
861	} else
862		xprt = svc_copy(parent);
863
864	if (xprt) {
865/* LINTED pointer alignment */
866		SVCEXT(parent)->refcnt++;
867		(void) rw_wrlock(&svc_fd_lock);
868		clear_pollfd(fd);
869		(void) rw_unlock(&svc_fd_lock);
870	}
871	return (xprt);
872}
873
874/*
875 * _svc_done_private() - return copies to library.
876 */
877static void
878_svc_done_private(SVCXPRT *xprt)
879{
880	SVCXPRT		*parent;
881	SVCXPRT_LIST	*xhead, *xlist;
882
883	assert(MUTEX_HELD(&svc_mutex));
884
885/* LINTED pointer alignment */
886	if ((parent = SVCEXT(xprt)->parent) == NULL)
887		return;
888
889/* LINTED pointer alignment */
890	xhead = SVCEXT(parent)->my_xlist;
891/* LINTED pointer alignment */
892	xlist = SVCEXT(xprt)->my_xlist;
893	xlist->next = xhead->next;
894	xhead->next = xlist;
895
896/* LINTED pointer alignment */
897	SVCEXT(parent)->refcnt--;
898
899	/*
900	 * Propagate any error flags.  This is done in both directions to
901	 * ensure that if one child gets an error, everyone will see it
902	 * (even if there are multiple outstanding children) and the
903	 * transport will get closed.
904	 */
905/* LINTED pointer alignment */
906	svc_flags(xprt) |= svc_flags(parent);
907/* LINTED pointer alignment */
908	if (svc_failed(xprt) || svc_defunct(xprt)) {
909/* LINTED pointer alignment */
910		svc_flags(parent) |= (svc_flags(xprt) &
911				(SVC_FAILED | SVC_DEFUNCT));
912/* LINTED pointer alignment */
913		if (SVCEXT(parent)->refcnt == 0)
914			_svc_destroy_private(xprt);
915	}
916}
917
918void
919svc_done(SVCXPRT *xprt)
920{
921	if (svc_mt_mode != RPC_SVC_MT_USER)
922		return;
923
924	/*
925	 * Make sure file descriptor is released in user mode.
926	 * If the xprt is a door, do nothing: this work is performed by
927	 * svc_door.c's return_xprt_copy() routine, which is basically a
928	 * door-specific copy of _svc_done_private().
929	 */
930/* LINTED pointer alignment */
931	if (svc_type(xprt) == SVC_DOOR)
932		return;
933
934/* LINTED pointer alignment */
935	if (svc_flags(xprt) & SVC_ARGS_CHECK)
936		svc_args_done(xprt);
937
938	(void) mutex_lock(&svc_mutex);
939	_svc_done_private(xprt);
940	(void) mutex_unlock(&svc_mutex);
941}
942
943
944/*
945 * Mark argument completion.  Release file descriptor.
946 */
947void
948svc_args_done(SVCXPRT *xprt)
949{
950	char	dummy;
951/* LINTED pointer alignment */
952	SVCXPRT	*parent = SVCEXT(xprt)->parent;
953	bool_t	wake_up_poller;
954	enum	xprt_stat stat;
955
956/* LINTED pointer alignment */
957	svc_flags(xprt) |= svc_flags(parent);
958/* LINTED pointer alignment */
959	svc_flags(xprt) &= ~SVC_ARGS_CHECK;
960/* LINTED pointer alignment */
961	if (svc_failed(xprt) || svc_defunct(parent))
962		return;
963
964/* LINTED pointer alignment */
965	if (svc_type(xprt) == SVC_CONNECTION &&
966				(stat = SVC_STAT(xprt)) != XPRT_IDLE) {
967		if (stat == XPRT_MOREREQS) {
968			(void) mutex_lock(&svc_mutex);
969			svc_pending_fds[svc_last_pending++] = xprt->xp_fd;
970			if (svc_last_pending > CIRCULAR_BUFSIZE)
971				svc_last_pending = 0;
972			svc_total_pending++;
973			(void) mutex_unlock(&svc_mutex);
974			wake_up_poller = FALSE;
975		} else {
976			/*
977			 * connection failed
978			 */
979			return;
980		}
981	} else {
982		(void) rw_wrlock(&svc_fd_lock);
983		set_pollfd(xprt->xp_fd, MASKVAL);
984		(void) rw_unlock(&svc_fd_lock);
985		wake_up_poller = TRUE;
986	}
987
988	if (!wake_up_poller || !svc_polling) {
989		/*
990		 * Wake up any waiting threads.
991		 */
992		(void) mutex_lock(&svc_mutex);
993		if (svc_waiters > 0) {
994			(void) cond_broadcast(&svc_thr_fdwait);
995			(void) mutex_unlock(&svc_mutex);
996			return;
997		}
998		(void) mutex_unlock(&svc_mutex);
999	}
1000
1001	/*
1002	 * Wake up any polling thread.
1003	 */
1004	if (svc_polling)
1005		(void) write(svc_pipe[1], &dummy, sizeof (dummy));
1006}
1007
1008
1009int
1010__rpc_legal_connmaxrec(int suggested) {
1011	if (suggested == -1) {
1012		/* Supply default */
1013		return (RPC_MAXDATASIZE + 2*sizeof (uint32_t));
1014	} else if (suggested < 0) {
1015		return (-1);
1016	} else if (suggested > 0) {
1017		/* Round down to multiple of BYTES_PER_XDR_UNIT */
1018		suggested -= suggested % BYTES_PER_XDR_UNIT;
1019		/* If possible, allow for two fragment headers */
1020		if (suggested < MAXINT-(2*sizeof (uint32_t))) {
1021			/* Allow for two fragment headers */
1022			suggested += 2 * sizeof (uint32_t);
1023		} else {
1024			suggested = MAXINT;
1025		}
1026		if (suggested < sizeof (struct rpc_msg)) {
1027			return (-1);
1028		}
1029	}
1030	return (suggested);
1031}
1032
1033
1034bool_t
1035rpc_control(int op, void *info)
1036{
1037	int		tmp;
1038	extern int	__rpc_minfd;
1039
1040	switch (op) {
1041	case RPC_SVC_MTMODE_SET:
1042		tmp = *((int *)info);
1043		if (tmp != RPC_SVC_MT_NONE && tmp != RPC_SVC_MT_AUTO &&
1044						tmp != RPC_SVC_MT_USER)
1045			return (FALSE);
1046		if (svc_mt_mode != RPC_SVC_MT_NONE && svc_mt_mode != tmp)
1047			return (FALSE);
1048		svc_mt_mode = tmp;
1049		return (TRUE);
1050	case RPC_SVC_MTMODE_GET:
1051		*((int *)info) = svc_mt_mode;
1052		return (TRUE);
1053	case RPC_SVC_THRMAX_SET:
1054		if ((tmp = *((int *)info)) < 1)
1055			return (FALSE);
1056		(void) mutex_lock(&svc_mutex);
1057		svc_thr_max = tmp;
1058		(void) mutex_unlock(&svc_mutex);
1059		return (TRUE);
1060	case RPC_SVC_THRMAX_GET:
1061		*((int *)info) = svc_thr_max;
1062		return (TRUE);
1063	case RPC_SVC_THRTOTAL_GET:
1064		*((int *)info) = svc_thr_total;
1065		return (TRUE);
1066	case RPC_SVC_THRCREATES_GET:
1067		*((int *)info) = svc_thr_total_creates;
1068		return (TRUE);
1069	case RPC_SVC_THRERRORS_GET:
1070		*((int *)info) = svc_thr_total_create_errors;
1071		return (TRUE);
1072	case RPC_SVC_USE_POLLFD:
1073		if (*((int *)info) && !__rpc_use_pollfd_done) {
1074			__rpc_use_pollfd_done = 1;
1075			return (TRUE);
1076		}
1077		return (FALSE);
1078	case __RPC_CLNT_MINFD_SET:
1079		tmp = *((int *)info);
1080		if (tmp < 0)
1081			return (FALSE);
1082		__rpc_minfd = tmp;
1083		return (TRUE);
1084	case __RPC_CLNT_MINFD_GET:
1085		*((int *)info) = __rpc_minfd;
1086		return (TRUE);
1087	case RPC_SVC_CONNMAXREC_SET:
1088		tmp = __rpc_legal_connmaxrec(*(int *)info);
1089		if (tmp >= 0) {
1090			__rpc_connmaxrec = tmp;
1091			return (TRUE);
1092		} else {
1093			return (FALSE);
1094		}
1095	case RPC_SVC_CONNMAXREC_GET:
1096		*((int *)info) = __rpc_connmaxrec;
1097		return (TRUE);
1098	case RPC_SVC_IRTIMEOUT_SET:
1099		tmp = *((int *)info);
1100		if (tmp >= 0) {
1101			__rpc_irtimeout = tmp;
1102			return (TRUE);
1103		} else {
1104			return (FALSE);
1105		}
1106	/*
1107	 * No mutex necessary as _EXCLBIND_SET will/should only
1108	 * be used before an RPC daemon goes mt-hot.
1109	 */
1110	case __RPC_SVC_EXCLBIND_SET:
1111		if (info) {
1112			__rpc_tp_exclbind = *((bool_t *)info);
1113			return (TRUE);
1114		}
1115		return (FALSE);
1116	case __RPC_SVC_EXCLBIND_GET:
1117		if (info) {
1118			*((bool_t *)info) = __rpc_tp_exclbind;
1119			return (TRUE);
1120		}
1121		return (FALSE);
1122
1123	default:
1124		return (FALSE);
1125	}
1126}
1127