ptms_conf.c revision 4321:a8930ec16e52
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * This file contains global data and code shared between master and slave parts
30 * of the pseudo-terminal driver.
31 *
32 * Pseudo terminals (or pt's for short) are allocated dynamically.
33 * pt's are put in the global ptms_slots array indexed by minor numbers.
34 *
 * The slots array is initially small (pt_init_cnt entries, NPTY_INITIAL by
 * default). When more pt's are needed than the slot array can hold, a larger
 * slot array is allocated and all opened pt's move to the new one.
38 *
39 * Resource allocation:
40 *
41 *	pt_ttys structures are allocated via pt_ttys_alloc, which uses
42 *		kmem_cache_alloc().
43 *	Minor number space is allocated via vmem_alloc() interface.
44 *	ptms_slots arrays are allocated via kmem_alloc().
45 *
46 *   Minors are started from 1 instead of 0 because vmem_alloc returns 0 in case
47 *   of failure. Also, in anticipation of removing clone device interface to
48 *   pseudo-terminal subsystem, minor 0 should not be used. (Potential future
49 *   development).
50 *
51 *   After the table slot size reaches pt_maxdelta, we stop 2^N extension
52 *   algorithm and start extending the slot table size by pt_maxdelta.
53 *
 *   Device entries in the /dev/pts directory are created dynamically by the
 *   /dev filesystem. We no longer call ddi_create_minor_node() on
 *   behalf of the slave driver. The /dev filesystem creates /dev/pts
 *   nodes based on the pt_ttys array.
58 *
59 * Synchronization:
60 *
61 *   All global data synchronization between ptm/pts is done via global
62 *   ptms_lock mutex which is implicitly initialized by declaring it global.
63 *
64 *   Individual fields of pt_ttys structure (except ptm_rdq, pts_rdq and
65 *   pt_nullmsg) are protected by pt_ttys.pt_lock mutex.
66 *
67 *   PT_ENTER_READ/PT_ENTER_WRITE are reference counter based read-write locks
68 *   which allow reader locks to be reacquired by the same thread (usual
69 *   reader/writer locks can't be used for that purpose since it is illegal for
70 *   a thread to acquire a lock it already holds, even as a reader). The sole
71 *   purpose of these macros is to guarantee that the peer queue will not
72 *   disappear (due to closing peer) while it is used. It is safe to use
73 *   PT_ENTER_READ/PT_EXIT_READ brackets across calls like putq/putnext (since
74 *   they are not real locks but reference counts).
75 *
76 *   PT_ENTER_WRITE/PT_EXIT_WRITE brackets are used ONLY in master/slave
77 *   open/close paths to modify ptm_rdq and pts_rdq fields. These fields should
78 *   be set to appropriate queues *after* qprocson() is called during open (to
79 *   prevent peer from accessing the queue with incomplete plumbing) and set to
80 *   NULL before qprocsoff() is called during close. Put and service procedures
81 *   use PT_ENTER_READ/PT_EXIT_READ to prevent peer closes.
82 *
83 *   The pt_nullmsg field is only used in open/close routines and is also
84 *   protected by PT_ENTER_WRITE/PT_EXIT_WRITE brackets to avoid extra mutex
85 *   holds.
86 *
87 * Lock Ordering:
88 *
89 *   If both ptms_lock and per-pty lock should be held, ptms_lock should always
90 *   be entered first, followed by per-pty lock.
91 *
92 * Global functions:
93 *
94 * void ptms_init(void);
95 *
 *	Called by pts/ptm _init entry points. It performs one-time
 *	initialization needed for both pts and ptm. This initialization is done
 *	here and not in ptms_initspace because all these data structures are not
 *	needed if pseudo-terminals are not used in the system.
100 *
101 * struct pt_ttys *pt_ttys_alloc(void);
102 *
103 *	Allocate new minor number and pseudo-terminal entry. May sleep.
104 *	New minor number is recorded in pt_minor field of the entry returned.
105 *	This routine also initializes pt_minor and pt_state fields of the new
106 *	pseudo-terminal and puts a pointer to it into ptms_slots array.
107 *
108 * struct pt_ttys *ptms_minor2ptty(minor_t minor)
109 *
110 *	Find pt_ttys structure by minor number.
111 *	Returns NULL when minor is out of range.
112 *
113 * int ptms_minor_valid(minor_t minor, uid_t *ruid, gid_t *rgid)
114 *
115 *	Check if minor refers to an allocated pty in the current zone.
116 *	Returns
117 *		 0 if not allocated or not for this zone.
118 *		 1 if an allocated pty in the current zone.
119 *	Also returns owner of pty.
120 *
121 * int ptms_minor_exists(minor_t minor)
122 *	Check if minor refers to an allocated pty (in any zone)
123 *	Returns
124 *		0 if not an allocated pty
125 *		1 if an allocated pty
126 *
127 * void ptms_set_owner(minor_t minor, uid_t ruid, gid_t rgid)
128 *
129 *	Sets the owner associated with a pty.
130 *
131 * void ptms_close(struct pt_ttys *pt, uint_t flags_to_clear);
132 *
133 *	Clear flags_to_clear in pt and if no one owns it (PTMOPEN/PTSOPEN not
134 * 	set) free pt entry and corresponding slot.
135 *
136 * Tuneables and configuration:
137 *
138 *	pt_cnt: minimum number of pseudo-terminals in the system. The system
139 *		should provide at least this number of ptys (provided sufficient
140 * 		memory is available). It is different from the older semantics
141 *		of pt_cnt meaning maximum number of ptys.
142 *		Set to 0 by default.
143 *
144 *	pt_max_pty: Maximum number of pseudo-terminals in the system. The system
145 *		should not allocate more ptys than pt_max_pty (although, it may
146 * 		impose stricter maximum). Zero value means no user-defined
147 * 		maximum. This is intended to be used as "denial-of-service"
148 *		protection.
149 *		Set to 0 by default.
150 *
151 *         Both pt_cnt and pt_max_pty may be modified during system lifetime
152 *         with their semantics preserved.
153 *
154 *	pt_init_cnt: Initial size of ptms_slots array. Set to NPTY_INITIAL.
155 *
 *	pt_pctofmem: Approximate percentage of system memory that may be
 *		occupied by pty data structures. Initially set to NPTY_PERCENT.
 *		This variable is used once during initialization to estimate
 *		maximum number of ptys in the system. The actual maximum is
 *		determined as minimum of pt_max_pty and calculated value.
161 *
162 *	pt_maxdelta: Maximum extension chunk of the slot table.
163 */
164
165
166
167#include <sys/types.h>
168#include <sys/param.h>
169#include <sys/termios.h>
170#include <sys/stream.h>
171#include <sys/stropts.h>
172#include <sys/kmem.h>
173#include <sys/ptms.h>
174#include <sys/stat.h>
175#include <sys/sunddi.h>
176#include <sys/ddi.h>
177#include <sys/bitmap.h>
178#include <sys/sysmacros.h>
179#include <sys/ddi_impldefs.h>
180#include <sys/zone.h>
181#ifdef DEBUG
182#include <sys/strlog.h>
183#endif
184
185
/* Initial number of ptms slots (default value of pt_init_cnt) */
#define	NPTY_INITIAL 16

/* Default percentage of memory usable for ptys (default of pt_pctofmem) */
#define	NPTY_PERCENT 5

/* Maximum increment of the slot table size (default value of pt_maxdelta) */
#define	PTY_MAXDELTA 128

/*
 * Tuneable variables.
 *
 * pt_cnt and pt_max_pty are consulted on every pt_ttys_alloc() call: while
 * fewer than pt_cnt ptys are in use the maximum is not checked at all, and a
 * zero pt_max_pty means that only the memory based estimate applies.
 * pt_init_cnt and pt_pctofmem are read once by ptms_init(); pt_maxdelta caps
 * the number of slots added by each ptms_grow() call.
 */
uint_t	pt_cnt = 0;			/* Minimum number of ptys */
size_t 	pt_max_pty = 0;			/* Maximum number of ptys */
uint_t	pt_init_cnt = NPTY_INITIAL;	/* Initial number of ptms slots */
uint_t	pt_pctofmem = NPTY_PERCENT;	/* Percent of memory to use for ptys */
uint_t	pt_maxdelta = PTY_MAXDELTA;	/* Max increment for slot table size */
202
/* Other global variables */

/*
 * Global data access lock. The routines in this file hold it while they
 * read or modify the slot table and its bookkeeping variables below.
 */
kmutex_t ptms_lock;			/* Global data access lock */

/*
 * Slot array and its management variables.
 *
 * The entry for minor number N lives in ptms_slots[N - 1]; a NULL slot means
 * that the minor is not allocated.
 */
static struct pt_ttys **ptms_slots = NULL; /* Slots for actual pt structures */
static size_t ptms_nslots = 0;		/* Size of slot array */
static size_t ptms_ptymax = 0;		/* Maximum number of ptys */
static size_t ptms_inuse = 0;		/* # of ptys currently allocated */

/* Not assigned anywhere in this file; read by the slave attach helpers. */
dev_info_t 	*pts_dip = NULL;	/* set if slave is attached */

/* Cache of struct pt_ttys objects, created by ptms_init(). */
static struct kmem_cache *ptms_cache = NULL;	/* pty cache */

/* Identifier arena handing out minor numbers, starting from 1. */
static vmem_t *ptms_minor_arena = NULL; /* Arena for device minors */

/* Forward declarations of file-local helpers. */
static uint_t ptms_roundup(uint_t);
static int ptms_constructor(void *, void *, int);
static void ptms_destructor(void *, void *);
static minor_t ptms_grow(void);
225
/*
 * Total size occupied by one pty. Each pty master/slave pair consumes one
 * pointer for ptms_slots array, one pt_ttys structure and one empty message
 * preallocated for pts close.
 *
 * ptms_init() divides the memory budget by this value to estimate how many
 * ptys the system can afford.
 */

#define	PTY_SIZE (sizeof (struct pt_ttys) + \
    sizeof (struct pt_ttys *) + \
    sizeof (dblk_t))

#ifdef DEBUG
/*
 * Debug logging control, tested by ptms_log()/ptms_logp(): any non-zero value
 * enables strlog() tracing, bit 2 additionally prints through cmn_err() and
 * bit 4 adds SL_ERROR to the strlog() flags.
 */
int ptms_debug = 0;
/* Module id passed to strlog() by the debug logging routines. */
#define	PTMOD_ID 5
#endif
240
241/*
242 * Clear all bits of x except the highest bit
243 */
244#define	truncate(x) 	((x) <= 2 ? (x) : (1 << (highbit(x) - 1)))
245
246/*
247 * Roundup the number to the nearest power of 2
248 */
249static uint_t
250ptms_roundup(uint_t x)
251{
252	uint_t p = truncate(x);	/* x with non-high bits stripped */
253
254	/*
255	 * If x is a power of 2, return x, otherwise roundup.
256	 */
257	return (p == x ? p : (p * 2));
258}
259
260/*
261 * Allocate ptms_slots array and kmem cache for pt_ttys. This initialization is
262 * only called once during system lifetime. Called from ptm or pts _init
263 * routine.
264 */
265void
266ptms_init(void)
267{
268	mutex_enter(&ptms_lock);
269
270	if (ptms_slots == NULL) {
271		ptms_slots = kmem_zalloc(pt_init_cnt *
272		    sizeof (struct pt_ttys *), KM_SLEEP);
273
274		ptms_cache = kmem_cache_create("pty_map",
275		    sizeof (struct pt_ttys), 0, ptms_constructor,
276		    ptms_destructor, NULL, NULL, NULL, 0);
277
278		ptms_nslots = pt_init_cnt;
279
280		/* Allocate integer space for minor numbers */
281		ptms_minor_arena = vmem_create("ptms_minor", (void *)1,
282		    ptms_nslots, 1, NULL, NULL, NULL, 0,
283		    VM_SLEEP | VMC_IDENTIFIER);
284
285		/*
286		 * Calculate available number of ptys - how many ptys can we
287		 * allocate in pt_pctofmem % of available memory. The value is
288		 * rounded up to the nearest power of 2.
289		 */
290		ptms_ptymax = ptms_roundup((pt_pctofmem * kmem_maxavail()) /
291		    (100 * PTY_SIZE));
292	}
293	mutex_exit(&ptms_lock);
294}
295
296/*
297 * This routine attaches the pts dip.
298 */
299int
300ptms_attach_slave(void)
301{
302	if (pts_dip == NULL && i_ddi_attach_pseudo_node("pts") == NULL)
303		return (-1);
304
305	ASSERT(pts_dip);
306	return (0);
307}
308
309/*
310 * Called from /dev fs. Checks if dip is attached,
311 * and if it is, returns its major number.
312 */
313major_t
314ptms_slave_attached(void)
315{
316	major_t maj = (major_t)-1;
317
318	mutex_enter(&ptms_lock);
319	if (pts_dip)
320		maj = ddi_driver_major(pts_dip);
321	mutex_exit(&ptms_lock);
322
323	return (maj);
324}
325
326/*
327 * Allocate new minor number and pseudo-terminal entry. Returns the new entry or
328 * NULL if no memory or maximum number of entries reached.
329 */
330struct pt_ttys *
331pt_ttys_alloc(void)
332{
333	minor_t dminor;
334	struct pt_ttys *pt = NULL;
335
336	mutex_enter(&ptms_lock);
337
338	/*
339	 * Always try to allocate new pty when pt_cnt minimum limit is not
340	 * achieved. If it is achieved, the maximum is determined by either
341	 * user-specified value (if it is non-zero) or our memory estimations -
342	 * whatever is less.
343	 */
344	if (ptms_inuse >= pt_cnt) {
345		/*
346		 * When system achieved required minimum of ptys, check for the
347		 *   denial of service limits.
348		 *
349		 * Since pt_max_pty may be zero, the formula below is used to
350		 * avoid conditional expression. It will equal to pt_max_pty if
351		 * it is not zero and ptms_ptymax otherwise.
352		 */
353		size_t user_max = (pt_max_pty == 0 ? ptms_ptymax : pt_max_pty);
354
355		/* Do not try to allocate more than allowed */
356		if (ptms_inuse >= min(ptms_ptymax, user_max)) {
357			mutex_exit(&ptms_lock);
358			return (NULL);
359		}
360	}
361	ptms_inuse++;
362
363	/*
364	 * Allocate new minor number. If this fails, all slots are busy and
365	 * we need to grow the hash.
366	 */
367	dminor = (minor_t)(uintptr_t)
368	    vmem_alloc(ptms_minor_arena, 1, VM_NOSLEEP);
369
370	if (dminor == 0) {
371		/* Grow the cache and retry allocation */
372		dminor = ptms_grow();
373	}
374
375	if (dminor == 0) {
376		/* Not enough memory now */
377		ptms_inuse--;
378		mutex_exit(&ptms_lock);
379		return (NULL);
380	}
381
382	pt = kmem_cache_alloc(ptms_cache, KM_NOSLEEP);
383	if (pt == NULL) {
384		/* Not enough memory - this entry can't be used now. */
385		vmem_free(ptms_minor_arena, (void *)(uintptr_t)dminor, 1);
386		ptms_inuse--;
387	} else {
388		pt->pt_minor = dminor;
389		pt->pt_pid = curproc->p_pid;	/* For debugging */
390		pt->pt_state = (PTMOPEN | PTLOCK);
391		pt->pt_zoneid = getzoneid();
392		pt->pt_ruid = 0; /* we don't know uid/gid yet. Report as root */
393		pt->pt_rgid = 0;
394		ASSERT(ptms_slots[dminor - 1] == NULL);
395		ptms_slots[dminor - 1] = pt;
396	}
397
398	mutex_exit(&ptms_lock);
399	return (pt);
400}
401
402/*
403 * Get pt_ttys structure by minor number.
404 * Returns NULL when minor is out of range.
405 */
406struct pt_ttys *
407ptms_minor2ptty(minor_t dminor)
408{
409	struct pt_ttys *pt = NULL;
410
411	ASSERT(mutex_owned(&ptms_lock));
412	if ((dminor >= 1) && (dminor <= ptms_nslots) && ptms_slots != NULL)
413		pt = ptms_slots[dminor - 1];
414
415	return (pt);
416}
417
418/*
419 * Invoked in response to chown on /dev/pts nodes to change the
420 * permission on a pty
421 */
422void
423ptms_set_owner(minor_t dminor, uid_t ruid, gid_t rgid)
424{
425	struct pt_ttys *pt;
426
427	ASSERT(ruid >= 0);
428	ASSERT(rgid >= 0);
429
430	if (ruid < 0 || rgid < 0)
431		return;
432
433	/*
434	 * /dev/pts/0 is not used, but some applications may check it. There
435	 * is no pty backing it - so we have nothing to do.
436	 */
437	if (dminor == 0)
438		return;
439
440	mutex_enter(&ptms_lock);
441	pt = ptms_minor2ptty(dminor);
442	if (pt != NULL && pt->pt_zoneid == getzoneid()) {
443		pt->pt_ruid = ruid;
444		pt->pt_rgid = rgid;
445	}
446	mutex_exit(&ptms_lock);
447}
448
449/*
450 * Given a ptm/pts minor number
451 * returns:
452 *	1 if the pty is allocated to the current zone.
453 *	0 otherwise
454 *
455 * If the pty is allocated to the current zone, it also returns the owner.
456 */
457int
458ptms_minor_valid(minor_t dminor, uid_t *ruid, gid_t *rgid)
459{
460	struct pt_ttys *pt;
461	int ret;
462
463	ASSERT(ruid);
464	ASSERT(rgid);
465
466	*ruid = (uid_t)-1;
467	*rgid = (gid_t)-1;
468
469	/*
470	 * /dev/pts/0 is not used, but some applications may check it, so create
471	 * it also. Report the owner as root. It belongs to all zones.
472	 */
473	if (dminor == 0) {
474		*ruid = 0;
475		*rgid = 0;
476		return (1);
477	}
478
479	ret = 0;
480	mutex_enter(&ptms_lock);
481	pt = ptms_minor2ptty(dminor);
482	if (pt != NULL) {
483		ASSERT(pt->pt_ruid >= 0);
484		ASSERT(pt->pt_rgid >= 0);
485		if (pt->pt_zoneid == getzoneid()) {
486			ret = 1;
487			*ruid = pt->pt_ruid;
488			*rgid = pt->pt_rgid;
489		}
490	}
491	mutex_exit(&ptms_lock);
492
493	return (ret);
494}
495
496/*
497 * Given a ptm/pts minor number
498 * returns:
499 *	0 if the pty is not allocated
500 *	1 if the pty is allocated
501 */
502int
503ptms_minor_exists(minor_t dminor)
504{
505	int ret;
506
507	mutex_enter(&ptms_lock);
508	ret = ptms_minor2ptty(dminor) ? 1 : 0;
509	mutex_exit(&ptms_lock);
510
511	return (ret);
512}
513
514/*
515 * Close the pt and clear flags_to_clear.
516 * If pt device is not opened by someone else, free it and clear its slot.
517 */
518void
519ptms_close(struct pt_ttys *pt, uint_t flags_to_clear)
520{
521	uint_t flags;
522
523	ASSERT(MUTEX_NOT_HELD(&ptms_lock));
524	ASSERT(pt != NULL);
525
526	mutex_enter(&ptms_lock);
527
528	mutex_enter(&pt->pt_lock);
529	pt->pt_state &= ~flags_to_clear;
530	flags = pt->pt_state;
531	mutex_exit(&pt->pt_lock);
532
533	if (! (flags & (PTMOPEN | PTSOPEN))) {
534		/* No one owns the entry - free it */
535
536		ASSERT(pt->ptm_rdq == NULL);
537		ASSERT(pt->pts_rdq == NULL);
538		ASSERT(pt->pt_nullmsg == NULL);
539		ASSERT(pt->pt_refcnt == 0);
540		ASSERT(pt->pt_minor <= ptms_nslots);
541		ASSERT(ptms_slots[pt->pt_minor - 1] == pt);
542		ASSERT(ptms_inuse > 0);
543
544		ptms_inuse--;
545
546		pt->pt_pid = 0;
547
548		ptms_slots[pt->pt_minor - 1] = NULL;
549		/* Return minor number to the pool of minors */
550		vmem_free(ptms_minor_arena, (void *)(uintptr_t)pt->pt_minor, 1);
551		/* Return pt to the cache */
552		kmem_cache_free(ptms_cache, pt);
553	}
554	mutex_exit(&ptms_lock);
555}
556
557/*
558 * Allocate another slot table twice as large as the original one (limited to
559 * global maximum). Migrate all pt to the new slot table and free the original
560 * one. Create more /devices entries for new devices.
561 */
562static minor_t
563ptms_grow()
564{
565	minor_t old_size = ptms_nslots;
566	minor_t delta = MIN(pt_maxdelta, old_size);
567	minor_t new_size = old_size + delta;
568	struct pt_ttys **ptms_old = ptms_slots;
569	struct pt_ttys **ptms_new;
570	void  *vaddr;			/* vmem_add return value */
571
572	ASSERT(MUTEX_HELD(&ptms_lock));
573
574	DDBG("ptmopen(%d): need to grow\n", (int)ptms_inuse);
575
576	/* Allocate new ptms array */
577	ptms_new = kmem_zalloc(new_size * sizeof (struct pt_ttys *),
578	    KM_NOSLEEP);
579	if (ptms_new == NULL)
580		return ((minor_t)0);
581
582	/* Increase clone index space */
583	vaddr = vmem_add(ptms_minor_arena, (void *)(uintptr_t)(old_size + 1),
584	    new_size - old_size, VM_NOSLEEP);
585
586	if (vaddr == NULL) {
587		kmem_free(ptms_new, new_size * sizeof (struct pt_ttys *));
588		return ((minor_t)0);
589	}
590
591	/* Migrate pt entries to a new location */
592	ptms_nslots = new_size;
593	bcopy(ptms_old, ptms_new, old_size * sizeof (struct pt_ttys *));
594	ptms_slots = ptms_new;
595	kmem_free(ptms_old, old_size * sizeof (struct pt_ttys *));
596
597	/* Allocate minor number and return it */
598	return ((minor_t)(uintptr_t)
599	    vmem_alloc(ptms_minor_arena, 1, VM_NOSLEEP));
600}
601
602/*ARGSUSED*/
603static int
604ptms_constructor(void *maddr, void *arg, int kmflags)
605{
606	struct pt_ttys *pt = maddr;
607
608	pt->pts_rdq = NULL;
609	pt->ptm_rdq = NULL;
610	pt->pt_nullmsg = NULL;
611	pt->pt_pid = NULL;
612	pt->pt_minor = NULL;
613	pt->pt_refcnt = 0;
614	pt->pt_state = 0;
615	pt->pt_zoneid = GLOBAL_ZONEID;
616
617	cv_init(&pt->pt_cv, NULL, CV_DEFAULT, NULL);
618	mutex_init(&pt->pt_lock, NULL, MUTEX_DEFAULT, NULL);
619	return (0);
620}
621
622/*ARGSUSED*/
623static void
624ptms_destructor(void *maddr, void *arg)
625{
626	struct pt_ttys *pt = maddr;
627
628	ASSERT(pt->pt_refcnt == 0);
629	ASSERT(pt->pt_state == 0);
630	ASSERT(pt->ptm_rdq == NULL);
631	ASSERT(pt->pts_rdq == NULL);
632
633	mutex_destroy(&pt->pt_lock);
634	cv_destroy(&pt->pt_cv);
635}
636
#ifdef DEBUG
/*
 * Debug logging of a message with one uint_t argument. Nothing is logged
 * while ptms_debug is zero. Bit 2 of ptms_debug also prints the message
 * through cmn_err(); bit 4 adds SL_ERROR to the strlog() flags.
 */
void
ptms_log(char *str, uint_t arg)
{
	if (!ptms_debug)
		return;

	if (ptms_debug & 2)
		cmn_err(CE_CONT, str, arg);

	if (ptms_debug & 4) {
		(void) strlog(PTMOD_ID, -1, 0, SL_TRACE | SL_ERROR,
		    str, arg);
	} else {
		(void) strlog(PTMOD_ID, -1, 0, SL_TRACE, str, arg);
	}
}

/*
 * Same as ptms_log(), but for a pointer-sized argument.
 */
void
ptms_logp(char *str, uintptr_t arg)
{
	if (!ptms_debug)
		return;

	if (ptms_debug & 2)
		cmn_err(CE_CONT, str, arg);

	if (ptms_debug & 4) {
		(void) strlog(PTMOD_ID, -1, 0, SL_TRACE | SL_ERROR,
		    str, arg);
	} else {
		(void) strlog(PTMOD_ID, -1, 0, SL_TRACE, str, arg);
	}
}
#endif
666