zone.h revision 2712:f74a135872bc
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef _SYS_ZONE_H
27#define	_SYS_ZONE_H
28
29#pragma ident	"%Z%%M%	%I%	%E% SMI"
30
31#include <sys/types.h>
32#include <sys/mutex.h>
33#include <sys/param.h>
34#include <sys/rctl.h>
35#include <sys/ipc_rctl.h>
36#include <sys/pset.h>
37#include <sys/tsol/label.h>
38#include <sys/uadmin.h>
39
40#ifdef	__cplusplus
41extern "C" {
42#endif
43
44/*
45 * NOTE
46 *
47 * The contents of this file are private to the implementation of
48 * Solaris and are subject to change at any time without notice.
49 * Applications and drivers using these interfaces may fail to
50 * run on future releases.
51 */
52
53/* Available both in kernel and for user space */
54
55/*
 * zone id restrictions and special ids
 *
 * MIN_ZONEID and GLOBAL_ZONEID are both 0; ids that users may create start
 * at MIN_USERZONEID.  ZONEID_WIDTH equals the number of decimal digits in
 * MAX_ZONEID.
 */
56#define	MAX_ZONEID	9999	/* highest zone ID */
57#define	MIN_USERZONEID	1	/* lowest user-creatable zone ID */
58#define	MIN_ZONEID	0	/* minimum zone ID on system */
59#define	GLOBAL_ZONEID	0	/* same value as MIN_ZONEID */
60#define	ZONEID_WIDTH	4	/* for printf */
61
62/*
63 * Special zoneid_t token to refer to all zones.
 * The value is negative, so it cannot collide with an id in the
 * MIN_ZONEID..MAX_ZONEID range.
64 */
65#define	ALL_ZONES	(-1)
66
67/*
 * system call subcodes
 *
 * Numbered consecutively from 0 (ZONE_CREATE) to 9 (ZONE_SETATTR).
 * NOTE(review): presumably the first (int) argument of zone() -- confirm.
 */
68#define	ZONE_CREATE	0
69#define	ZONE_DESTROY	1
70#define	ZONE_GETATTR	2
71#define	ZONE_ENTER	3
72#define	ZONE_LIST	4
73#define	ZONE_SHUTDOWN	5
74#define	ZONE_LOOKUP	6
75#define	ZONE_BOOT	7
76#define	ZONE_VERSION	8
77#define	ZONE_SETATTR	9
78
79/*
 * zone attributes
 *
 * Generic attributes are numbered from 1 (ZONE_ATTR_ROOT) to 11
 * (ZONE_ATTR_BRAND); values from ZONE_ATTR_BRAND_ATTRS upward belong to
 * the brand-specific namespace.
 * NOTE(review): presumably the selectors accepted by the ZONE_GETATTR and
 * ZONE_SETATTR subcodes -- confirm.
 */
80#define	ZONE_ATTR_ROOT		1
81#define	ZONE_ATTR_NAME		2
82#define	ZONE_ATTR_STATUS	3
83#define	ZONE_ATTR_PRIVSET	4
84#define	ZONE_ATTR_UNIQID	5
85#define	ZONE_ATTR_POOLID	6
86#define	ZONE_ATTR_INITPID	7
87#define	ZONE_ATTR_SLBL		8
88#define	ZONE_ATTR_INITNAME	9
89#define	ZONE_ATTR_BOOTARGS	10
90#define	ZONE_ATTR_BRAND		11
91
92/* Start of the brand-specific attribute namespace */
93#define	ZONE_ATTR_BRAND_ATTRS	32768
94
/*
 * String constants describing zone status-change events: the channel name
 * plus the class and subclass of the event.
 */
95#define	ZONE_EVENT_CHANNEL	"com.sun:zones:status"
96#define	ZONE_EVENT_STATUS_CLASS	"status"
97#define	ZONE_EVENT_STATUS_SUBCLASS	"change"
98
/*
 * State names carried by status events.
 * NOTE(review): only four names are defined here although zone_status_t
 * has nine states; how the remaining states are reported is not visible
 * in this file -- confirm against the event producer.
 */
99#define	ZONE_EVENT_UNINITIALIZED	"uninitialized"
100#define	ZONE_EVENT_READY		"ready"
101#define	ZONE_EVENT_RUNNING		"running"
102#define	ZONE_EVENT_SHUTTING_DOWN	"shutting_down"
103
/*
 * NOTE(review): presumably the attribute names found in the payload of a
 * status event -- confirm.
 */
104#define	ZONE_CB_NAME		"zonename"
105#define	ZONE_CB_NEWSTATE	"newstate"
106#define	ZONE_CB_OLDSTATE	"oldstate"
107#define	ZONE_CB_TIMESTAMP	"when"
108#define	ZONE_CB_ZONEID		"zoneid"
109
110/*
111 * Exit values that may be returned by scripts or programs invoked by various
112 * zone commands.
113 *
114 * These are defined as:
115 *
116 *	ZONE_SUBPROC_OK
117 *	===============
118 *	The subprocess completed successfully.
119 *
120 *	ZONE_SUBPROC_USAGE
121 *	==================
122 *	The subprocess failed with a usage message, or a usage message should
123 *	be output on its behalf.
124 *
125 *	ZONE_SUBPROC_NOTCOMPLETE
126 *	========================
127 *	The subprocess did not complete, but the actions performed by the
128 *	subprocess require no recovery actions by the user.
129 *
130 *	For example, if the subprocess were called by "zoneadm install," the
131 *	installation of the zone did not succeed but the user need not perform
132 *	a "zoneadm uninstall" before attempting another install.
133 *
134 *	ZONE_SUBPROC_FATAL
135 *	==================
136 *	The subprocess failed in a fatal manner, usually one that will require
137 *	some type of recovery action by the user.
138 *
139 *	For example, if the subprocess were called by "zoneadm install," the
140 *	installation of the zone did not succeed and the user will need to
141 *	perform a "zoneadm uninstall" before another install attempt is
142 *	possible.
143 *
144 *	The non-success exit values are large to avoid accidental collision
145 *	with values used internally by some commands (e.g. "Z_ERR" and
146 *	"Z_USAGE" as used by zoneadm.)
147 */
148#define	ZONE_SUBPROC_OK			0	/* success */
149#define	ZONE_SUBPROC_USAGE		253	/* usage error */
150#define	ZONE_SUBPROC_NOTCOMPLETE	254	/* failed, no recovery needed */
151#define	ZONE_SUBPROC_FATAL		255	/* failed, recovery needed */
152
153#ifdef _SYSCALL32
/*
 * Variant of zone_def in which every pointer member is a caddr32_t and
 * every size member is a size32_t.  Members appear in the same order as in
 * zone_def.  Only declared when _SYSCALL32 is defined.
 * NOTE(review): presumably the layout used to read the arguments of a
 * 32-bit caller -- confirm against the zone() implementation.
 */
154typedef struct {
155	caddr32_t zone_name;
156	caddr32_t zone_root;
157	caddr32_t zone_privs;
158	size32_t zone_privssz;
159	caddr32_t rctlbuf;
160	size32_t rctlbufsz;
161	caddr32_t extended_error;
162	caddr32_t zfsbuf;
163	size32_t  zfsbufsz;
164	int match;			/* match level */
165	uint32_t doi;			/* DOI for label */
166	caddr32_t label;		/* label associated with zone */
167} zone_def32;
168#endif
/*
 * Description of a zone, expressed with native pointers and size_t.
 * zone_privs, rctlbuf and zfsbuf are each immediately followed by a size
 * member (zone_privssz, rctlbufsz, zfsbufsz).  extended_error is the only
 * non-const pointer.
 * NOTE(review): presumably the argument block for ZONE_CREATE, with
 * extended_error receiving one of the ZE_* codes -- confirm.
 */
169typedef struct {
170	const char *zone_name;
171	const char *zone_root;
172	const struct priv_set *zone_privs;
173	size_t zone_privssz;
174	const char *rctlbuf;
175	size_t rctlbufsz;
176	int *extended_error;
177	const char *zfsbuf;
178	size_t zfsbufsz;
179	int match;			/* match level */
180	uint32_t doi;			/* DOI for label */
181	const bslabel_t *label;		/* label associated with zone */
182} zone_def;
183
184/*
 * extended error information
 * NOTE(review): presumably the values stored through
 * zone_def.extended_error -- confirm.
 */
185#define	ZE_UNKNOWN	0	/* No extended error info */
186#define	ZE_CHROOTED	1	/* tried to zone_create from chroot */
187#define	ZE_AREMOUNTS	2	/* there are mounts within the zone */
188
189/*
 * zone_status
 *
 * Enumerators are numbered consecutively from ZONE_IS_UNINITIALIZED (0)
 * through ZONE_IS_DEAD (8).  ZONE_MIN_STATE and ZONE_MAX_STATE alias the
 * first and last enumerator.
 */
190typedef enum {
191	ZONE_IS_UNINITIALIZED = 0,
192	ZONE_IS_READY,
193	ZONE_IS_BOOTING,
194	ZONE_IS_RUNNING,
195	ZONE_IS_SHUTTING_DOWN,
196	ZONE_IS_EMPTY,
197	ZONE_IS_DOWN,
198	ZONE_IS_DYING,
199	ZONE_IS_DEAD
200} zone_status_t;
201#define	ZONE_MIN_STATE		ZONE_IS_UNINITIALIZED
202#define	ZONE_MAX_STATE		ZONE_IS_DEAD
203
204/*
205 * Valid commands which may be issued by zoneadm to zoneadmd.  The kernel also
206 * communicates with zoneadmd, but only uses Z_REBOOT and Z_HALT.
 *
 * No enumerator has an explicit value, so they are numbered from 0
 * (Z_READY) in declaration order.
207 */
208typedef enum zone_cmd {
209	Z_READY, Z_BOOT, Z_FORCEBOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
210	Z_MOUNT, Z_FORCEMOUNT, Z_UNMOUNT
211} zone_cmd_t;
212
213/*
214 * The structure of a request to zoneadmd.
 *
 * Layout: a 64-bit uniqid, the command, an explicit 32-bit pad, then two
 * fixed-size character arrays (MAXPATHLEN and BOOTARGS_MAX bytes).
215 */
216typedef struct zone_cmd_arg {
217	uint64_t	uniqid;		/* unique "generation number" */
218	zone_cmd_t	cmd;		/* requested action */
219	uint32_t	_pad;		/* need consistent 32/64 bit alignmt */
220	char locale[MAXPATHLEN];	/* locale in which to render messages */
221	char bootbuf[BOOTARGS_MAX];	/* arguments passed to zone_boot() */
222} zone_cmd_arg_t;
223
224/*
225 * Structure of zoneadmd's response to a request.  A NULL return value means
226 * the caller should attempt to restart zoneadmd and retry.
 *
 * errbuf is declared with a single element but documented as
 * variable-sized.
 * NOTE(review): a response is therefore presumably allocated larger than
 * sizeof (zone_cmd_rval_t), with the message text extending past the
 * declared end of the structure -- confirm against the producer.
227 */
228typedef struct zone_cmd_rval {
229	int rval;			/* return value of request */
230	char errbuf[1];	/* variable-sized buffer containing error messages */
231} zone_cmd_rval_t;
232
233/*
234 * The zone support infrastructure uses the zone name as a component
235 * of unix domain (AF_UNIX) sockets, which are limited to 108 characters
236 * in length, so ZONENAME_MAX is limited by that.
237 */
238#define	ZONENAME_MAX		64
239
/* Name of the global zone. */
240#define	GLOBAL_ZONENAME		"global"
241
242/*
243 * Extended Regular expression (see regex(5)) which matches all valid zone
244 * names.
 *
 * The expression requires one alphanumeric character followed by up to 62
 * characters from [-_.a-zA-Z0-9], i.e. names of 1 to 63 characters, which
 * is at most ZONENAME_MAX - 1.
 * NOTE(review): the remaining byte is presumably the terminating NUL --
 * confirm.
245 */
246#define	ZONENAME_REGEXP		"[a-zA-Z0-9][-_.a-zA-Z0-9]{0,62}"
247
248/*
249 * Where the zones support infrastructure places temporary files.
250 */
251#define	ZONES_TMPDIR		"/var/run/zones"
252
253/*
254 * The path to the door used by clients to communicate with zoneadmd.
 *
 * This is a printf-style format built by string-literal concatenation
 * with ZONES_TMPDIR; it contains a single %s conversion.
 * NOTE(review): the %s is presumably replaced by the zone name -- confirm.
255 */
256#define	ZONE_DOOR_PATH		ZONES_TMPDIR "/%s.zoneadmd_door"
257
258#ifdef _KERNEL
259/*
260 * We need to protect the definition of 'list_t' from userland applications and
261 * libraries which may be defining their own versions.
262 */
263#include <sys/list.h>
264
265#define	GLOBAL_ZONEUNIQID	0	/* uniqid of the global zone */
266
267/*
 * zone_flags
 *
 * Each flag occupies a distinct bit so the values can be OR-ed together.
 */
268#define	ZF_DESTROYED		0x1	/* ZSD destructor callbacks run */
269#define	ZF_HASHED_LABEL		0x2	/* zone has a unique label */
270#define	ZF_IS_SCRATCH		0x4	/* scratch zone */
271
/*
 * Forward declarations: this header only uses pointers to these types, so
 * their full definitions are not needed here.
 */
272struct pool;
273struct brand;
274
275/*
276 * Structure to record list of ZFS datasets exported to a zone.
 * One node is kept per dataset; zd_linkage is the list linkage.
 * NOTE(review): nodes presumably live on zone_t.zone_datasets -- confirm.
277 */
278typedef struct zone_dataset {
279	char		*zd_dataset;	/* dataset name */
280	list_node_t	zd_linkage;
281} zone_dataset_t;
282
/*
 * Kernel state kept for one zone.  The comments on individual members (or
 * on the group that follows them) name the lock that protects each field;
 * members described as never modified once set carry no lock annotation.
 */
283typedef struct zone {
284	/*
285	 * zone_name is never modified once set.
286	 */
287	char		*zone_name;	/* zone's configuration name */
288	/*
289	 * zone_nodename and zone_domain are never freed once allocated.
290	 */
291	char		*zone_nodename;	/* utsname.nodename equivalent */
292	char		*zone_domain;	/* srpc_domain equivalent */
293	/*
294	 * zone_lock protects the following fields of a zone_t:
295	 * 	zone_ref
296	 * 	zone_cred_ref
297	 * 	zone_ntasks
298	 * 	zone_flags
299	 * 	zone_zsd
300	 */
301	kmutex_t	zone_lock;
302	/*
303	 * zone_linkage is the zone's linkage into the active or
304	 * death-row list.  The field is protected by zonehash_lock.
305	 */
306	list_node_t	zone_linkage;
307	zoneid_t	zone_id;	/* ID of zone */
308	uint_t		zone_ref;	/* count of zone_hold()s on zone */
309	uint_t		zone_cred_ref;	/* count of zone_cred_hold()s on zone */
310	/*
311	 * zone_rootvp and zone_rootpath can never be modified once set.
312	 */
313	struct vnode	*zone_rootvp;	/* zone's root vnode */
314	char		*zone_rootpath;	/* Path to zone's root + '/' */
315	ushort_t	zone_flags;	/* misc flags */
316	zone_status_t	zone_status;	/* protected by zone_status_lock */
317	uint_t		zone_ntasks;	/* number of tasks executing in zone */
318	kmutex_t	zone_nlwps_lock; /* protects zone_nlwps, and *_nlwps */
319					/* counters in projects and tasks */
320					/* that are within the zone */
321	rctl_qty_t	zone_nlwps;	/* number of lwps in zone */
322	rctl_qty_t	zone_nlwps_ctl; /* protected by zone_rctls->rcs_lock */
323	rctl_qty_t	zone_shmmax;	/* System V shared memory usage */
324	ipc_rqty_t	zone_ipc;	/* System V IPC id resource usage */
325
326	uint_t		zone_rootpathlen; /* strlen(zone_rootpath) + 1 */
327	uint32_t	zone_shares;	/* FSS shares allocated to zone */
328	rctl_set_t	*zone_rctls;	/* zone-wide (zone.*) rctls */
329	list_t		zone_zsd;	/* list of Zone-Specific Data values */
330	kcondvar_t	zone_cv;	/* used to signal state changes */
331	struct proc	*zone_zsched;	/* Dummy kernel "zsched" process */
332	pid_t		zone_proc_initpid; /* pid of "init" for this zone */
333	char		*zone_initname;	/* fs path to 'init' */
334	int		zone_boot_err;  /* for zone_boot() if boot fails */
335	char		*zone_bootargs;	/* arguments passed via zone_boot() */
336	/*
337	 * zone_kthreads is protected by zone_status_lock.
338	 */
339	kthread_t	*zone_kthreads;	/* kernel threads in zone */
340	struct priv_set	*zone_privset;	/* limit set for zone */
341	/*
342	 * zone_vfslist is protected by vfs_list_lock().
343	 */
344	struct vfs	*zone_vfslist;	/* list of FS's mounted in zone */
345	uint64_t	zone_uniqid;	/* unique zone generation number */
346	struct cred	*zone_kcred;	/* kcred-like, zone-limited cred */
347	/*
348	 * zone_pool is protected by pool_lock().
349	 */
350	struct pool	*zone_pool;	/* pool the zone is bound to */
351	hrtime_t	zone_pool_mod;	/* last pool bind modification time */
352	/* zone_psetid is protected by cpu_lock */
353	psetid_t	zone_psetid;	/* pset the zone is bound to */
354	/*
355	 * The following two can be read without holding any locks.  They are
356	 * updated under cpu_lock.
357	 */
358	int		zone_ncpus;  /* zone's idea of ncpus */
359	int		zone_ncpus_online; /* zone's idea of ncpus_online */
360	/*
361	 * List of ZFS datasets exported to this zone.
362	 */
363	list_t		zone_datasets;	/* list of datasets */
364
365	ts_label_t	*zone_slabel;	/* zone sensitivity label */
366	int		zone_match;	/* require label match for packets */
367	tsol_mlp_list_t zone_mlps;	/* MLPs on zone-private addresses */
368
369	boolean_t	zone_restart_init;	/* Restart init if it dies? */
370	struct brand	*zone_brand;		/* zone's brand */
371} zone_t;
372
373/*
374 * Special value of zone_psetid to indicate that pools are disabled.
 * It reuses PS_MYID rather than defining a new constant.
375 */
376#define	ZONE_PS_INVAL	PS_MYID
377
/*
 * Zone-related globals defined elsewhere in the kernel.
 * NOTE(review): zone0 is presumably the statically allocated global zone
 * that global_zone points to, maxzones the limit on the number of zones,
 * and rc_zone_nlwps the handle of the zone-wide lwp-count resource
 * control -- confirm against the defining file.
 */
378extern zone_t zone0;
379extern zone_t *global_zone;
380extern uint_t maxzones;
381extern rctl_hndl_t rc_zone_nlwps;
382
/*
 * Core zone entry points.
 *
 * The hold/rele routines come in three pairs: zone_hold()/zone_rele(),
 * zone_cred_hold()/zone_cred_rele() and zone_task_hold()/zone_task_rele().
 * NOTE(review): they presumably adjust zone_ref, zone_cred_ref and
 * zone_ntasks respectively, and zone() presumably dispatches on the
 * ZONE_* system call subcodes -- confirm.
 * NOTE(review): whether the zone_find_by_*() routines return a held zone
 * is not visible from this header -- check before using the result.
 */
383extern long zone(int, void *, void *, void *, void *);
384extern void zone_zsd_init(void);
385extern void zone_init(void);
386extern void zone_hold(zone_t *);
387extern void zone_rele(zone_t *);
388extern void zone_cred_hold(zone_t *);
389extern void zone_cred_rele(zone_t *);
390extern void zone_task_hold(zone_t *);
391extern void zone_task_rele(zone_t *);
392extern zone_t *zone_find_by_id(zoneid_t);
393extern zone_t *zone_find_by_label(const ts_label_t *);
394extern zone_t *zone_find_by_name(char *);
395extern zone_t *zone_find_by_any_path(const char *, boolean_t);
396extern zone_t *zone_find_by_path(const char *);
397extern zoneid_t getzoneid(void);
398
399/*
400 * Zone-specific data (ZSD) APIs
401 */
402/*
403 * The following is what code should be initializing its zone_key_t to if it
404 * calls zone_getspecific() without necessarily knowing that zone_key_create()
405 * has been called on the key.
406 */
407#define	ZONE_KEY_UNINITIALIZED	0
408
409typedef uint_t zone_key_t;
410
/*
 * zone_key_create() takes three callbacks whose signatures match the
 * zsd_create, zsd_shutdown and zsd_destroy members of struct zsd_entry.
 * NOTE(review): they are presumably passed in that order -- confirm.
 */
411extern void	zone_key_create(zone_key_t *, void *(*)(zoneid_t),
412    void (*)(zoneid_t, void *), void (*)(zoneid_t, void *));
413extern int 	zone_key_delete(zone_key_t);
414extern void	*zone_getspecific(zone_key_t, zone_t *);
415extern int	zone_setspecific(zone_key_t, zone_t *, const void *);
416
417/*
418 * The definition of a zsd_entry is truly private to zone.c and is only
419 * placed here so it can be shared with mdb.
 *
 * Each entry pairs a key with its value and the three lifecycle
 * callbacks; zsd_linkage is the list linkage.
420 */
421struct zsd_entry {
422	zone_key_t		zsd_key;	/* Key used to lookup value */
423	void			*zsd_data;	/* Caller-managed value */
424	/*
425	 * Callbacks to be executed when a zone is created, shutdown, and
426	 * destroyed, respectively.
427	 */
428	void			*(*zsd_create)(zoneid_t);
429	void			(*zsd_shutdown)(zoneid_t, void *);
430	void			(*zsd_destroy)(zoneid_t, void *);
431	list_node_t		zsd_linkage;
432};
433
434/*
435 * Macros to help with zone visibility restrictions.
436 */
437
438/*
439 * Is process in the global zone?
 *
 * p must point to a structure with a p_zone member; the test is a pointer
 * comparison against global_zone.
440 */
441#define	INGLOBALZONE(p) \
442	((p)->p_zone == global_zone)
443
444/*
445 * Can process view objects in given zone?
 *
 * True when the process's own zone has id zoneid, or when the process is
 * in the global zone.  p is evaluated twice (once here and once inside
 * INGLOBALZONE), so it must not have side effects.
446 */
447#define	HASZONEACCESS(p, zoneid) \
448	((p)->p_zone->zone_id == (zoneid) || INGLOBALZONE(p))
449
450/*
451 * Convenience macro to see if a resolved path is visible from within a
452 * given zone.
453 *
454 * The basic idea is that the first (zone_rootpathlen - 1) bytes of the
455 * two strings must be equal.  Since zone_rootpath has a trailing '/',
456 * we want to skip everything in the path up to (but not including) the
457 * trailing '/'.
 *
 * zone_rootpathlen is strlen(zone_rootpath) + 1, so the comparison covers
 * exactly the characters of zone_rootpath.  zone is evaluated twice.
458 */
459#define	ZONE_PATH_VISIBLE(path, zone) \
460	(strncmp((path), (zone)->zone_rootpath,		\
461	    (zone)->zone_rootpathlen - 1) == 0)
462
463/*
464 * Convenience macro to go from the global view of a path to that seen
465 * from within said zone.  It is the responsibility of the caller to
466 * ensure that the path is a resolved one (ie, no '..'s or '.'s), and is
467 * in fact visible from within the zone.
 *
 * The result is a pointer into path, not a copy.  The offset
 * (zone_rootpathlen - 2) equals strlen(zone_rootpath) - 1, the index of
 * zone_rootpath's trailing '/', so the translated path starts with '/'.
 * The visibility requirement is only checked through ASSERT().
468 */
469#define	ZONE_PATH_TRANSLATE(path, zone)	\
470	(ASSERT(ZONE_PATH_VISIBLE(path, zone)),	\
471	(path) + (zone)->zone_rootpathlen - 2)
472
473/*
474 * Special processes visible in all zones.
 *
 * True for pids 0 and 1.  x is evaluated twice, so it must not have side
 * effects.
475 */
476#define	ZONE_SPECIALPID(x)	 ((x) == 0 || (x) == 1)
477
478/*
479 * Zone-safe version of thread_create() to be used when the caller wants to
480 * create a kernel thread to run within the current zone's context.
 *
 * NOTE(review): the parameters are unnamed here; they presumably follow
 * thread_create() minus the ones implied by the current zone, and
 * zthread_exit() is presumably the matching way for such a thread to
 * terminate -- confirm against the definitions.
481 */
482extern kthread_t *zthread_create(caddr_t, size_t, void (*)(), void *, size_t,
483    pri_t);
484extern void zthread_exit(void);
485
486/*
487 * Functions for an external observer to register interest in a zone's status
488 * change.  Observers will be woken up when the zone status equals the status
489 * argument passed in (in the case of zone_status_timedwait, the function may
490 * also return because of a timeout; zone_status_wait_sig may return early due
491 * to a signal being delivered; zone_status_timedwait_sig may return for any of
492 * the above reasons).
493 *
494 * Otherwise these behave identically to cv_timedwait(), cv_wait(), and
495 * cv_wait_sig() respectively.
 *
 * The two timed variants take an additional clock_t argument and return a
 * clock_t; see cv_timedwait() for the meaning of both.
496 */
497extern clock_t zone_status_timedwait(zone_t *, clock_t, zone_status_t);
498extern clock_t zone_status_timedwait_sig(zone_t *, clock_t, zone_status_t);
499extern void zone_status_wait(zone_t *, zone_status_t);
500extern int zone_status_wait_sig(zone_t *, zone_status_t);
501
502/*
503 * Get the status of the zone (at the time it was called).  The state may
504 * have progressed by the time it is returned.
505 */
506extern zone_status_t zone_status_get(zone_t *);
507
508/*
509 * Get the "kcred" credentials corresponding to the given zone.
 * The zone is identified by id rather than by zone_t pointer.
 * NOTE(review): whether the returned cred is held, and what is returned
 * for an unknown id, is not visible from this header -- confirm.
510 */
511extern struct cred *zone_get_kcred(zoneid_t);
512
513/*
514 * Get/set the pool the zone is currently bound to.
 * The zone_pool member of zone_t is documented as protected by
 * pool_lock().
 * NOTE(review): callers are presumably expected to hold that lock --
 * confirm.
515 */
516extern struct pool *zone_pool_get(zone_t *);
517extern void zone_pool_set(zone_t *, struct pool *);
518
519/*
520 * Get/set the pset the zone is currently using.
 * The zone_psetid member of zone_t is documented as protected by cpu_lock;
 * ZONE_PS_INVAL is the value used when pools are disabled.
521 */
522extern psetid_t zone_pset_get(zone_t *);
523extern void zone_pset_set(zone_t *, psetid_t);
524
525/*
526 * Get the number of cpus/online-cpus visible from the given zone.
 * NOTE(review): presumably these report zone_ncpus and zone_ncpus_online,
 * which zone_t documents as readable without holding any lock -- confirm.
527 */
528extern int zone_ncpus_get(zone_t *);
529extern int zone_ncpus_online_get(zone_t *);
530
531/*
532 * Returns true if the named pool/dataset is visible in the current zone.
 * NOTE(review): the meaning of the int * output argument is not visible
 * from this header -- confirm against the definition before relying on it.
533 */
534extern int zone_dataset_visible(const char *, int *);
535
536/*
537 * zone version of kadmin()
538 */
539extern int zone_kadmin(int, int, const char *, cred_t *);
540extern void zone_shutdown_global(void);
541
/*
 * NOTE(review): presumably called as a pair around a mount operation so
 * that zone code can tell when a mount is under way -- confirm.
 */
542extern void mount_in_progress(void);
543extern void mount_completed(void);
544
/*
 * Calls the supplied callback with a zone_t and the opaque void * argument.
 * NOTE(review): presumably once per zone, stopping when the callback
 * returns non-zero -- confirm against the definition.
 */
545extern int zone_walk(int (*)(zone_t *, void *), void *);
546
547#endif	/* _KERNEL */
548
549#ifdef	__cplusplus
550}
551#endif
552
553#endif	/* _SYS_ZONE_H */
554