s10_brand.c revision 12199:2dbcb597eb37
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26#include <errno.h>
27#include <fcntl.h>
28#include <dirent.h>
29#include <stddef.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <strings.h>
33#include <unistd.h>
34#include <thread.h>
35#include <sys/auxv.h>
36#include <sys/brand.h>
37#include <sys/inttypes.h>
38#include <sys/lwp.h>
39#include <sys/syscall.h>
40#include <sys/systm.h>
41#include <sys/utsname.h>
42#include <sys/sysconfig.h>
43#include <sys/systeminfo.h>
44#include <sys/zone.h>
45#include <sys/stat.h>
46#include <sys/mntent.h>
47#include <sys/ctfs.h>
48#include <sys/priv.h>
49#include <sys/acctctl.h>
50#include <libgen.h>
51#include <bsm/audit.h>
52#include <sys/crypto/ioctl.h>
53#include <sys/fs/zfs.h>
54#include <sys/zfs_ioctl.h>
55#include <sys/ucontext.h>
56#include <sys/mntio.h>
57#include <sys/mnttab.h>
58#include <sys/attr.h>
59#include <atomic.h>
60
61#include <s10_brand.h>
62#include <brand_misc.h>
63#include <s10_misc.h>
64#include <s10_signal.h>
65
66/*
67 * See usr/src/lib/brand/shared/brand/common/brand_util.c for general
68 * emulation notes.
69 */
70
71static zoneid_t zoneid;
72static boolean_t emul_global_zone = B_FALSE;
73static s10_emul_bitmap_t emul_bitmap;
74pid_t zone_init_pid;
75
/*
 * S10_FEATURE_IS_PRESENT supports conditional emulation.  For every constant
 * N in the s10_emulated_features enumeration (see
 * usr/src/uts/common/brand/solaris10/s10_brand.h),
 * S10_FEATURE_IS_PRESENT(N) is true iff the feature/backport that N stands
 * for exists in the Solaris 10 image hosted by the zone.  Put differently,
 * it is true iff the file /usr/lib/brand/solaris10/M, where M is the numeric
 * value of N, was present in the zone when the zone booted.
 *
 * The macro tests bit (N & 7) of byte (N >> 3) of emul_bitmap.
 *
 *
 * *** Sample Usage
 *
 * Say a fix is being backported to Solaris 10 and the brand currently
 * emulates that fix, and the emulation becomes unnecessary once the fix is
 * in the hosted Solaris 10 environment.  Append a constant, e.g.
 * "S10_FEATURE_X", to the end of the s10_emulated_features enumeration to
 * represent the backport.  If S10_FEATURE_X evaluates to four, the backport
 * should deliver a file named /usr/lib/brand/solaris10/4.  Keep the
 * emulation, but perform it only when
 * S10_FEATURE_IS_PRESENT(S10_FEATURE_X) is false, along these lines:
 *
 *	static int
 *	my_emul_function(sysret_t *rv, ...)
 *	{
 *		if (S10_FEATURE_IS_PRESENT(S10_FEATURE_X)) {
 *			// Don't emulate
 *			return (__systemcall(rv, ...));
 *		} else {
 *			// Emulate whatever needs to be emulated when the
 *			// backport isn't present in the Solaris 10 image.
 *		}
 *	}
 */
#define	S10_FEATURE_IS_PRESENT(feature)	\
	(((emul_bitmap[(feature) >> 3] >> ((feature) & 0x7)) & 1) != 0)
115
116brand_sysent_table_t brand_sysent_table[];
117
/*
 * Release and version strings associated with the Solaris 10 brand.
 * NOTE(review): presumably reported to branded processes by the uname
 * emulation; the consumers are outside this part of the file.
 */
#define	S10_UTS_RELEASE		"5.10"
#define	S10_UTS_VERSION		"Generic_Virtual"
120
121/*
122 * Figures out the PID of init for the zone.  Also returns a boolean
123 * indicating whether this process currently has that pid: if so,
124 * then at this moment, we are init.
125 */
126static boolean_t
127get_initpid_info(void)
128{
129	pid_t pid;
130	sysret_t rval;
131	int err;
132
133	/*
134	 * Determine the current process PID and the PID of the zone's init.
135	 * We use care not to call getpid() here, because we're not supposed
136	 * to call getpid() until after the program is fully linked-- the
137	 * first call to getpid() is a signal from the linker to debuggers
138	 * that linking has been completed.
139	 */
140	if ((err = __systemcall(&rval, SYS_brand,
141	    B_S10_PIDINFO, &pid, &zone_init_pid)) != 0) {
142		brand_abort(err, "Failed to get init's pid");
143	}
144
145	/*
146	 * Note that we need to be cautious with the pid we get back--
147	 * it should not be stashed and used in place of getpid(), since
148	 * we might fork(2).  So we keep zone_init_pid and toss the pid
149	 * we otherwise got.
150	 */
151	if (pid == zone_init_pid)
152		return (B_TRUE);
153
154	return (B_FALSE);
155}
156
157/* Free the thread-local storage provided by mntfs_get_mntentbuf(). */
158static void
159mntfs_free_mntentbuf(void *arg)
160{
161	struct mntentbuf *embufp = arg;
162
163	if (embufp == NULL)
164		return;
165	if (embufp->mbuf_emp)
166		free(embufp->mbuf_emp);
167	if (embufp->mbuf_buf)
168		free(embufp->mbuf_buf);
169	bzero(embufp, sizeof (struct mntentbuf));
170	free(embufp);
171}
172
173/* Provide the thread-local storage required by mntfs_ioctl(). */
174static struct mntentbuf *
175mntfs_get_mntentbuf(size_t size)
176{
177	static mutex_t keylock;
178	static thread_key_t key;
179	static int once_per_keyname = 0;
180	void *tsd = NULL;
181	struct mntentbuf *embufp;
182
183	/* Create the key. */
184	if (!once_per_keyname) {
185		(void) mutex_lock(&keylock);
186		if (!once_per_keyname) {
187			if (thr_keycreate(&key, mntfs_free_mntentbuf)) {
188				(void) mutex_unlock(&keylock);
189				return (NULL);
190			} else {
191				once_per_keyname++;
192			}
193		}
194		(void) mutex_unlock(&keylock);
195	}
196
197	/*
198	 * The thread-specific datum for this key is the address of a struct
199	 * mntentbuf. If this is the first time here then we allocate the struct
200	 * and its contents, and associate its address with the thread; if there
201	 * are any problems then we abort.
202	 */
203	if (thr_getspecific(key, &tsd))
204		return (NULL);
205	if (tsd == NULL) {
206		if (!(embufp = calloc(1, sizeof (struct mntentbuf))) ||
207		    !(embufp->mbuf_emp = malloc(sizeof (struct extmnttab))) ||
208		    thr_setspecific(key, embufp)) {
209			mntfs_free_mntentbuf(embufp);
210			return (NULL);
211		}
212	} else {
213		embufp = tsd;
214	}
215
216	/* Return the buffer, resizing it if necessary. */
217	if (size > embufp->mbuf_bufsize) {
218		if (embufp->mbuf_buf)
219			free(embufp->mbuf_buf);
220		if ((embufp->mbuf_buf = malloc(size)) == NULL) {
221			embufp->mbuf_bufsize = 0;
222			return (NULL);
223		} else {
224			embufp->mbuf_bufsize = size;
225		}
226	}
227	return (embufp);
228}
229
230/*
231 * The MNTIOC_GETMNTENT command in this release differs from that in early
232 * versions of Solaris 10.
233 *
234 * Previously, the command would copy a pointer to a struct extmnttab to an
235 * address provided as an argument. The pointer would be somewhere within a
236 * mapping already present within the user's address space. In addition, the
237 * text to which the struct's members pointed would also be within a
238 * pre-existing mapping. Now, the user is required to allocate memory for both
239 * the struct and the text buffer, and to pass the address of each within a
240 * struct mntentbuf. In order to conceal these details from a Solaris 10 client
241 * we allocate some thread-local storage in which to create the necessary data
242 * structures; this is static, thread-safe memory that will be cleaned up
243 * without the caller's intervention.
244 *
245 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY are new in this release; they should
246 * not work for older clients.
247 */
248int
249mntfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
250{
251	int err;
252	struct stat statbuf;
253	struct mntentbuf *embufp;
254	static size_t bufsize = MNT_LINE_MAX;
255
256	/* Do not emulate mntfs commands from up-to-date clients. */
257	if (S10_FEATURE_IS_PRESENT(S10_FEATURE_ALTERED_MNTFS_IOCTL))
258		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
259
260	/* Do not emulate mntfs commands directed at other file systems. */
261	if ((err = __systemcall(rval, SYS_fstatat + 1024,
262	    fdes, NULL, &statbuf, 0)) != 0)
263		return (err);
264	if (strcmp(statbuf.st_fstype, MNTTYPE_MNTFS) != 0)
265		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
266
267	if (cmd == MNTIOC_GETEXTMNTENT || cmd == MNTIOC_GETMNTANY)
268		return (EINVAL);
269
270	if ((embufp = mntfs_get_mntentbuf(bufsize)) == NULL)
271		return (ENOMEM);
272
273	/*
274	 * MNTIOC_GETEXTMNTENT advances the file pointer once it has
275	 * successfully copied out the result to the address provided. We
276	 * therefore need to check the user-supplied address now since the
277	 * one we'll be providing is guaranteed to work.
278	 */
279	if (brand_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
280		return (EFAULT);
281
282	/*
283	 * Keep retrying for as long as we fail for want of a large enough
284	 * buffer.
285	 */
286	for (;;) {
287		if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes,
288		    MNTIOC_GETEXTMNTENT, embufp)) != 0)
289			return (err);
290
291		if (rval->sys_rval1 == MNTFS_TOOLONG) {
292			/* The buffer wasn't large enough. */
293			(void) atomic_swap_ulong((unsigned long *)&bufsize,
294			    2 * embufp->mbuf_bufsize);
295			if ((embufp = mntfs_get_mntentbuf(bufsize)) == NULL)
296				return (ENOMEM);
297		} else {
298			break;
299		}
300	}
301
302	if (brand_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
303		return (EFAULT);
304
305	return (0);
306}
307
/*
 * Copy the member named by val from structure s (the source) into the
 * same-named member of structure d (the destination).  val may be a nested
 * member designator such as outer.inner.  The expression yields the assigned
 * value.
 */
#define	struct_assign(d, s, val)	((d).val = (s).val)
313
314/*
315 * The CRYPTO_GET_FUNCTION_LIST parameter structure crypto_function_list_t
316 * changed between S10 and Nevada, so we have to emulate the old S10
317 * crypto_function_list_t structure when interposing on the ioctl syscall.
318 */
319typedef struct s10_crypto_function_list {
320	boolean_t fl_digest_init;
321	boolean_t fl_digest;
322	boolean_t fl_digest_update;
323	boolean_t fl_digest_key;
324	boolean_t fl_digest_final;
325
326	boolean_t fl_encrypt_init;
327	boolean_t fl_encrypt;
328	boolean_t fl_encrypt_update;
329	boolean_t fl_encrypt_final;
330
331	boolean_t fl_decrypt_init;
332	boolean_t fl_decrypt;
333	boolean_t fl_decrypt_update;
334	boolean_t fl_decrypt_final;
335
336	boolean_t fl_mac_init;
337	boolean_t fl_mac;
338	boolean_t fl_mac_update;
339	boolean_t fl_mac_final;
340
341	boolean_t fl_sign_init;
342	boolean_t fl_sign;
343	boolean_t fl_sign_update;
344	boolean_t fl_sign_final;
345	boolean_t fl_sign_recover_init;
346	boolean_t fl_sign_recover;
347
348	boolean_t fl_verify_init;
349	boolean_t fl_verify;
350	boolean_t fl_verify_update;
351	boolean_t fl_verify_final;
352	boolean_t fl_verify_recover_init;
353	boolean_t fl_verify_recover;
354
355	boolean_t fl_digest_encrypt_update;
356	boolean_t fl_decrypt_digest_update;
357	boolean_t fl_sign_encrypt_update;
358	boolean_t fl_decrypt_verify_update;
359
360	boolean_t fl_seed_random;
361	boolean_t fl_generate_random;
362
363	boolean_t fl_session_open;
364	boolean_t fl_session_close;
365	boolean_t fl_session_login;
366	boolean_t fl_session_logout;
367
368	boolean_t fl_object_create;
369	boolean_t fl_object_copy;
370	boolean_t fl_object_destroy;
371	boolean_t fl_object_get_size;
372	boolean_t fl_object_get_attribute_value;
373	boolean_t fl_object_set_attribute_value;
374	boolean_t fl_object_find_init;
375	boolean_t fl_object_find;
376	boolean_t fl_object_find_final;
377
378	boolean_t fl_key_generate;
379	boolean_t fl_key_generate_pair;
380	boolean_t fl_key_wrap;
381	boolean_t fl_key_unwrap;
382	boolean_t fl_key_derive;
383
384	boolean_t fl_init_token;
385	boolean_t fl_init_pin;
386	boolean_t fl_set_pin;
387
388	boolean_t prov_is_hash_limited;
389	uint32_t prov_hash_threshold;
390	uint32_t prov_hash_limit;
391} s10_crypto_function_list_t;
392
393typedef struct s10_crypto_get_function_list {
394	uint_t				fl_return_value;
395	crypto_provider_id_t		fl_provider_id;
396	s10_crypto_function_list_t	fl_list;
397} s10_crypto_get_function_list_t;
398
399/*
400 * The structure returned by the CRYPTO_GET_FUNCTION_LIST ioctl on /dev/crypto
401 * increased in size due to:
402 *	6482533 Threshold for HW offload via PKCS11 interface
403 * between S10 and Nevada.  This is a relatively simple process of filling
404 * in the S10 structure fields with the Nevada data.
405 *
406 * We stat the device to make sure that the ioctl is meant for /dev/crypto.
407 *
408 */
409static int
410crypto_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
411{
412	int				err;
413	s10_crypto_get_function_list_t	s10_param;
414	crypto_get_function_list_t	native_param;
415	static dev_t			crypto_dev = (dev_t)-1;
416	struct stat			sbuf;
417
418	if (crypto_dev == (dev_t)-1) {
419		if ((err = __systemcall(rval, SYS_fstatat + 1024,
420		    AT_FDCWD, "/dev/crypto", &sbuf, 0)) != 0)
421			goto nonemuioctl;
422		crypto_dev = major(sbuf.st_rdev);
423	}
424	if ((err = __systemcall(rval, SYS_fstatat + 1024,
425	    fdes, NULL, &sbuf, 0)) != 0)
426		return (err);
427	/* Each open fd of /dev/crypto gets a new minor device. */
428	if (major(sbuf.st_rdev) != crypto_dev)
429		goto nonemuioctl;
430
431	if (brand_uucopy((const void *)arg, &s10_param, sizeof (s10_param))
432	    != 0)
433		return (EFAULT);
434	struct_assign(native_param, s10_param, fl_provider_id);
435	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd,
436	    &native_param)) != 0)
437		return (err);
438
439	struct_assign(s10_param, native_param, fl_return_value);
440	struct_assign(s10_param, native_param, fl_provider_id);
441
442	struct_assign(s10_param, native_param, fl_list.fl_digest_init);
443	struct_assign(s10_param, native_param, fl_list.fl_digest);
444	struct_assign(s10_param, native_param, fl_list.fl_digest_update);
445	struct_assign(s10_param, native_param, fl_list.fl_digest_key);
446	struct_assign(s10_param, native_param, fl_list.fl_digest_final);
447
448	struct_assign(s10_param, native_param, fl_list.fl_encrypt_init);
449	struct_assign(s10_param, native_param, fl_list.fl_encrypt);
450	struct_assign(s10_param, native_param, fl_list.fl_encrypt_update);
451	struct_assign(s10_param, native_param, fl_list.fl_encrypt_final);
452
453	struct_assign(s10_param, native_param, fl_list.fl_decrypt_init);
454	struct_assign(s10_param, native_param, fl_list.fl_decrypt);
455	struct_assign(s10_param, native_param, fl_list.fl_decrypt_update);
456	struct_assign(s10_param, native_param, fl_list.fl_decrypt_final);
457
458	struct_assign(s10_param, native_param, fl_list.fl_mac_init);
459	struct_assign(s10_param, native_param, fl_list.fl_mac);
460	struct_assign(s10_param, native_param, fl_list.fl_mac_update);
461	struct_assign(s10_param, native_param, fl_list.fl_mac_final);
462
463	struct_assign(s10_param, native_param, fl_list.fl_sign_init);
464	struct_assign(s10_param, native_param, fl_list.fl_sign);
465	struct_assign(s10_param, native_param, fl_list.fl_sign_update);
466	struct_assign(s10_param, native_param, fl_list.fl_sign_final);
467	struct_assign(s10_param, native_param, fl_list.fl_sign_recover_init);
468	struct_assign(s10_param, native_param, fl_list.fl_sign_recover);
469
470	struct_assign(s10_param, native_param, fl_list.fl_verify_init);
471	struct_assign(s10_param, native_param, fl_list.fl_verify);
472	struct_assign(s10_param, native_param, fl_list.fl_verify_update);
473	struct_assign(s10_param, native_param, fl_list.fl_verify_final);
474	struct_assign(s10_param, native_param, fl_list.fl_verify_recover_init);
475	struct_assign(s10_param, native_param, fl_list.fl_verify_recover);
476
477	struct_assign(s10_param, native_param,
478	    fl_list.fl_digest_encrypt_update);
479	struct_assign(s10_param, native_param,
480	    fl_list.fl_decrypt_digest_update);
481	struct_assign(s10_param, native_param, fl_list.fl_sign_encrypt_update);
482	struct_assign(s10_param, native_param,
483	    fl_list.fl_decrypt_verify_update);
484
485	struct_assign(s10_param, native_param, fl_list.fl_seed_random);
486	struct_assign(s10_param, native_param, fl_list.fl_generate_random);
487
488	struct_assign(s10_param, native_param, fl_list.fl_session_open);
489	struct_assign(s10_param, native_param, fl_list.fl_session_close);
490	struct_assign(s10_param, native_param, fl_list.fl_session_login);
491	struct_assign(s10_param, native_param, fl_list.fl_session_logout);
492
493	struct_assign(s10_param, native_param, fl_list.fl_object_create);
494	struct_assign(s10_param, native_param, fl_list.fl_object_copy);
495	struct_assign(s10_param, native_param, fl_list.fl_object_destroy);
496	struct_assign(s10_param, native_param, fl_list.fl_object_get_size);
497	struct_assign(s10_param, native_param,
498	    fl_list.fl_object_get_attribute_value);
499	struct_assign(s10_param, native_param,
500	    fl_list.fl_object_set_attribute_value);
501	struct_assign(s10_param, native_param, fl_list.fl_object_find_init);
502	struct_assign(s10_param, native_param, fl_list.fl_object_find);
503	struct_assign(s10_param, native_param, fl_list.fl_object_find_final);
504
505	struct_assign(s10_param, native_param, fl_list.fl_key_generate);
506	struct_assign(s10_param, native_param, fl_list.fl_key_generate_pair);
507	struct_assign(s10_param, native_param, fl_list.fl_key_wrap);
508	struct_assign(s10_param, native_param, fl_list.fl_key_unwrap);
509	struct_assign(s10_param, native_param, fl_list.fl_key_derive);
510
511	struct_assign(s10_param, native_param, fl_list.fl_init_token);
512	struct_assign(s10_param, native_param, fl_list.fl_init_pin);
513	struct_assign(s10_param, native_param, fl_list.fl_set_pin);
514
515	struct_assign(s10_param, native_param, fl_list.prov_is_hash_limited);
516	struct_assign(s10_param, native_param, fl_list.prov_hash_threshold);
517	struct_assign(s10_param, native_param, fl_list.prov_hash_limit);
518
519	return (brand_uucopy(&s10_param, (void *)arg, sizeof (s10_param)));
520
521nonemuioctl:
522	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
523}
524
/*
 * The process contract CT_TGET and CT_TSET parameter structure ct_param_t
 * changed between S10 and Nevada, so the old S10 ct_param_t layout is
 * reproduced here for use when interposing on the ioctl system call.
 * In S10 the 64-bit value is carried inside the structure itself.
 */
typedef struct s10_ct_param {
	uint32_t	ctpm_id, ctpm_pad;
	uint64_t	ctpm_value;
} s10_ct_param_t;
535
536/*
537 * We have to emulate process contract ioctls for init(1M) because the
538 * ioctl parameter structure changed between S10 and Nevada.  This is
539 * a relatively simple process of filling Nevada structure fields,
540 * shuffling values, and initiating a native system call.
541 *
542 * For now, we'll assume that all consumers of CT_TGET and CT_TSET will
543 * need emulation.  We'll issue a stat to make sure that the ioctl
544 * is meant for the contract file system.
545 *
546 */
547static int
548ctfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
549{
550	int err;
551	s10_ct_param_t s10param;
552	ct_param_t param;
553	struct stat statbuf;
554
555	if ((err = __systemcall(rval, SYS_fstatat + 1024,
556	    fdes, NULL, &statbuf, 0)) != 0)
557		return (err);
558	if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0)
559		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
560
561	if (brand_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0)
562		return (EFAULT);
563	param.ctpm_id = s10param.ctpm_id;
564	param.ctpm_size = sizeof (uint64_t);
565	param.ctpm_value = &s10param.ctpm_value;
566	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &param))
567	    != 0)
568		return (err);
569
570	if (cmd == CT_TGET)
571		return (brand_uucopy(&s10param, (void *)arg,
572		    sizeof (s10param)));
573
574	return (0);
575}
576
/*
 * This is S10u8's zinject_record_t structure.  Member order defines the
 * layout seen by S10u8 processes and must not be changed.  The comments
 * mark where Solaris Next inserted members that S10u8 lacks.
 */
typedef struct s10_zinject_record_u8 {
	uint64_t	zi_objset, zi_object;
	uint64_t	zi_start, zi_end;
	uint64_t	zi_guid;
	uint32_t	zi_level, zi_error;
	uint64_t	zi_type;
	uint32_t	zi_freq;
	/* Solaris Next added zi_failfast here */
	/* Solaris Next added zi_func here */
	/* Solaris Next added zi_iotype here */

	/* 64-bit alignment; renamed to zi_duration in Solaris Next */
	uint32_t	zi_pad;
	/* Solaris Next added zi_timer here */
} s10_zinject_record_u8_t;
597
598/*
599 * This is S10u8's zfs_cmd_t structure, which is used by ZFS ioctls.
600 */
601typedef struct s10_zfs_cmd_u8 {
602	char		zc_name[MAXPATHLEN];
603	char		zc_value[MAXPATHLEN * 2];
604	char		zc_string[MAXNAMELEN];
605	/* Solaris Next added zc_top_ds here */
606	uint64_t	zc_guid;
607	uint64_t	zc_nvlist_conf;		/* really (char *) */
608	uint64_t	zc_nvlist_conf_size;
609	uint64_t	zc_nvlist_src;		/* really (char *) */
610	uint64_t	zc_nvlist_src_size;
611	uint64_t	zc_nvlist_dst;		/* really (char *) */
612	uint64_t	zc_nvlist_dst_size;
613	uint64_t	zc_cookie;
614	uint64_t	zc_objset_type;
615	uint64_t	zc_perm_action;
616	uint64_t 	zc_history;		/* really (char *) */
617	uint64_t 	zc_history_len;
618	uint64_t	zc_history_offset;
619	uint64_t	zc_obj;
620	/* Solaris Next added zc_iflags member here */
621	zfs_share_t	zc_share;
622	dmu_objset_stats_t zc_objset_stats;
623	struct drr_begin zc_begin_record;
624	s10_zinject_record_u8_t zc_inject_record;
625	/* Solaris Next added zc_defer_destroy here */
626	/* Solaris Next added zc_temphold here */
627} s10_zfs_cmd_u8_t;
628
/*
 * Solaris Next removed these ZFS ioctls.  Each S10 command is expressed
 * here as the native zfs_ioc_t constant that zfs_ioctl() compares incoming
 * S10 command numbers against.
 */
#define	S10_ZFS_IOC_CREATE_MINOR		ZFS_IOC_CREATE
#define	S10_ZFS_IOC_REMOVE_MINOR		ZFS_IOC_DESTROY
#define	S10_ZFS_IOC_ISCSI_PERM_CHECK		ZFS_IOC_SMB_ACL
635
636/*
637 * ZFS ioctls changed between Solaris 10 (S10) and Solaris Next (S.Next).
638 * This emulation function translates S10 ZFS ioctls into their S.Next
639 * counterparts (and vice versa).
640 */
641static int
642zfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
643{
644	int				err;
645	s10_zfs_cmd_u8_t		s10_param;
646	zfs_cmd_t			native_param;
647	static dev_t			zfs_dev = (dev_t)-1;
648	struct stat			sbuf;
649
650	/*
651	 * Ensure that the ioctl is targeting the ZFS device, /dev/zfs.
652	 * If it isn't, then s10_ioctl() mistook the ioctl for a ZFS ioctl.
653	 * In that case, it doesn't need to be emulated, so we pass it to the
654	 * kernel.
655	 */
656	if (zfs_dev == (dev_t)-1) {
657		if ((err = __systemcall(rval, SYS_fstatat + 1024,
658		    AT_FDCWD, ZFS_DEV, &sbuf, 0) != 0) != 0)
659			goto passthruioctl;
660		zfs_dev = major(sbuf.st_rdev);
661	}
662	if ((err = __systemcall(rval, SYS_fstatat + 1024,
663	    fdes, NULL, &sbuf, 0)) != 0)
664		return (err);
665	if (major(sbuf.st_rdev) != zfs_dev)
666		goto passthruioctl;
667
668	/*
669	 * S.Next removed the ZFS_IOC_CREATE_MINOR, ZFS_IOC_REMOVE_MINOR, and
670	 * ZFS_IOC_ISCSI_PERM_CHECK ioctl commands, which were defined in the
671	 * middle of zfs_ioc_t.  This means that all ZFS ioctls numbers greater
672	 * than ZFS_IOC_REMOVE_MINOR must be decremented by two and those
673	 * greater than ZFS_IOC_ISCSI_PERM_CHECK must be decremented by three.
674	 * We'll return EPERM when the ioctl is ZFS_IOC_CREATE_MINOR or
675	 * ZFS_IOC_REMOVE_MINOR because that's what ZFS does inside native S10
676	 * zones.  We'll also return EPERM when the ioctl is
677	 * ZFS_IOC_ISCSI_PERM_CHECK.
678	 */
679	if (cmd == S10_ZFS_IOC_CREATE_MINOR ||
680	    cmd == S10_ZFS_IOC_REMOVE_MINOR ||
681	    cmd == S10_ZFS_IOC_ISCSI_PERM_CHECK)
682		return (EPERM);
683	if (cmd > S10_ZFS_IOC_ISCSI_PERM_CHECK)
684		cmd -= 3;
685	else if (cmd > S10_ZFS_IOC_REMOVE_MINOR)
686		cmd -= 2;
687
688	/*
689	 * S10u9's ZFS ioctls are compatible with their S.Next
690	 * counterparts (modulo some removed ioctl command numbers;
691	 * see above); consequently, we can pass the ioctl's
692	 * zfs_cmd_t structure to the S.Next kernel without modifying
693	 * it if the process is running in an S10u9 environment.
694	 * (S10_FEATURE_U9_ZFS_IOCTL indicates an S10u9 environment.)
695	 */
696	if (S10_FEATURE_IS_PRESENT(S10_FEATURE_U9_ZFS_IOCTL))
697		goto passthruioctl;
698
699	/*
700	 * The process is running in an S10u8 environment.
701	 * Copy the S10 process' ioctl structure to the stack.  We'll
702	 * copy the individual fields to a Solaris Next ZFS ioctl
703	 * structure.
704	 */
705	if (brand_uucopy((const void *)arg, &s10_param, sizeof (s10_param))
706	    != 0)
707		return (EFAULT);
708
709	/*
710	 * Copy fields from the S10 ioctl structure on the stack to the
711	 * Solaris Next ioctl structure.
712	 */
713	bcopy(s10_param.zc_name, native_param.zc_name,
714	    sizeof (native_param.zc_name));
715	bcopy(s10_param.zc_value, native_param.zc_value,
716	    sizeof (native_param.zc_value));
717	bcopy(s10_param.zc_string, native_param.zc_string,
718	    sizeof (native_param.zc_string));
719	struct_assign(native_param, s10_param, zc_guid);
720	struct_assign(native_param, s10_param, zc_nvlist_conf);
721	struct_assign(native_param, s10_param, zc_nvlist_conf_size);
722	struct_assign(native_param, s10_param, zc_nvlist_src);
723	struct_assign(native_param, s10_param, zc_nvlist_src_size);
724	struct_assign(native_param, s10_param, zc_nvlist_dst);
725	struct_assign(native_param, s10_param, zc_nvlist_dst_size);
726	struct_assign(native_param, s10_param, zc_cookie);
727	struct_assign(native_param, s10_param, zc_objset_type);
728	struct_assign(native_param, s10_param, zc_perm_action);
729	struct_assign(native_param, s10_param, zc_history);
730	struct_assign(native_param, s10_param, zc_history_len);
731	struct_assign(native_param, s10_param, zc_history_offset);
732	struct_assign(native_param, s10_param, zc_obj);
733	struct_assign(native_param, s10_param, zc_share);
734	struct_assign(native_param, s10_param, zc_objset_stats);
735	struct_assign(native_param, s10_param, zc_begin_record);
736	struct_assign(native_param, s10_param, zc_inject_record.zi_objset);
737	struct_assign(native_param, s10_param, zc_inject_record.zi_object);
738	struct_assign(native_param, s10_param, zc_inject_record.zi_start);
739	struct_assign(native_param, s10_param, zc_inject_record.zi_end);
740	struct_assign(native_param, s10_param, zc_inject_record.zi_guid);
741	struct_assign(native_param, s10_param, zc_inject_record.zi_level);
742	struct_assign(native_param, s10_param, zc_inject_record.zi_error);
743	struct_assign(native_param, s10_param, zc_inject_record.zi_type);
744	struct_assign(native_param, s10_param, zc_inject_record.zi_freq);
745
746	/*
747	 * Fill Solaris Next fields that aren't in S10u8 with sensible values.
748	 */
749	native_param.zc_top_ds[0] = '\0';
750	native_param.zc_iflags = 0;
751	native_param.zc_inject_record.zi_failfast = B_FALSE;
752	native_param.zc_inject_record.zi_func[0] = '\0';
753	native_param.zc_inject_record.zi_iotype = ZIO_TYPES;
754	native_param.zc_inject_record.zi_duration = 0;
755	native_param.zc_inject_record.zi_timer = 0;
756	native_param.zc_defer_destroy = B_FALSE;
757	native_param.zc_temphold = B_FALSE;
758
759	/*
760	 * Issue a native ZFS ioctl using the Solaris Next ioctl structure.
761	 */
762	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_param);
763
764	/*
765	 * Copy fields shared by both S10 and Solaris Next ioctl structures
766	 * from the Solaris Next structure to the S10 structure.
767	 */
768	bcopy(native_param.zc_name, s10_param.zc_name,
769	    sizeof (s10_param.zc_name));
770	bcopy(native_param.zc_value, s10_param.zc_value,
771	    sizeof (s10_param.zc_value));
772	bcopy(native_param.zc_string, s10_param.zc_string,
773	    sizeof (s10_param.zc_string));
774	struct_assign(s10_param, native_param, zc_guid);
775	struct_assign(s10_param, native_param, zc_nvlist_conf);
776	struct_assign(s10_param, native_param, zc_nvlist_conf_size);
777	struct_assign(s10_param, native_param, zc_nvlist_src);
778	struct_assign(s10_param, native_param, zc_nvlist_src_size);
779	struct_assign(s10_param, native_param, zc_nvlist_dst);
780	struct_assign(s10_param, native_param, zc_nvlist_dst_size);
781	struct_assign(s10_param, native_param, zc_cookie);
782	struct_assign(s10_param, native_param, zc_objset_type);
783	struct_assign(s10_param, native_param, zc_perm_action);
784	struct_assign(s10_param, native_param, zc_history);
785	struct_assign(s10_param, native_param, zc_history_len);
786	struct_assign(s10_param, native_param, zc_history_offset);
787	struct_assign(s10_param, native_param, zc_obj);
788	struct_assign(s10_param, native_param, zc_share);
789	struct_assign(s10_param, native_param, zc_objset_stats);
790	struct_assign(s10_param, native_param, zc_begin_record);
791	struct_assign(s10_param, native_param, zc_inject_record.zi_objset);
792	struct_assign(s10_param, native_param, zc_inject_record.zi_object);
793	struct_assign(s10_param, native_param, zc_inject_record.zi_start);
794	struct_assign(s10_param, native_param, zc_inject_record.zi_end);
795	struct_assign(s10_param, native_param, zc_inject_record.zi_guid);
796	struct_assign(s10_param, native_param, zc_inject_record.zi_level);
797	struct_assign(s10_param, native_param, zc_inject_record.zi_error);
798	struct_assign(s10_param, native_param, zc_inject_record.zi_type);
799	struct_assign(s10_param, native_param, zc_inject_record.zi_freq);
800
801	/*
802	 * Copy the S10 structure from the stack to the location
803	 * specified by the S10 process.
804	 */
805	(void) brand_uucopy(&s10_param, (void *)arg, sizeof (s10_param));
806	return (err);
807
808passthruioctl:
809	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
810}
811
812int
813s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
814{
815	switch (cmd) {
816	case CRYPTO_GET_FUNCTION_LIST:
817		return (crypto_ioctl(rval, fdes, cmd, arg));
818	case CT_TGET:
819		/*FALLTHRU*/
820	case CT_TSET:
821		return (ctfs_ioctl(rval, fdes, cmd, arg));
822	case MNTIOC_GETMNTENT:
823		/*FALLTHRU*/
824	case MNTIOC_GETEXTMNTENT:
825		/*FALLTHRU*/
826	case MNTIOC_GETMNTANY:
827		return (mntfs_ioctl(rval, fdes, cmd, arg));
828	}
829
830	if ((cmd & 0xff00) == ZFS_IOC)
831		return (zfs_ioctl(rval, fdes, cmd, arg));
832
833	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
834}
835
836/*
837 * Unfortunately, pwrite()'s behavior differs between S10 and Nevada when
838 * applied to files opened with O_APPEND.  The offset argument is ignored and
839 * the buffer is appended to the target file in S10, whereas the current file
840 * position is ignored in Nevada (i.e., pwrite() acts as though the target file
841 * wasn't opened with O_APPEND).  This is a result of the fix for CR 6655660
842 * (pwrite() must ignore the O_APPEND/FAPPEND flag).
843 *
844 * We emulate the old S10 pwrite() behavior by checking whether the target file
845 * was opened with O_APPEND.  If it was, then invoke the write() system call
846 * instead of pwrite(); otherwise, invoke the pwrite() system call as usual.
847 */
848static int
849s10_pwrite(sysret_t *rval, int fd, const void *bufferp, size_t num_bytes,
850    off_t offset)
851{
852	int err;
853
854	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
855		return (err);
856	if (rval->sys_rval1 & O_APPEND)
857		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
858		    num_bytes));
859	return (__systemcall(rval, SYS_pwrite + 1024, fd, bufferp, num_bytes,
860	    offset));
861}
862
863#if !defined(_LP64)
864/*
865 * This is the large file version of the pwrite() system call for 32-bit
866 * processes.  This exists for the same reason that s10_pwrite() exists; see
867 * the comment above s10_pwrite().
868 */
869static int
870s10_pwrite64(sysret_t *rval, int fd, const void *bufferp, size32_t num_bytes,
871    uint32_t offset_1, uint32_t offset_2)
872{
873	int err;
874
875	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
876		return (err);
877	if (rval->sys_rval1 & O_APPEND)
878		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
879		    num_bytes));
880	return (__systemcall(rval, SYS_pwrite64 + 1024, fd, bufferp,
881	    num_bytes, offset_1, offset_2));
882}
883#endif	/* !_LP64 */
884
885/*
886 * These are convenience macros that s10_getdents_common() uses.  Both treat
887 * their arguments, which should be character pointers, as dirent pointers or
888 * dirent64 pointers and yield their d_name and d_reclen fields.  These
889 * macros shouldn't be used outside of s10_getdents_common().
890 */
/*
 * Address of the name field of the entry starting at charptr.  Relies on a
 * variable called name_offset being in scope at the point of use.
 */
#define	dirent_name(charptr)	((charptr) + name_offset)
/*
 * The record-length field of the entry starting at charptr, read as an
 * unsigned short.  Relies on a variable called reclen_offset being in scope
 * at the point of use.
 */
#define	dirent_reclen(charptr)	\
	(*(unsigned short *)(uintptr_t)((charptr) + reclen_offset))
894
895/*
896 * This function contains code that is common to both s10_getdents() and
897 * s10_getdents64().  See the comment above s10_getdents() for details.
898 *
899 * rval, fd, buf, and nbyte should be passed unmodified from s10_getdents()
900 * and s10_getdents64().  getdents_syscall_id should be either SYS_getdents
 * or SYS_getdents64.  name_offset should be the byte offset of
902 * the d_name field in the dirent structures passed to the kernel via the
903 * syscall represented by getdents_syscall_id.  reclen_offset should be
904 * the byte offset of the d_reclen field in the aforementioned dirent
905 * structures.
906 */
907static int
908s10_getdents_common(sysret_t *rval, int fd, char *buf, size_t nbyte,
909    int getdents_syscall_id, size_t name_offset, size_t reclen_offset)
910{
911	int err;
912	size_t buf_size;
913	char *local_buf;
914	char *buf_current;
915
916	/*
917	 * Use a special brand operation, B_S10_ISFDXATTRDIR, to determine
918	 * whether the specified file descriptor refers to an extended file
919	 * attribute directory.  If it doesn't, then SYS_getdents won't
920	 * reveal extended file attributes, in which case we can simply
921	 * hand the syscall to the native kernel.
922	 */
923	if ((err = __systemcall(rval, SYS_brand + 1024, B_S10_ISFDXATTRDIR,
924	    fd)) != 0)
925		return (err);
926	if (rval->sys_rval1 == 0)
927		return (__systemcall(rval, getdents_syscall_id + 1024, fd, buf,
928		    nbyte));
929
930	/*
931	 * The file descriptor refers to an extended file attributes directory.
932	 * We need to create a dirent buffer that's as large as buf into which
933	 * the native SYS_getdents will store the special extended file
934	 * attribute directory's entries.  We can't dereference buf because
935	 * it might be an invalid pointer!
936	 */
937	if (nbyte > MAXGETDENTS_SIZE)
938		nbyte = MAXGETDENTS_SIZE;
939	local_buf = (char *)malloc(nbyte);
940	if (local_buf == NULL) {
941		/*
942		 * getdents(2) doesn't return an error code indicating a memory
943		 * allocation error and it doesn't make sense to return any of
944		 * its documented error codes for a malloc(3C) failure.  We'll
945		 * use ENOMEM even though getdents(2) doesn't use it because it
946		 * best describes the failure.
947		 */
948		(void) B_TRUSS_POINT_3(rval, getdents_syscall_id, ENOMEM, fd,
949		    buf, nbyte);
950		rval->sys_rval1 = -1;
951		rval->sys_rval2 = 0;
952		return (EIO);
953	}
954
955	/*
956	 * Issue a native SYS_getdents syscall but use our local dirent buffer
957	 * instead of buf.  This will allow us to examine the returned dirent
958	 * structures immediately and copy them to buf later.  That way the
959	 * calling process won't be able to see the dirent structures until
960	 * we finish examining them.
961	 */
962	if ((err = __systemcall(rval, getdents_syscall_id + 1024, fd, local_buf,
963	    nbyte)) != 0) {
964		free(local_buf);
965		return (err);
966	}
967	buf_size = rval->sys_rval1;
968	if (buf_size == 0) {
969		free(local_buf);
970		return (0);
971	}
972
973	/*
974	 * Look for SUNWattr_ro (VIEW_READONLY) and SUNWattr_rw
975	 * (VIEW_READWRITE) in the directory entries and remove them
976	 * from the dirent buffer.
977	 */
978	for (buf_current = local_buf;
979	    (size_t)(buf_current - local_buf) < buf_size; /* cstyle */) {
980		if (strcmp(dirent_name(buf_current), VIEW_READONLY) != 0 &&
981		    strcmp(dirent_name(buf_current), VIEW_READWRITE) != 0) {
982			/*
983			 * The dirent refers to an attribute that should
984			 * be visible to Solaris 10 processes.  Keep it
985			 * and examine the next entry in the buffer.
986			 */
987			buf_current += dirent_reclen(buf_current);
988		} else {
989			/*
990			 * We found either SUNWattr_ro (VIEW_READONLY)
991			 * or SUNWattr_rw (VIEW_READWRITE).  Remove it
992			 * from the dirent buffer by decrementing
993			 * buf_size by the size of the entry and
994			 * overwriting the entry with the remaining
995			 * entries.
996			 */
997			buf_size -= dirent_reclen(buf_current);
998			(void) memmove(buf_current, buf_current +
999			    dirent_reclen(buf_current), buf_size -
1000			    (size_t)(buf_current - local_buf));
1001		}
1002	}
1003
1004	/*
1005	 * Copy local_buf into buf so that the calling process can see
1006	 * the results.
1007	 */
1008	if ((err = brand_uucopy(local_buf, buf, buf_size)) != 0) {
1009		free(local_buf);
1010		rval->sys_rval1 = -1;
1011		rval->sys_rval2 = 0;
1012		return (err);
1013	}
1014	rval->sys_rval1 = buf_size;
1015	free(local_buf);
1016	return (0);
1017}
1018
1019/*
1020 * Solaris Next added two special extended file attributes, SUNWattr_ro and
1021 * SUNWattr_rw, which are called "extended system attributes".  They have
1022 * special semantics (e.g., a process cannot unlink SUNWattr_ro) and should
1023 * not appear in solaris10-branded zones because no Solaris 10 applications,
1024 * including system commands such as tar(1), are coded to correctly handle these
1025 * special attributes.
1026 *
1027 * This emulation function solves the aforementioned problem by emulating
1028 * the getdents(2) syscall and filtering both system attributes out of resulting
1029 * directory entry lists.  The emulation function only filters results when
1030 * the given file descriptor refers to an extended file attribute directory.
1031 * Filtering getdents(2) results is expensive because it requires dynamic
1032 * memory allocation; however, the performance cost is tolerable because
1033 * we don't expect Solaris 10 processes to frequently examine extended file
1034 * attribute directories.
1035 *
1036 * The brand's emulation library needs two getdents(2) emulation functions
1037 * because getdents(2) comes in two flavors: non-largefile-aware getdents(2)
1038 * and largefile-aware getdents64(2).  s10_getdents() handles the non-largefile-
1039 * aware case for 32-bit processes and all getdents(2) syscalls for 64-bit
1040 * processes (64-bit processes use largefile-aware interfaces by default).
1041 * See s10_getdents64() below for the largefile-aware getdents64(2) emulation
1042 * function for 32-bit processes.
1043 */
1044static int
1045s10_getdents(sysret_t *rval, int fd, struct dirent *buf, size_t nbyte)
1046{
1047	return (s10_getdents_common(rval, fd, (char *)buf, nbyte, SYS_getdents,
1048	    offsetof(struct dirent, d_name),
1049	    offsetof(struct dirent, d_reclen)));
1050}
1051
1052#ifndef	_LP64
1053/*
1054 * This is the largefile-aware version of getdents(2) for 32-bit processes.
1055 * This exists for the same reason that s10_getdents() exists.  See the comment
1056 * above s10_getdents().
1057 */
1058static int
1059s10_getdents64(sysret_t *rval, int fd, struct dirent64 *buf, size_t nbyte)
1060{
1061	return (s10_getdents_common(rval, fd, (char *)buf, nbyte,
1062	    SYS_getdents64, offsetof(struct dirent64, d_name),
1063	    offsetof(struct dirent64, d_reclen)));
1064}
1065#endif	/* !_LP64 */
1066
/*
 * Layout of the acctctl command word as decoded by s10_acctctl(): the mode
 * occupies the top four bits and the option the remaining 28 bits.
 */
#define	S10_AC_PROC		(0x1 << 28)
#define	S10_AC_TASK		(0x2 << 28)
#define	S10_AC_FLOW		(0x4 << 28)
/* Extract the mode bits (top four bits) of a command word. */
#define	S10_AC_MODE(x)		((x) & 0xf0000000)
/* Extract the option bits (low 28 bits) of a command word. */
#define	S10_AC_OPTION(x)	((x) & 0x0fffffff)
1072
1073/*
1074 * The mode shift, mode mask and option mask for acctctl have changed.  The
1075 * mode is currently the top full byte and the option is the lower 3 full bytes.
1076 */
1077int
1078s10_acctctl(sysret_t *rval, int cmd, void *buf, size_t bufsz)
1079{
1080	int mode = S10_AC_MODE(cmd);
1081	int option = S10_AC_OPTION(cmd);
1082
1083	switch (mode) {
1084	case S10_AC_PROC:
1085		mode = AC_PROC;
1086		break;
1087	case S10_AC_TASK:
1088		mode = AC_TASK;
1089		break;
1090	case S10_AC_FLOW:
1091		mode = AC_FLOW;
1092		break;
1093	default:
1094		return (B_TRUSS_POINT_3(rval, SYS_acctctl, EINVAL, cmd, buf,
1095		    bufsz));
1096	}
1097
1098	return (__systemcall(rval, SYS_acctctl + 1024, mode | option, buf,
1099	    bufsz));
1100}
1101
1102/*
1103 * The Audit Policy parameters have changed due to:
1104 *    6466722 audituser and AUDIT_USER are defined, unused, undocumented and
1105 *            should be removed.
1106 *
1107 * In S10 we had the following flag:
1108 *	#define AUDIT_USER 0x0040
1109 * which doesn't exist in Solaris Next where the subsequent flags are shifted
1110 * down.  For example, in S10 we had:
1111 *	#define AUDIT_GROUP     0x0080
1112 * but on Solaris Next we have:
1113 *	#define AUDIT_GROUP     0x0040
1114 * AUDIT_GROUP has the value AUDIT_USER had in S10 and all of the subsequent
1115 * bits are also shifted one place.
1116 *
1117 * When we're getting or setting the Audit Policy parameters we need to
1118 * shift the outgoing or incoming bits into their proper positions.  Since
1119 * S10_AUDIT_USER was always unused, we always clear that bit on A_GETPOLICY.
1120 *
1121 * The command we care about, BSM_AUDITCTL, passes the most parameters (3),
1122 * so declare this function to take up to 4 args and just pass them on.
1123 * The number of parameters for s10_auditsys needs to be equal to the BSM_*
1124 * subcommand that has the most parameters, since we want to pass all
1125 * parameters through, regardless of which subcommands we interpose on.
1126 *
1127 * Note that the auditsys system call uses the SYSENT_AP macro wrapper instead
1128 * of the more common SYSENT_CI macro.  This means the return value is a
1129 * SE_64RVAL so the syscall table uses RV_64RVAL.
1130 */
1131
/* Policy bits 6 and above: the bits s10_auditsys() shifts by one place. */
#define	S10_AUDIT_HMASK	0xffffffc0
/* Policy bits 0 through 5: copied through by s10_auditsys() unshifted. */
#define	S10_AUDIT_LMASK	0x3f
/* Value s10_auditsys() exchanges with the native AUC_NOSPACE. */
#define	S10_AUC_NOSPACE	0x3
1135
1136int
1137s10_auditsys(sysret_t *rval, int bsmcmd, intptr_t a0, intptr_t a1, intptr_t a2)
1138{
1139	int	    err;
1140	uint32_t    m;
1141
1142	if (bsmcmd != BSM_AUDITCTL)
1143		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1,
1144		    a2));
1145
1146	if ((int)a0 == A_GETPOLICY) {
1147		if ((err = __systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0,
1148		    &m, a2)) != 0)
1149			return (err);
1150		m = ((m & S10_AUDIT_HMASK) << 1) | (m & S10_AUDIT_LMASK);
1151		if (brand_uucopy(&m, (void *)a1, sizeof (m)) != 0)
1152			return (EFAULT);
1153		return (0);
1154
1155	} else if ((int)a0 == A_SETPOLICY) {
1156		if (brand_uucopy((const void *)a1, &m, sizeof (m)) != 0)
1157			return (EFAULT);
1158		m = ((m >> 1) & S10_AUDIT_HMASK) | (m & S10_AUDIT_LMASK);
1159		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, &m,
1160		    a2));
1161	} else if ((int)a0 == A_GETCOND) {
1162		if ((err = __systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0,
1163		    &m, a2)) != 0)
1164			return (err);
1165		if (m == AUC_NOSPACE)
1166			m = S10_AUC_NOSPACE;
1167		if (brand_uucopy(&m, (void *)a1, sizeof (m)) != 0)
1168			return (EFAULT);
1169		return (0);
1170	} else if ((int)a0 == A_SETCOND) {
1171		if (brand_uucopy((const void *)a1, &m, sizeof (m)) != 0)
1172			return (EFAULT);
1173		if (m == S10_AUC_NOSPACE)
1174			m = AUC_NOSPACE;
1175		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, &m,
1176		    a2));
1177	}
1178
1179	return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1, a2));
1180}
1181
1182/*
1183 * Determine whether the executable passed to SYS_exec or SYS_execve is a
1184 * native executable.  The s10_npreload.so invokes the B_S10_NATIVE brand
1185 * operation which patches up the processes exec info to eliminate any trace
1186 * of the wrapper.  That will make pgrep and other commands that examine
1187 * process' executable names and command-line parameters work properly.
1188 */
1189static int
1190s10_exec_native(sysret_t *rval, const char *fname, const char **argp,
1191    const char **envp)
1192{
1193	const char *filename = fname;
1194	char path[64];
1195	int err;
1196
1197	/* Get a copy of the executable we're trying to run */
1198	path[0] = '\0';
1199	(void) brand_uucopystr(filename, path, sizeof (path));
1200
1201	/* Check if we're trying to run a native binary */
1202	if (strncmp(path, "/.SUNWnative/usr/lib/brand/solaris10/s10_native",
1203	    sizeof (path)) != 0)
1204		return (0);
1205
1206	/* Skip the first element in the argv array */
1207	argp++;
1208
1209	/*
1210	 * The the path of the dynamic linker is the second parameter
1211	 * of s10_native_exec().
1212	 */
1213	if (brand_uucopy(argp, &filename, sizeof (char *)) != 0)
1214		return (EFAULT);
1215
1216	/* If an exec call succeeds, it never returns */
1217	err = __systemcall(rval, SYS_brand + 1024, B_EXEC_NATIVE, filename,
1218	    argp, envp, NULL, NULL, NULL);
1219	brand_assert(err != 0);
1220	return (err);
1221}
1222
1223/*
1224 * Interpose on the SYS_exec syscall to detect native wrappers.
1225 */
1226int
1227s10_exec(sysret_t *rval, const char *fname, const char **argp)
1228{
1229	int err;
1230
1231	if ((err = s10_exec_native(rval, fname, argp, NULL)) != 0)
1232		return (err);
1233
1234	/* If an exec call succeeds, it never returns */
1235	err = __systemcall(rval, SYS_execve + 1024, fname, argp, NULL);
1236	brand_assert(err != 0);
1237	return (err);
1238}
1239
1240/*
1241 * Interpose on the SYS_execve syscall to detect native wrappers.
1242 */
1243int
1244s10_execve(sysret_t *rval, const char *fname, const char **argp,
1245    const char **envp)
1246{
1247	int err;
1248
1249	if ((err = s10_exec_native(rval, fname, argp, envp)) != 0)
1250		return (err);
1251
1252	/* If an exec call succeeds, it never returns */
1253	err = __systemcall(rval, SYS_execve + 1024, fname, argp, envp);
1254	brand_assert(err != 0);
1255	return (err);
1256}
1257
1258/*
1259 * S10's issetugid() syscall is now a subcode to privsys().
1260 */
1261static int
1262s10_issetugid(sysret_t *rval)
1263{
1264	return (__systemcall(rval, SYS_privsys + 1024, PRIVSYS_ISSETUGID,
1265	    0, 0, 0, 0, 0));
1266}
1267
1268static long
1269s10_uname(sysret_t *rv, uintptr_t p1)
1270{
1271	struct utsname un, *unp = (struct utsname *)p1;
1272	int rev, err;
1273
1274	if ((err = __systemcall(rv, SYS_uname + 1024, &un)) != 0)
1275		return (err);
1276
1277	rev = atoi(&un.release[2]);
1278	brand_assert(rev >= 11);
1279	bzero(un.release, _SYS_NMLN);
1280	(void) strlcpy(un.release, S10_UTS_RELEASE, _SYS_NMLN);
1281	bzero(un.version, _SYS_NMLN);
1282	(void) strlcpy(un.version, S10_UTS_VERSION, _SYS_NMLN);
1283
1284	/* copy out the modified uname info */
1285	return (brand_uucopy(&un, unp, sizeof (un)));
1286}
1287
1288int
1289s10_sysconfig(sysret_t *rv, int which)
1290{
1291	long value;
1292
1293	/*
1294	 * We must interpose on the sysconfig(2) requests
1295	 * that deal with the realtime signal number range.
1296	 * All others get passed to the native sysconfig(2).
1297	 */
1298	switch (which) {
1299	case _CONFIG_RTSIG_MAX:
1300		value = S10_SIGRTMAX - S10_SIGRTMIN + 1;
1301		break;
1302	case _CONFIG_SIGRT_MIN:
1303		value = S10_SIGRTMIN;
1304		break;
1305	case _CONFIG_SIGRT_MAX:
1306		value = S10_SIGRTMAX;
1307		break;
1308	default:
1309		return (__systemcall(rv, SYS_sysconfig + 1024, which));
1310	}
1311
1312	(void) B_TRUSS_POINT_1(rv, SYS_sysconfig, 0, which);
1313	rv->sys_rval1 = value;
1314	rv->sys_rval2 = 0;
1315
1316	return (0);
1317}
1318
1319int
1320s10_sysinfo(sysret_t *rv, int command, char *buf, long count)
1321{
1322	char *value;
1323	int len;
1324
1325	/*
1326	 * We must interpose on the sysinfo(2) commands SI_RELEASE and
1327	 * SI_VERSION; all others get passed to the native sysinfo(2)
1328	 * command.
1329	 */
1330	switch (command) {
1331		case SI_RELEASE:
1332			value = S10_UTS_RELEASE;
1333			break;
1334
1335		case SI_VERSION:
1336			value = S10_UTS_VERSION;
1337			break;
1338
1339		default:
1340			/*
1341			 * The default action is to pass the command to the
1342			 * native sysinfo(2) syscall.
1343			 */
1344			return (__systemcall(rv, SYS_systeminfo + 1024,
1345			    command, buf, count));
1346	}
1347
1348	len = strlen(value) + 1;
1349	if (count > 0) {
1350		if (brand_uucopystr(value, buf, count) != 0)
1351			return (EFAULT);
1352
1353		/*
1354		 * Assure NULL termination of buf as brand_uucopystr() doesn't.
1355		 */
1356		if (len > count && brand_uucopy("\0", buf + (count - 1), 1)
1357		    != 0)
1358			return (EFAULT);
1359	}
1360
1361	/*
1362	 * On success, sysinfo(2) returns the size of buffer required to hold
1363	 * the complete value plus its terminating NULL byte.
1364	 */
1365	(void) B_TRUSS_POINT_3(rv, SYS_systeminfo, 0, command, buf, count);
1366	rv->sys_rval1 = len;
1367	rv->sys_rval2 = 0;
1368	return (0);
1369}
1370
1371#if defined(__x86)
1372#if defined(__amd64)
1373/*
1374 * 64-bit x86 LWPs created by SYS_lwp_create start here if they need to set
1375 * their %fs registers to the legacy Solaris 10 selector value.
1376 *
1377 * This function does three things:
1378 *
1379 *	1.  Trap to the kernel so that it can set %fs to the legacy Solaris 10
1380 *	    selector value.
1381 *	2.  Read the LWP's true entry point (the entry point supplied by libc
1382 *	    when SYS_lwp_create was invoked) from %r14.
1383 *	3.  Eliminate this function's stack frame and pass control to the LWP's
1384 *	    true entry point.
1385 *
1386 * See the comment above s10_lwp_create_correct_fs() (see below) for the reason
1387 * why this function exists.
1388 */
1389/*ARGSUSED*/
static void
s10_lwp_create_entry_point(void *ulwp_structp)
{
	sysret_t rval;

	/*
	 * The new LWP's %fs register is initially zero, but libc won't
	 * function correctly when %fs is zero.  Change the LWP's %fs register
	 * via SYS_brand.  The result of the brand operation is deliberately
	 * ignored.
	 */
	(void) __systemcall(&rval, SYS_brand + 1024, B_S10_FSREGCORRECTION);

	/*
	 * Jump to the true entry point, which is stored in %r14.
	 * Remove our stack frame before jumping so that
	 * s10_lwp_create_entry_point() won't be seen in stack traces.
	 *
	 * NOTE: s10_lwp_create_entry_point() pushes %r12 onto its stack frame
	 * so that it can use it as a temporary register.  We don't restore %r12
	 * in this assembly block because we don't care about its value (and
	 * neither does _lwp_start()).  Besides, the System V ABI AMD64
	 * Architecture Processor Supplement doesn't specify that %r12 should
	 * have a special value when LWPs start, so we can ignore its value when
	 * we jump to the true entry point.  Furthermore, %r12 is a callee-saved
	 * register, so the true entry point should push %r12 onto its stack
	 * before using the register.  We ignore %r14 after we read it for
	 * similar reasons.
	 *
	 * NOTE: The compiler will generate a function epilogue for this
	 * function despite the fact that the LWP will never execute it.
	 * We could hand-code this entire function in assembly to eliminate
	 * the epilogue, but the epilogue is only three or four instructions,
	 * so we wouldn't save much space.  Besides, why would we want
	 * to create yet another ugly, hard-to-maintain assembly function when
	 * we could write most of it in C?
	 *
	 * NOTE(review): the frame teardown below assumes this function was
	 * compiled with a conventional %rbp-based frame -- confirm that the
	 * build never omits the frame pointer for this file.
	 */
	__asm__ __volatile__(
	    "movq %0, %%rdi\n\t"	/* pass ulwp_structp as arg1 */
	    "movq %%rbp, %%rsp\n\t"	/* eliminate the stack frame */
	    "popq %%rbp\n\t"
	    "jmp *%%r14\n\t"		/* jump to the true entry point */
	    : : "r" (ulwp_structp));
	/*NOTREACHED*/
}
1434
1435/*
1436 * The S10 libc expects that %fs will be nonzero for new 64-bit x86 LWPs but the
 * Nevada kernel clears %fs for such LWPs.  Unfortunately, new LWPs do not issue
1438 * SYS_lwp_private (see s10_lwp_private() below) after they are created, so
1439 * we must ensure that new LWPs invoke a brand operation that sets %fs to a
1440 * nonzero value immediately after their creation.
1441 *
1442 * The easiest way to do this is to make new LWPs start at a special function,
1443 * s10_lwp_create_entry_point() (see its definition above), that invokes the
1444 * brand operation that corrects %fs.  We'll store the entry points of new LWPs
1445 * in their %r14 registers so that s10_lwp_create_entry_point() can find and
1446 * call them after invoking the special brand operation.  %r14 is a callee-saved
1447 * register; therefore, any functions invoked by s10_lwp_create_entry_point()
1448 * and all functions dealing with signals (e.g., sigacthandler()) will preserve
1449 * %r14 for s10_lwp_create_entry_point().
1450 *
1451 * The Nevada kernel can safely work with nonzero %fs values because the kernel
1452 * configures per-thread %fs segment descriptors so that the legacy %fs selector
1453 * value will still work.  See the comment in lwp_load() regarding %fs and
1454 * %fsbase in 64-bit x86 processes.
1455 *
1456 * This emulation exists thanks to CRs 6467491 and 6501650.
1457 */
1458static int
1459s10_lwp_create_correct_fs(sysret_t *rval, ucontext_t *ucp, int flags,
1460    id_t *new_lwp)
1461{
1462	ucontext_t s10_uc;
1463
1464	/*
1465	 * Copy the supplied ucontext_t structure to the local stack
1466	 * frame and store the new LWP's entry point (the value of %rip
1467	 * stored in the ucontext_t) in the new LWP's %r14 register.
1468	 * Then make s10_lwp_create_entry_point() the new LWP's entry
1469	 * point.
1470	 */
1471	if (brand_uucopy(ucp, &s10_uc, sizeof (s10_uc)) != 0)
1472		return (EFAULT);
1473
1474	s10_uc.uc_mcontext.gregs[REG_R14] = s10_uc.uc_mcontext.gregs[REG_RIP];
1475	s10_uc.uc_mcontext.gregs[REG_RIP] = (greg_t)s10_lwp_create_entry_point;
1476
1477	/*  fix up the signal mask */
1478	if (s10_uc.uc_flags & UC_SIGMASK)
1479		(void) s10sigset_to_native(&s10_uc.uc_sigmask,
1480		    &s10_uc.uc_sigmask);
1481
1482	/*
1483	 * Issue SYS_lwp_create to create the new LWP.  We pass the
1484	 * modified ucontext_t to make sure that the new LWP starts at
1485	 * s10_lwp_create_entry_point().
1486	 */
1487	return (__systemcall(rval, SYS_lwp_create + 1024, &s10_uc,
1488	    flags, new_lwp));
1489}
1490#endif	/* __amd64 */
1491
1492/*
1493 * SYS_lwp_private is issued by libc_init() to set %fsbase in 64-bit x86
1494 * processes.  The Nevada kernel sets %fs to zero but the S10 libc expects
1495 * %fs to be nonzero.  We'll pass the issued system call to the kernel untouched
1496 * and invoke a brand operation to set %fs to the legacy S10 selector value.
1497 *
1498 * This emulation exists thanks to CRs 6467491 and 6501650.
1499 */
1500static int
1501s10_lwp_private(sysret_t *rval, int cmd, int which, uintptr_t base)
1502{
1503#if defined(__amd64)
1504	int err;
1505
1506	/*
1507	 * The current LWP's %fs register should be zero.  Determine whether the
1508	 * Solaris 10 libc with which we're working functions correctly when %fs
1509	 * is zero by calling thr_main() after issuing the SYS_lwp_private
1510	 * syscall.  If thr_main() barfs (returns -1), then change the LWP's %fs
1511	 * register via SYS_brand and patch brand_sysent_table so that issuing
1512	 * SYS_lwp_create executes s10_lwp_create_correct_fs() rather than the
1513	 * default s10_lwp_create().  s10_lwp_create_correct_fs() will
1514	 * guarantee that new LWPs will have correct %fs values.
1515	 */
1516	if ((err = __systemcall(rval, SYS_lwp_private + 1024, cmd, which,
1517	    base)) != 0)
1518		return (err);
1519	if (thr_main() == -1) {
1520		/*
1521		 * SYS_lwp_private is only issued by libc_init(), which is
1522		 * executed when libc is first loaded by ld.so.1.  Thus we
1523		 * are guaranteed to be single-threaded at this point.  Even
1524		 * if we were multithreaded at this point, writing a 64-bit
1525		 * value to the st_callc field of a brand_sysent_table
1526		 * entry is guaranteed to be atomic on 64-bit x86 chips
1527		 * as long as the field is not split across cache lines
1528		 * (It shouldn't be.).  See chapter 8, section 1.1 of
1529		 * "The Intel 64 and IA32 Architectures Software Developer's
1530		 * Manual," Volume 3A for more details.
1531		 */
1532		brand_sysent_table[SYS_lwp_create].st_callc =
1533		    (sysent_cb_t)s10_lwp_create_correct_fs;
1534		return (__systemcall(rval, SYS_brand + 1024,
1535		    B_S10_FSREGCORRECTION));
1536	}
1537	return (0);
1538#else	/* !__amd64 */
1539	return (__systemcall(rval, SYS_lwp_private + 1024, cmd, which, base));
1540#endif	/* !__amd64 */
1541}
1542#endif	/* __x86 */
1543
1544/*
1545 * The Opensolaris versions of lwp_mutex_timedlock() and lwp_mutex_trylock()
1546 * add an extra argument to the interfaces, a uintptr_t value for the mutex's
1547 * mutex_owner field.  The Solaris 10 libc assigns the mutex_owner field at
1548 * user-level, so we just make the extra argument be zero in both syscalls.
1549 */
1550
1551static int
1552s10_lwp_mutex_timedlock(sysret_t *rval, lwp_mutex_t *lp, timespec_t *tsp)
1553{
1554	return (__systemcall(rval, SYS_lwp_mutex_timedlock + 1024, lp, tsp, 0));
1555}
1556
1557static int
1558s10_lwp_mutex_trylock(sysret_t *rval, lwp_mutex_t *lp)
1559{
1560	return (__systemcall(rval, SYS_lwp_mutex_trylock + 1024, lp, 0));
1561}
1562
1563/*
1564 * If the emul_global_zone flag is set then emulate some aspects of the
1565 * zone system call.  In particular, emulate the global zone ID on the
1566 * ZONE_LOOKUP subcommand and emulate some of the global zone attributes
1567 * on the ZONE_GETATTR subcommand.  If the flag is not set or we're performing
1568 * some other operation, simply pass the calls through.
1569 */
1570int
1571s10_zone(sysret_t *rval, int cmd, void *arg1, void *arg2, void *arg3,
1572    void *arg4)
1573{
1574	char		*aval;
1575	int		len;
1576	zoneid_t	zid;
1577	int		attr;
1578	char		*buf;
1579	size_t		bufsize;
1580
1581	/*
1582	 * We only emulate the zone syscall for a subset of specific commands,
1583	 * otherwise we just pass the call through.
1584	 */
1585	if (!emul_global_zone)
1586		return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2,
1587		    arg3, arg4));
1588
1589	switch (cmd) {
1590	case ZONE_LOOKUP:
1591		(void) B_TRUSS_POINT_1(rval, SYS_zone, 0, cmd);
1592		rval->sys_rval1 = GLOBAL_ZONEID;
1593		rval->sys_rval2 = 0;
1594		return (0);
1595
1596	case ZONE_GETATTR:
1597		zid = (zoneid_t)(uintptr_t)arg1;
1598		attr = (int)(uintptr_t)arg2;
1599		buf = (char *)arg3;
1600		bufsize = (size_t)arg4;
1601
1602		/*
1603		 * If the request is for the global zone then we're emulating
1604		 * that, otherwise pass this thru.
1605		 */
1606		if (zid != GLOBAL_ZONEID)
1607			goto passthru;
1608
1609		switch (attr) {
1610		case ZONE_ATTR_NAME:
1611			aval = GLOBAL_ZONENAME;
1612			break;
1613
1614		case ZONE_ATTR_BRAND:
1615			aval = NATIVE_BRAND_NAME;
1616			break;
1617		default:
1618			/*
1619			 * We only emulate a subset of the attrs, use the
1620			 * real zone id to pass thru the rest.
1621			 */
1622			arg1 = (void *)(uintptr_t)zoneid;
1623			goto passthru;
1624		}
1625
1626		(void) B_TRUSS_POINT_5(rval, SYS_zone, 0, cmd, zid, attr,
1627		    buf, bufsize);
1628
1629		len = strlen(aval) + 1;
1630		if (len > bufsize)
1631			return (ENAMETOOLONG);
1632
1633		if (buf != NULL) {
1634			if (len == 1) {
1635				if (brand_uucopy("\0", buf, 1) != 0)
1636					return (EFAULT);
1637			} else {
1638				if (brand_uucopystr(aval, buf, len) != 0)
1639					return (EFAULT);
1640
1641				/*
1642				 * Assure NULL termination of "buf" as
1643				 * brand_uucopystr() does NOT.
1644				 */
1645				if (brand_uucopy("\0", buf + (len - 1), 1) != 0)
1646					return (EFAULT);
1647			}
1648		}
1649
1650		rval->sys_rval1 = len;
1651		rval->sys_rval2 = 0;
1652		return (0);
1653
1654	default:
1655		break;
1656	}
1657
1658passthru:
1659	return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3,
1660	    arg4));
1661}
1662
1663/*ARGSUSED*/
1664int
1665brand_init(int argc, char *argv[], char *envp[])
1666{
1667	sysret_t		rval;
1668	ulong_t			ldentry;
1669	int			err;
1670	char			*bname;
1671
1672	brand_pre_init();
1673
1674	/*
1675	 * Cache the pid of the zone's init process and determine if
1676	 * we're init(1m) for the zone.  Remember: we might be init
1677	 * now, but as soon as we fork(2) we won't be.
1678	 */
1679	(void) get_initpid_info();
1680
1681	/* get the current zoneid */
1682	err = __systemcall(&rval, SYS_zone, ZONE_LOOKUP, NULL);
1683	brand_assert(err == 0);
1684	zoneid = (zoneid_t)rval.sys_rval1;
1685
1686	/* Get the zone's emulation bitmap. */
1687	if ((err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid,
1688	    S10_EMUL_BITMAP, emul_bitmap, sizeof (emul_bitmap))) != 0) {
1689		brand_abort(err, "The zone's patch level is unsupported");
1690		/*NOTREACHED*/
1691	}
1692
1693	bname = basename(argv[0]);
1694
1695	/*
1696	 * In general we want the S10 commands that are zone-aware to continue
1697	 * to behave as they normally do within a zone.  Since these commands
1698	 * are zone-aware, they should continue to "do the right thing".
1699	 * However, some zone-aware commands aren't going to work the way
1700	 * we expect them to inside the branded zone.  In particular, the pkg
1701	 * and patch commands will not properly manage all pkgs/patches
1702	 * unless the commands think they are running in the global zone.  For
1703	 * these commands we want to emulate the global zone.
1704	 *
1705	 * We don't do any emulation for pkgcond since it is typically used
1706	 * in pkg/patch postinstall scripts and we want those scripts to do
1707	 * the right thing inside a zone.
1708	 *
1709	 * One issue is the handling of hollow pkgs.  Since the pkgs are
1710	 * hollow, they won't use pkgcond in their postinstall scripts.  These
1711	 * pkgs typically are installing drivers so we handle that by
1712	 * replacing add_drv and rem_drv in the s10_boot script.
1713	 */
1714	if (strcmp("pkgadd", bname) == 0 || strcmp("pkgrm", bname) == 0 ||
1715	    strcmp("patchadd", bname) == 0 || strcmp("patchrm", bname) == 0)
1716		emul_global_zone = B_TRUE;
1717
1718	ldentry = brand_post_init(S10_VERSION, argc, argv, envp);
1719
1720	brand_runexe(argv, ldentry);
1721	/*NOTREACHED*/
1722	brand_abort(0, "brand_runexe() returned");
1723	return (-1);
1724}
1725
1726/*
1727 * This table must have at least NSYSCALL entries in it.
1728 *
1729 * The second parameter of each entry in the brand_sysent_table
1730 * contains the number of parameters and flags that describe the
1731 * syscall return value encoding.  See the block comments at the
1732 * top of this file for more information about the syscall return
1733 * value flags and when they should be used.
1734 */
1735brand_sysent_table_t brand_sysent_table[] = {
	/*
	 * Entries are positional: the trailing comment on each line gives
	 * the slot index (0 through 255), so entries must never be added,
	 * removed, or reordered without renumbering.  Every #if/#else group
	 * below supplies the same number of entries in each arm, which keeps
	 * the slot indices identical across all build variants.
	 *
	 * Slot 0 is populated with brand_indir only on SPARC builds that
	 * are not sparcv9; every other build leaves it as NOSYS.
	 */
1736#if defined(__sparc) && !defined(__sparcv9)
1737	EMULATE(brand_indir, 9 | RV_64RVAL),	/*  0 */
1738#else
1739	NOSYS,					/*  0 */
1740#endif
1741	NOSYS,					/*   1 */
1742	EMULATE(s10_forkall, 0 | RV_32RVAL2),	/*   2 */
1743	NOSYS,					/*   3 */
1744	NOSYS,					/*   4 */
1745	EMULATE(s10_open, 3 | RV_DEFAULT),	/*   5 */
1746	NOSYS,					/*   6 */
1747	EMULATE(s10_wait, 0 | RV_32RVAL2),	/*   7 */
1748	EMULATE(s10_creat, 2 | RV_DEFAULT),	/*   8 */
1749	NOSYS,					/*   9 */
1750	EMULATE(s10_unlink, 1 | RV_DEFAULT),	/*  10 */
1751	EMULATE(s10_exec, 2 | RV_DEFAULT),	/*  11 */
1752	NOSYS,					/*  12 */
1753	NOSYS,					/*  13 */
1754	NOSYS,					/*  14 */
1755	NOSYS,					/*  15 */
1756	EMULATE(s10_chown, 3 | RV_DEFAULT),	/*  16 */
1757	NOSYS,					/*  17 */
1758	EMULATE(s10_stat, 2 | RV_DEFAULT),	/*  18 */
1759	NOSYS,					/*  19 */
1760	NOSYS,					/*  20 */
1761	NOSYS,					/*  21 */
1762	EMULATE(s10_umount, 1 | RV_DEFAULT),	/*  22 */
1763	NOSYS,					/*  23 */
1764	NOSYS,					/*  24 */
1765	NOSYS,					/*  25 */
1766	NOSYS,					/*  26 */
1767	NOSYS,					/*  27 */
1768	EMULATE(s10_fstat, 2 | RV_DEFAULT),	/*  28 */
1769	NOSYS,					/*  29 */
1770	EMULATE(s10_utime, 2 | RV_DEFAULT),	/*  30 */
1771	NOSYS,					/*  31 */
1772	NOSYS,					/*  32 */
1773	EMULATE(s10_access, 2 | RV_DEFAULT),	/*  33 */
1774	NOSYS,					/*  34 */
1775	NOSYS,					/*  35 */
1776	NOSYS,					/*  36 */
1777	EMULATE(s10_kill, 2 | RV_DEFAULT),	/*  37 */
1778	NOSYS,					/*  38 */
1779	NOSYS,					/*  39 */
1780	NOSYS,					/*  40 */
1781	EMULATE(s10_dup, 1 | RV_DEFAULT),	/*  41 */
1782	NOSYS,					/*  42 */
1783	NOSYS,					/*  43 */
1784	NOSYS,					/*  44 */
1785	NOSYS,					/*  45 */
1786	NOSYS,					/*  46 */
1787	NOSYS,					/*  47 */
1788	NOSYS,					/*  48 */
1789	NOSYS,					/*  49 */
1790	NOSYS,					/*  50 */
1791	NOSYS,					/*  51 */
1792	NOSYS,					/*  52 */
1793	NOSYS,					/*  53 */
1794	EMULATE(s10_ioctl, 3 | RV_DEFAULT),	/*  54 */
1795	NOSYS,					/*  55 */
1796	NOSYS,					/*  56 */
1797	NOSYS,					/*  57 */
1798	NOSYS,					/*  58 */
1799	EMULATE(s10_execve, 3 | RV_DEFAULT),	/*  59 */
1800	NOSYS,					/*  60 */
1801	NOSYS,					/*  61 */
1802	NOSYS,					/*  62 */
1803	NOSYS,					/*  63 */
1804	NOSYS,					/*  64 */
1805	NOSYS,					/*  65 */
1806	NOSYS,					/*  66 */
1807	NOSYS,					/*  67 */
1808	NOSYS,					/*  68 */
1809	NOSYS,					/*  69 */
1810	NOSYS,					/*  70 */
1811	EMULATE(s10_acctctl, 3 | RV_DEFAULT),	/*  71 */
1812	NOSYS,					/*  72 */
1813	NOSYS,					/*  73 */
1814	NOSYS,					/*  74 */
1815	EMULATE(s10_issetugid, 0 | RV_DEFAULT),	/*  75 */
1816	EMULATE(s10_fsat, 6 | RV_DEFAULT),	/*  76 */
1817	NOSYS,					/*  77 */
1818	NOSYS,					/*  78 */
1819	EMULATE(s10_rmdir, 1 | RV_DEFAULT),	/*  79 */
1820	NOSYS,					/*  80 */
1821	EMULATE(s10_getdents, 3 | RV_DEFAULT),	/*  81 */
1822	NOSYS,					/*  82 */
1823	NOSYS,					/*  83 */
1824	NOSYS,					/*  84 */
1825	NOSYS,					/*  85 */
1826	NOSYS,					/*  86 */
1827	EMULATE(s10_poll, 3 | RV_DEFAULT),	/*  87 */
1828	EMULATE(s10_lstat, 2 | RV_DEFAULT),	/*  88 */
1829	NOSYS,					/*  89 */
1830	NOSYS,					/*  90 */
1831	NOSYS,					/*  91 */
1832	NOSYS,					/*  92 */
1833	NOSYS,					/*  93 */
1834	EMULATE(s10_fchown, 3 | RV_DEFAULT),	/*  94 */
1835	EMULATE(s10_sigprocmask, 3 | RV_DEFAULT), /*  95 */
1836	EMULATE(s10_sigsuspend, 1 | RV_DEFAULT), /*  96 */
1837	NOSYS,					/*  97 */
1838	EMULATE(s10_sigaction, 3 | RV_DEFAULT),	/*  98 */
1839	EMULATE(s10_sigpending, 2 | RV_DEFAULT), /*  99 */
1840	NOSYS,					/* 100 */
1841	NOSYS,					/* 101 */
1842	NOSYS,					/* 102 */
1843	NOSYS,					/* 103 */
1844	NOSYS,					/* 104 */
1845	NOSYS,					/* 105 */
1846	NOSYS,					/* 106 */
1847	EMULATE(s10_waitid, 4 | RV_DEFAULT),	/* 107 */
1848	EMULATE(s10_sigsendsys, 2 | RV_DEFAULT), /* 108 */
1849	NOSYS,					/* 109 */
1850	NOSYS,					/* 110 */
1851	NOSYS,					/* 111 */
1852	NOSYS,					/* 112 */
1853	NOSYS,					/* 113 */
1854	NOSYS,					/* 114 */
1855	NOSYS,					/* 115 */
1856	NOSYS,					/* 116 */
1857	NOSYS,					/* 117 */
1858	NOSYS,					/* 118 */
1859	NOSYS,					/* 119 */
1860	NOSYS,					/* 120 */
1861	NOSYS,					/* 121 */
1862	NOSYS,					/* 122 */
	/*
	 * Slots 123-126 (s10_xstat, s10_lxstat, s10_fxstat, s10_xmknod) are
	 * emulated only when __x86 is defined; other builds use NOSYS.
	 */
1863#if defined(__x86)
1864	EMULATE(s10_xstat, 3 | RV_DEFAULT),	/* 123 */
1865	EMULATE(s10_lxstat, 3 | RV_DEFAULT),	/* 124 */
1866	EMULATE(s10_fxstat, 3 | RV_DEFAULT),	/* 125 */
1867	EMULATE(s10_xmknod, 4 | RV_DEFAULT),	/* 126 */
1868#else
1869	NOSYS,					/* 123 */
1870	NOSYS,					/* 124 */
1871	NOSYS,					/* 125 */
1872	NOSYS,					/* 126 */
1873#endif
1874	NOSYS,					/* 127 */
1875	NOSYS,					/* 128 */
1876	NOSYS,					/* 129 */
1877	EMULATE(s10_lchown, 3 | RV_DEFAULT),	/* 130 */
1878	NOSYS,					/* 131 */
1879	NOSYS,					/* 132 */
1880	NOSYS,					/* 133 */
1881	EMULATE(s10_rename, 2 | RV_DEFAULT),	/* 134 */
1882	EMULATE(s10_uname, 1 | RV_DEFAULT),	/* 135 */
1883	NOSYS,					/* 136 */
1884	EMULATE(s10_sysconfig, 1 | RV_DEFAULT),	/* 137 */
1885	NOSYS,					/* 138 */
1886	EMULATE(s10_sysinfo, 3 | RV_DEFAULT),	/* 139 */
1887	NOSYS,					/* 140 */
1888	NOSYS,					/* 141 */
1889	NOSYS,					/* 142 */
1890	EMULATE(s10_fork1, 0 | RV_32RVAL2),	/* 143 */
1891	EMULATE(s10_sigtimedwait, 3 | RV_DEFAULT), /* 144 */
1892	NOSYS,					/* 145 */
1893	NOSYS,					/* 146 */
1894	EMULATE(s10_lwp_sema_wait, 1 | RV_DEFAULT), /* 147 */
1895	NOSYS,					/* 148 */
1896	NOSYS,					/* 149 */
1897	NOSYS,					/* 150 */
1898	NOSYS,					/* 151 */
1899	NOSYS,					/* 152 */
1900	NOSYS,					/* 153 */
1901	EMULATE(s10_utimes, 2 | RV_DEFAULT),	/* 154 */
1902	NOSYS,					/* 155 */
1903	NOSYS,					/* 156 */
1904	NOSYS,					/* 157 */
1905	NOSYS,					/* 158 */
1906	EMULATE(s10_lwp_create, 3 | RV_DEFAULT), /* 159 */
1907	NOSYS,					/* 160 */
1908	NOSYS,					/* 161 */
1909	NOSYS,					/* 162 */
1910	EMULATE(s10_lwp_kill, 2 | RV_DEFAULT),	/* 163 */
1911	NOSYS,					/* 164 */
1912	EMULATE(s10_lwp_sigmask, 3 | RV_32RVAL2), /* 165 */
	/* Slot 166 (s10_lwp_private) is emulated only when __x86 is defined. */
1913#if defined(__x86)
1914	EMULATE(s10_lwp_private, 3 | RV_DEFAULT), /* 166 */
1915#else
1916	NOSYS,					/* 166 */
1917#endif
1918	NOSYS,					/* 167 */
1919	NOSYS,					/* 168 */
1920	EMULATE(s10_lwp_mutex_lock, 1 | RV_DEFAULT), /* 169 */
1921	NOSYS,					/* 170 */
1922	NOSYS,					/* 171 */
1923	NOSYS,					/* 172 */
1924	NOSYS,					/* 173 */
1925	EMULATE(s10_pwrite, 4 | RV_DEFAULT),	/* 174 */
1926	NOSYS,					/* 175 */
1927	NOSYS,					/* 176 */
1928	NOSYS,					/* 177 */
1929	NOSYS,					/* 178 */
1930	NOSYS,					/* 179 */
1931	NOSYS,					/* 180 */
1932	NOSYS,					/* 181 */
1933	NOSYS,					/* 182 */
1934	NOSYS,					/* 183 */
1935	NOSYS,					/* 184 */
1936	NOSYS,					/* 185 */
1937	EMULATE(s10_auditsys, 4 | RV_64RVAL),	/* 186 */
1938	NOSYS,					/* 187 */
1939	NOSYS,					/* 188 */
1940	NOSYS,					/* 189 */
1941	EMULATE(s10_sigqueue, 4 | RV_DEFAULT),	/* 190 */
1942	NOSYS,					/* 191 */
1943	NOSYS,					/* 192 */
1944	NOSYS,					/* 193 */
1945	NOSYS,					/* 194 */
1946	NOSYS,					/* 195 */
1947	NOSYS,					/* 196 */
1948	NOSYS,					/* 197 */
1949	NOSYS,					/* 198 */
1950	NOSYS,					/* 199 */
1951	NOSYS,					/* 200 */
1952	NOSYS,					/* 201 */
1953	NOSYS,					/* 202 */
1954	NOSYS,					/* 203 */
1955	NOSYS,					/* 204 */
1956	EMULATE(s10_signotify, 3 | RV_DEFAULT),	/* 205 */
1957	NOSYS,					/* 206 */
1958	NOSYS,					/* 207 */
1959	NOSYS,					/* 208 */
1960	NOSYS,					/* 209 */
1961	EMULATE(s10_lwp_mutex_timedlock, 2 | RV_DEFAULT), /* 210 */
1962	NOSYS,					/* 211 */
1963	NOSYS,					/* 212 */
	/*
	 * The *64 handlers in slots 213, 215-217 and 223-225 are installed
	 * only when _LP64 is not defined; _LP64 builds leave those slots as
	 * NOSYS.
	 */
1964#if defined(_LP64)
1965	NOSYS,					/* 213 */
1966#else
1967	EMULATE(s10_getdents64, 3 | RV_DEFAULT), /* 213 */
1968#endif
1969	NOSYS,					/* 214 */
1970#if defined(_LP64)
1971	NOSYS,					/* 215 */
1972	NOSYS,					/* 216 */
1973	NOSYS,					/* 217 */
1974#else
1975	EMULATE(s10_stat64, 2 | RV_DEFAULT),	/* 215 */
1976	EMULATE(s10_lstat64, 2 | RV_DEFAULT),	/* 216 */
1977	EMULATE(s10_fstat64, 2 | RV_DEFAULT),	/* 217 */
1978#endif
1979	NOSYS,					/* 218 */
1980	NOSYS,					/* 219 */
1981	NOSYS,					/* 220 */
1982	NOSYS,					/* 221 */
1983	NOSYS,					/* 222 */
1984#if defined(_LP64)
1985	NOSYS,					/* 223 */
1986	NOSYS,					/* 224 */
1987	NOSYS,					/* 225 */
1988#else
1989	EMULATE(s10_pwrite64, 5 | RV_DEFAULT),	/* 223 */
1990	EMULATE(s10_creat64, 2 | RV_DEFAULT),	/* 224 */
1991	EMULATE(s10_open64, 3 | RV_DEFAULT),	/* 225 */
1992#endif
1993	NOSYS,					/* 226 */
1994	EMULATE(s10_zone, 5 | RV_DEFAULT),	/* 227 */
1995	NOSYS,					/* 228 */
1996	NOSYS,					/* 229 */
1997	NOSYS,					/* 230 */
1998	NOSYS,					/* 231 */
1999	NOSYS,					/* 232 */
2000	NOSYS,					/* 233 */
2001	NOSYS,					/* 234 */
2002	NOSYS,					/* 235 */
2003	NOSYS,					/* 236 */
2004	NOSYS,					/* 237 */
2005	NOSYS,					/* 238 */
2006	NOSYS,					/* 239 */
2007	NOSYS,					/* 240 */
2008	NOSYS,					/* 241 */
2009	NOSYS,					/* 242 */
2010	NOSYS,					/* 243 */
2011	NOSYS,					/* 244 */
2012	NOSYS,					/* 245 */
2013	NOSYS,					/* 246 */
2014	NOSYS,					/* 247 */
2015	NOSYS,					/* 248 */
2016	NOSYS,					/* 249 */
2017	NOSYS,					/* 250 */
2018	EMULATE(s10_lwp_mutex_trylock, 1 | RV_DEFAULT), /* 251 */
2019	NOSYS,					/* 252 */
2020	NOSYS,					/* 253 */
2021	NOSYS,					/* 254 */
2022	NOSYS					/* 255 */
2023};
2024