1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29/*
30 *	pthread_synch.c
31 */
32
33#define  _PTHREAD_CONDATTR_T
34#define  _PTHREAD_COND_T
35#define _PTHREAD_MUTEXATTR_T
36#define _PTHREAD_MUTEX_T
37#define _PTHREAD_RWLOCKATTR_T
38#define _PTHREAD_RWLOCK_T
39
40#undef pthread_mutexattr_t
41#undef pthread_mutex_t
42#undef pthread_condattr_t
43#undef pthread_cond_t
44#undef pthread_rwlockattr_t
45#undef pthread_rwlock_t
46
47#include <sys/param.h>
48#include <sys/queue.h>
49#include <sys/resourcevar.h>
50#include <sys/proc_internal.h>
51#include <sys/kauth.h>
52#include <sys/systm.h>
53#include <sys/timeb.h>
54#include <sys/times.h>
55#include <sys/acct.h>
56#include <sys/kernel.h>
57#include <sys/wait.h>
58#include <sys/signalvar.h>
59#include <sys/syslog.h>
60#include <sys/stat.h>
61#include <sys/lock.h>
62#include <sys/kdebug.h>
63#include <sys/sysproto.h>
64#include <sys/pthread_internal.h>
65#include <sys/vm.h>
66#include <sys/user.h>		/* for coredump */
67
68
69#include <mach/mach_types.h>
70#include <mach/vm_prot.h>
71#include <mach/semaphore.h>
72#include <mach/sync_policy.h>
73#include <mach/task.h>
74#include <kern/kern_types.h>
75#include <kern/task.h>
76#include <kern/clock.h>
77#include <mach/kern_return.h>
78#include <kern/thread.h>
79#include <kern/sched_prim.h>
80#include <kern/kalloc.h>
81#include <kern/sched_prim.h>	/* for thread_exception_return */
82#include <kern/processor.h>
83#include <kern/affinity.h>
84#include <mach/mach_vm.h>
85#include <mach/mach_param.h>
86#include <mach/thread_status.h>
87#include <mach/thread_policy.h>
88#include <mach/message.h>
89#include <mach/port.h>
90#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
92#include <mach/thread_act.h> /* for thread_resume */
93#include <machine/machine_routines.h>
94#if defined(__i386__)
95#include <i386/machine_routines.h>
96#include <i386/eflags.h>
97#include <i386/psl.h>
98#include <i386/seg.h>
99#endif
100
101#include <libkern/OSAtomic.h>
102
103#if 0
104#undef KERNEL_DEBUG
105#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
106#undef KERNEL_DEBUG1
107#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
108#endif
109
110
111#if defined(__ppc__) || defined(__ppc64__)
112#include <architecture/ppc/cframe.h>
113#endif
114
115
116lck_grp_attr_t   *pthread_lck_grp_attr;
117lck_grp_t    *pthread_lck_grp;
118lck_attr_t   *pthread_lck_attr;
119lck_mtx_t * pthread_list_mlock;
120extern void pthread_init(void);
121
122extern kern_return_t thread_getstatus(register thread_t act, int flavor,
123			thread_state_t tstate, mach_msg_type_number_t *count);
124extern kern_return_t thread_setstatus(thread_t thread, int flavor,
125			thread_state_t tstate, mach_msg_type_number_t count);
126extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
127extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
128extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
129
130static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item);
131static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
132static void workqueue_run_nextitem(proc_t p, thread_t th);
133static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
134		       int reuse_thread, int wake_thread, int return_directly);
135static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
136static int  workqueue_addnewthread(struct workqueue *wq);
137static void workqueue_removethread(struct workqueue *wq);
138static void workqueue_lock(proc_t);
139static void workqueue_lock_spin(proc_t);
140static void workqueue_unlock(proc_t);
141
142#define C_32_STK_ALIGN          16
143#define C_64_STK_ALIGN          16
144#define C_64_REDZONE_LEN        128
145#define TRUNC_DOWN32(a,c)       ((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
146#define TRUNC_DOWN64(a,c)       ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
147
148
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 *
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
156void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
157
158#define PTHREAD_START_CUSTOM	0x01000000
159#define PTHREAD_START_SETSCHED	0x02000000
160#define PTHREAD_START_DETACHED	0x04000000
161#define PTHREAD_START_POLICY_BITSHIFT 16
162#define PTHREAD_START_POLICY_MASK 0xff
163#define PTHREAD_START_IMPORTANCE_MASK 0xffff
164
165#define SCHED_OTHER      POLICY_TIMESHARE
166#define SCHED_FIFO       POLICY_FIFO
167#define SCHED_RR         POLICY_RR
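/*
 * Illustrative only (not taken from this file): a caller such as user-space
 * libpthread would be expected to pack the flags word roughly as
 *
 *	flags = PTHREAD_START_SETSCHED |
 *		((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
 *		(importance & PTHREAD_START_IMPORTANCE_MASK);
 *
 * which matches how bsdthread_create() decodes policy and importance below.
 */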
168
169void
170pthread_init(void)
171{
172
173	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
174	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
175
176	/*
177	 * allocate the lock attribute for pthread synchronizers
178	 */
179	pthread_lck_attr = lck_attr_alloc_init();
180
181	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
182
183}
184
185void
186pthread_list_lock(void)
187{
188	lck_mtx_lock(pthread_list_mlock);
189}
190
191void
192pthread_list_unlock(void)
193{
194	lck_mtx_unlock(pthread_list_mlock);
195}
196
197
198int
199__pthread_mutex_destroy(__unused struct proc *p, struct __pthread_mutex_destroy_args *uap, __unused register_t *retval)
200{
201	int res;
202	int mutexid = uap->mutexid;
203	pthread_mutex_t * mutex;
204	lck_mtx_t * lmtx;
205	lck_mtx_t * lmtx1;
206
207
208	mutex = pthread_id_to_mutex(mutexid);
209	if (mutex == 0)
210		return(EINVAL);
211
212	MTX_LOCK(mutex->lock);
213	if (mutex->sig == _PTHREAD_KERN_MUTEX_SIG)
214	{
215		if (mutex->owner == (thread_t)NULL &&
216		    mutex->refcount == 1)
217		{
218			mutex->sig = _PTHREAD_NO_SIG;
219			lmtx = mutex->mutex;
220			lmtx1 = mutex->lock;
221			mutex->mutex = NULL;
222			pthread_id_mutex_remove(mutexid);
223			mutex->refcount --;
224			MTX_UNLOCK(mutex->lock);
225			lck_mtx_free(lmtx, pthread_lck_grp);
226			lck_mtx_free(lmtx1, pthread_lck_grp);
227			kfree((void *)mutex, sizeof(struct _pthread_mutex));
228			return(0);
229		}
230		else
231			res = EBUSY;
232	}
233	else
234		res = EINVAL;
235	MTX_UNLOCK(mutex->lock);
236	pthread_mutex_release(mutex);
237	return (res);
238}
239
240/*
241 * Initialize a mutex variable, possibly with additional attributes.
242 */
243static void
244pthread_mutex_init_internal(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
245{
246	mutex->prioceiling = attr->prioceiling;
247	mutex->protocol = attr->protocol;
248	mutex->type = attr->type;
249	mutex->pshared = attr->pshared;
250	mutex->refcount = 0;
251	mutex->owner = (thread_t)NULL;
252	mutex->owner_proc = current_proc();
253	mutex->sig = _PTHREAD_KERN_MUTEX_SIG;
254	mutex->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
255	mutex->mutex = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
256}
257
258/*
259 * Initialize a mutex variable, possibly with additional attributes.
260 * Public interface - so don't trust the lock - initialize it first.
261 */
262int
263__pthread_mutex_init(__unused struct proc *p, struct __pthread_mutex_init_args *uap, __unused register_t *retval)
264{
265	user_addr_t umutex = uap->mutex;
266	pthread_mutex_t * mutex;
267	user_addr_t uattr = uap->attr;
268	pthread_mutexattr_t attr;
269	unsigned int addr = (unsigned int)((uintptr_t)uap->mutex);
270	int pmutex_sig;
271	int mutexid;
272	int error = 0;
273
274	if ((umutex == 0) || (uattr == 0))
275		return(EINVAL);
276
277	if ((error = copyin(uattr, &attr, sizeof(pthread_mutexattr_t))))
278		return(error);
279
280	if (attr.sig != _PTHREAD_MUTEX_ATTR_SIG)
281			return (EINVAL);
282
283	if ((error = copyin(umutex, &pmutex_sig, sizeof(int))))
284		return(error);
285
286	if (pmutex_sig == _PTHREAD_KERN_MUTEX_SIG)
287		return(EBUSY);
288	mutex = (pthread_mutex_t *)kalloc(sizeof(pthread_mutex_t));
289
290	 pthread_mutex_init_internal(mutex, &attr);
291
292
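	/*
	 * the kernel-assigned mutex id is copied out 8 bytes into the user
	 * pthread_mutex_t; this offset is assumed to match the layout that
	 * user-space libpthread expects
	 */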
293	addr += 8;
294	mutexid = pthread_id_mutex_add(mutex);
295	if (mutexid) {
296		if ((error = copyout(&mutexid, ((user_addr_t)((uintptr_t)(addr))), 4)))
297			goto cleanup;
298		return(0);
299	}  else
300		error = ENOMEM;
301cleanup:
302	if(mutexid)
303		pthread_id_mutex_remove(mutexid);
304	lck_mtx_free(mutex->lock, pthread_lck_grp);
305	lck_mtx_free(mutex->mutex, pthread_lck_grp);
306	kfree(mutex, sizeof(struct _pthread_mutex));
307	return(error);
308}
309
310/*
311 * Lock a mutex.
312 * TODO: Priority inheritance stuff
313 */
314int
315__pthread_mutex_lock(struct proc *p, struct __pthread_mutex_lock_args *uap, __unused register_t *retval)
316{
317	int mutexid = uap->mutexid;
318	pthread_mutex_t  * mutex;
319	int error;
320
321	mutex = pthread_id_to_mutex(mutexid);
322	if (mutex == 0)
323		return(EINVAL);
324
325	MTX_LOCK(mutex->lock);
326
327	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
328	{
329		error = EINVAL;
330		goto out;
331	}
332
333	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
334		error = EINVAL;
335		goto out;
336	}
337
338	MTX_UNLOCK(mutex->lock);
339
340	lck_mtx_lock(mutex->mutex);
341
342	MTX_LOCK(mutex->lock);
343	mutex->owner = current_thread();
344	error = 0;
345out:
346	MTX_UNLOCK(mutex->lock);
347	pthread_mutex_release(mutex);
348	return (error);
349}
350
351/*
352 * Attempt to lock a mutex, but don't block if this isn't possible.
353 */
354int
355__pthread_mutex_trylock(struct proc *p, struct __pthread_mutex_trylock_args *uap, __unused register_t *retval)
356{
357	int mutexid = uap->mutexid;
358	pthread_mutex_t  * mutex;
359	boolean_t state;
360	int error;
361
362	mutex = pthread_id_to_mutex(mutexid);
363	if (mutex == 0)
364		return(EINVAL);
365
366	MTX_LOCK(mutex->lock);
367
368	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
369	{
370		error = EINVAL;
371		goto out;
372	}
373
374	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
375		error = EINVAL;
376		goto out;
377	}
378
379	MTX_UNLOCK(mutex->lock);
380
381	state = lck_mtx_try_lock(mutex->mutex);
382	if (state) {
383		MTX_LOCK(mutex->lock);
384		mutex->owner = current_thread();
385		MTX_UNLOCK(mutex->lock);
386		error = 0;
387	} else
388		error = EBUSY;
389
390	pthread_mutex_release(mutex);
391	return (error);
392out:
393	MTX_UNLOCK(mutex->lock);
394	pthread_mutex_release(mutex);
395	return (error);
396}
397
398/*
399 * Unlock a mutex.
400 * TODO: Priority inheritance stuff
401 */
402int
403__pthread_mutex_unlock(struct proc *p, struct __pthread_mutex_unlock_args *uap, __unused register_t *retval)
404{
405	int mutexid = uap->mutexid;
406	pthread_mutex_t  * mutex;
407	int error;
408
409	mutex = pthread_id_to_mutex(mutexid);
410	if (mutex == 0)
411		return(EINVAL);
412
413	MTX_LOCK(mutex->lock);
414
415	if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG)
416	{
417		error = EINVAL;
418		goto out;
419	}
420
421	if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) {
422		error = EINVAL;
423		goto out;
424	}
425
426	MTX_UNLOCK(mutex->lock);
427
428	lck_mtx_unlock(mutex->mutex);
429
430	MTX_LOCK(mutex->lock);
431	mutex->owner = NULL;
432	error = 0;
433out:
434	MTX_UNLOCK(mutex->lock);
435	pthread_mutex_release(mutex);
436	return (error);
437}
438
439
440int
441__pthread_cond_init(__unused struct proc *p, struct __pthread_cond_init_args *uap, __unused register_t *retval)
442{
443	pthread_cond_t * cond;
444	pthread_condattr_t attr;
445	user_addr_t ucond = uap->cond;
446	user_addr_t uattr = uap->attr;
447	unsigned int addr = (unsigned int)((uintptr_t)uap->cond);
448	int condid, error, cond_sig;
449	semaphore_t sem;
450	kern_return_t kret;
451	int value = 0;
452
453	if ((ucond == 0) || (uattr == 0))
454		return(EINVAL);
455
456	if ((error = copyin(uattr, &attr, sizeof(pthread_condattr_t))))
457		return(error);
458
459	if (attr.sig != _PTHREAD_COND_ATTR_SIG)
460			return (EINVAL);
461
462	if ((error = copyin(ucond, &cond_sig, sizeof(int))))
463		return(error);
464
465	if (cond_sig == _PTHREAD_KERN_COND_SIG)
466		return(EBUSY);
467	kret = semaphore_create(kernel_task, &sem, SYNC_POLICY_FIFO, value);
468	if (kret != KERN_SUCCESS)
469		return(ENOMEM);
470
471	cond = (pthread_cond_t *)kalloc(sizeof(pthread_cond_t));
472
473	cond->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
474	cond->pshared = attr.pshared;
475	cond->sig = _PTHREAD_KERN_COND_SIG;
476	cond->sigpending = 0;
477	cond->waiters = 0;
478	cond->refcount = 0;
479	cond->mutex = (pthread_mutex_t *)0;
480	cond->owner_proc = current_proc();
481	cond->sem = sem;
482
483	addr += 8;
484	condid = pthread_id_cond_add(cond);
485	if (condid) {
486		if ((error = copyout(&condid, ((user_addr_t)((uintptr_t)(addr))), 4)))
487			goto cleanup;
488		return(0);
489	}  else
490		error = ENOMEM;
491cleanup:
492	if(condid)
493		pthread_id_cond_remove(condid);
494	semaphore_destroy(kernel_task, cond->sem);
495	kfree(cond, sizeof(pthread_cond_t));
496	return(error);
497}
498
499
500/*
501 * Destroy a condition variable.
502 */
503int
504__pthread_cond_destroy(__unused struct proc *p, struct __pthread_cond_destroy_args  *uap, __unused register_t *retval)
505{
506	pthread_cond_t *cond;
507	int condid = uap->condid;
508	semaphore_t sem;
509	lck_mtx_t * lmtx;
510	int res;
511
512	cond = pthread_id_to_cond(condid);
513	if (cond == 0)
514		return(EINVAL);
515
516	COND_LOCK(cond->lock);
517	if (cond->sig == _PTHREAD_KERN_COND_SIG)
518	{
519		if (cond->refcount == 1)
520		{
521			cond->sig = _PTHREAD_NO_SIG;
522			sem = cond->sem;
523			cond->sem = NULL;
524			lmtx = cond->lock;
525			pthread_id_cond_remove(condid);
526			cond->refcount --;
527			COND_UNLOCK(cond->lock);
528			lck_mtx_free(lmtx, pthread_lck_grp);
529			(void)semaphore_destroy(kernel_task, sem);
530			kfree((void *)cond, sizeof(pthread_cond_t));
531			return(0);
532		}
533		else
534			res = EBUSY;
535	}
536	else
537		res = EINVAL;
538	COND_UNLOCK(cond->lock);
539	pthread_cond_release(cond);
540	return (res);
541}
542
543
/*
 * Broadcast a condition variable, waking up all threads waiting on it.
 */
547int
548__pthread_cond_broadcast(__unused struct proc *p, struct __pthread_cond_broadcast_args  *uap, __unused register_t *retval)
549{
550	int condid = uap->condid;
551	pthread_cond_t  * cond;
552	int error;
553	kern_return_t kret;
554
555	cond = pthread_id_to_cond(condid);
556	if (cond == 0)
557		return(EINVAL);
558
559	COND_LOCK(cond->lock);
560
561	if (cond->sig != _PTHREAD_KERN_COND_SIG)
562	{
563		error = EINVAL;
564		goto out;
565	}
566
567	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
568		error = EINVAL;
569		goto out;
570	}
571
572	COND_UNLOCK(cond->lock);
573
574	kret = semaphore_signal_all(cond->sem);
575    switch (kret) {
576    case KERN_INVALID_ADDRESS:
577    case KERN_PROTECTION_FAILURE:
578        error = EINVAL;
579        break;
580    case KERN_ABORTED:
581    case KERN_OPERATION_TIMED_OUT:
582        error = EINTR;
583        break;
584    case KERN_SUCCESS:
585        error = 0;
586        break;
587    default:
588        error = EINVAL;
589        break;
590    }
591
592	COND_LOCK(cond->lock);
593out:
594	COND_UNLOCK(cond->lock);
595	pthread_cond_release(cond);
596	return (error);
597}
598
599
600/*
601 * Signal a condition variable, waking only one thread.
602 */
603int
604__pthread_cond_signal(__unused struct proc *p, struct __pthread_cond_signal_args  *uap, __unused register_t *retval)
605{
606	int condid = uap->condid;
607	pthread_cond_t  * cond;
608	int error;
609	kern_return_t kret;
610
611	cond = pthread_id_to_cond(condid);
612	if (cond == 0)
613		return(EINVAL);
614
615	COND_LOCK(cond->lock);
616
617	if (cond->sig != _PTHREAD_KERN_COND_SIG)
618	{
619		error = EINVAL;
620		goto out;
621	}
622
623	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
624		error = EINVAL;
625		goto out;
626	}
627
628	COND_UNLOCK(cond->lock);
629
630	kret = semaphore_signal(cond->sem);
631    switch (kret) {
632    case KERN_INVALID_ADDRESS:
633    case KERN_PROTECTION_FAILURE:
634        error = EINVAL;
635        break;
636    case KERN_ABORTED:
637    case KERN_OPERATION_TIMED_OUT:
638        error = EINTR;
639        break;
640    case KERN_SUCCESS:
641        error = 0;
642        break;
643    default:
644        error = EINVAL;
645        break;
646    }
647
648	COND_LOCK(cond->lock);
649out:
650	COND_UNLOCK(cond->lock);
651	pthread_cond_release(cond);
652	return (error);
653}
654
655
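/*
 * Wait (block) on a condition variable by waiting on its kernel semaphore.
 * The mutexid is validated and referenced here, but the mutex itself is not
 * unlocked or relocked by the kernel; that protocol is presumed to be handled
 * by the user-space caller.
 */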
656int
657__pthread_cond_wait(__unused struct proc *p, struct __pthread_cond_wait_args  *uap, __unused register_t *retval)
658{
659	int condid = uap->condid;
660	pthread_cond_t  * cond;
661	int mutexid = uap->mutexid;
662	pthread_mutex_t  * mutex;
663	int error;
664	kern_return_t kret;
665
666	cond = pthread_id_to_cond(condid);
667	if (cond == 0)
668		return(EINVAL);
669
670	mutex = pthread_id_to_mutex(mutexid);
671	if (mutex == 0) {
672		pthread_cond_release(cond);
673		return(EINVAL);
674	}
675	COND_LOCK(cond->lock);
676
677	if (cond->sig != _PTHREAD_KERN_COND_SIG)
678	{
679		error = EINVAL;
680		goto out;
681	}
682
683	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
684		error = EINVAL;
685		goto out;
686	}
687
688	COND_UNLOCK(cond->lock);
689
690	kret = semaphore_wait(cond->sem);
691    switch (kret) {
692    case KERN_INVALID_ADDRESS:
693    case KERN_PROTECTION_FAILURE:
694        error = EACCES;
695        break;
696    case KERN_ABORTED:
697    case KERN_OPERATION_TIMED_OUT:
698        error = EINTR;
699        break;
700    case KERN_SUCCESS:
701        error = 0;
702        break;
703    default:
704        error = EINVAL;
705        break;
706    }
707
708	COND_LOCK(cond->lock);
709out:
710	COND_UNLOCK(cond->lock);
711	pthread_cond_release(cond);
712	pthread_mutex_release(mutex);
713	return (error);
714}
715
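/*
 * Wait on a condition variable with an absolute timeout.  The timeout is
 * copied in from user space as a mach_timespec_t and passed directly to
 * semaphore_timedwait(); a zero timespec is used if none was supplied.
 */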
716int
717__pthread_cond_timedwait(__unused struct proc *p, struct __pthread_cond_timedwait_args  *uap, __unused register_t *retval)
718{
719	int condid = uap->condid;
720	pthread_cond_t  * cond;
721	int mutexid = uap->mutexid;
722	pthread_mutex_t  * mutex;
723	mach_timespec_t absts;
724	int error;
725	kern_return_t kret;
726
727	absts.tv_sec = 0;
728	absts.tv_nsec = 0;
729
730	if (uap->abstime)
731		if ((error = copyin(uap->abstime, &absts, sizeof(mach_timespec_t ))))
732			return(error);
733	cond = pthread_id_to_cond(condid);
734	if (cond == 0)
735		return(EINVAL);
736
737	mutex = pthread_id_to_mutex(mutexid);
738	if (mutex == 0) {
739		pthread_cond_release(cond);
740		return(EINVAL);
741	}
742	COND_LOCK(cond->lock);
743
744	if (cond->sig != _PTHREAD_KERN_COND_SIG)
745	{
746		error = EINVAL;
747		goto out;
748	}
749
750	if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) {
751		error = EINVAL;
752		goto out;
753	}
754
755	COND_UNLOCK(cond->lock);
756
757	kret = semaphore_timedwait(cond->sem, absts);
758    switch (kret) {
759    case KERN_INVALID_ADDRESS:
760    case KERN_PROTECTION_FAILURE:
761        error = EACCES;
762        break;
763    case KERN_ABORTED:
764    case KERN_OPERATION_TIMED_OUT:
765        error = EINTR;
766        break;
767    case KERN_SUCCESS:
768        error = 0;
769        break;
770    default:
771        error = EINVAL;
772        break;
773    }
774
775	COND_LOCK(cond->lock);
776out:
777	COND_UNLOCK(cond->lock);
778	pthread_cond_release(cond);
779	pthread_mutex_release(mutex);
780	return (error);
781}
782
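/*
 * Create a new user pthread: create a Mach thread in the calling task,
 * allocate a stack, guard page and pthread structure (unless the caller
 * passed PTHREAD_START_CUSTOM with its own), set up the register state so
 * the thread enters p->p_threadstart, optionally apply scheduling policy,
 * resume it, and return the user pthread address.
 */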
783int
784bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, user_addr_t *retval)
785{
786	kern_return_t kret;
787	void * sright;
788	int error = 0;
789	int allocated = 0;
790	mach_vm_offset_t stackaddr;
791        mach_vm_size_t th_allocsize = 0;
792        mach_vm_size_t user_stacksize;
793        mach_vm_size_t th_stacksize;
794        mach_vm_offset_t th_stackaddr;
795        mach_vm_offset_t th_stack;
796        mach_vm_offset_t th_pthread;
797        mach_port_t th_thport;
798	thread_t th;
799	user_addr_t user_func = uap->func;
800	user_addr_t user_funcarg = uap->func_arg;
801	user_addr_t user_stack = uap->stack;
802	user_addr_t user_pthread = uap->pthread;
803	unsigned int  flags = (unsigned int)uap->flags;
804	vm_map_t vmap = current_map();
805	task_t ctask = current_task();
806	unsigned int policy, importance;
807
808	int isLP64 = 0;
809
810
811#if 0
812	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);
813#endif
814
815	isLP64 = IS_64BIT_PROCESS(p);
816
817
818#if defined(__ppc__)
819	stackaddr = 0xF0000000;
820#elif defined(__i386__)
821	stackaddr = 0xB0000000;
822#else
823#error Need to define a stack address hint for this architecture
824#endif
825	kret = thread_create(ctask, &th);
826	if (kret != KERN_SUCCESS)
827		return(ENOMEM);
828	thread_reference(th);
829
830	sright = (void *) convert_thread_to_port(th);
831	th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(ctask));
832
833	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;		/* when not custom, user_stack holds the requested stack size */
835		th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;
836
837		kret = mach_vm_map(vmap, &stackaddr,
838    				th_allocsize,
839    				page_size-1,
840    				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
841    				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
842    				VM_INHERIT_DEFAULT);
843    		if (kret != KERN_SUCCESS)
844    			kret = mach_vm_allocate(vmap,
845    					&stackaddr, th_allocsize,
846    					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
847    		if (kret != KERN_SUCCESS) {
848			error = ENOMEM;
849			goto out;
850    		}
851#if 0
852		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
853#endif
854		th_stackaddr = stackaddr;
855		allocated = 1;
856     		/*
857		 * The guard page is at the lowest address
858     		 * The stack base is the highest address
859		 */
860		kret = mach_vm_protect(vmap,  stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);
861
862    		if (kret != KERN_SUCCESS) {
863			error = ENOMEM;
864			goto out1;
865    		}
866		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
867		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
868		user_stacksize = th_stacksize;
869	} else {
870		th_stack = user_stack;
871		user_stacksize = user_stack;
872		th_pthread = user_pthread;
873#if 0
874		KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0);
875#endif
876	}
877
878#if defined(__ppc__)
879	/*
880	 * Set up PowerPC registers...
881	 * internally they are always kept as 64 bit and
882	 * since the register set is the same between 32 and 64bit modes
883	 * we don't need 2 different methods for setting the state
884	 */
885	{
886	        ppc_thread_state64_t state64;
887		ppc_thread_state64_t *ts64 = &state64;
888
889		ts64->srr0 = (uint64_t)p->p_threadstart;
890		ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
891		ts64->r3 = (uint64_t)th_pthread;
892		ts64->r4 = (uint64_t)((unsigned int)th_thport);
893		ts64->r5 = (uint64_t)user_func;
894		ts64->r6 = (uint64_t)user_funcarg;
895		ts64->r7 = (uint64_t)user_stacksize;
896		ts64->r8 = (uint64_t)uap->flags;
897
898		thread_set_wq_state64(th, (thread_state_t)ts64);
899
900		thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
901	}
902#elif defined(__i386__)
903	{
904        /*
905         * Set up i386 registers & function call.
906         */
907	if (isLP64 == 0) {
908		x86_thread_state32_t state;
909		x86_thread_state32_t *ts = &state;
910
911        	ts->eip = (int)p->p_threadstart;
912		ts->eax = (unsigned int)th_pthread;
913		ts->ebx = (unsigned int)th_thport;
914		ts->ecx = (unsigned int)user_func;
915		ts->edx = (unsigned int)user_funcarg;
916		ts->edi = (unsigned int)user_stacksize;
917		ts->esi = (unsigned int)uap->flags;
918		/*
919		 * set stack pointer
920		 */
921        	ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
922
923		thread_set_wq_state32(th, (thread_state_t)ts);
924
925	} else {
926	        x86_thread_state64_t state64;
927		x86_thread_state64_t *ts64 = &state64;
928
929        	ts64->rip = (uint64_t)p->p_threadstart;
930		ts64->rdi = (uint64_t)th_pthread;
931		ts64->rsi = (uint64_t)((unsigned int)(th_thport));
932		ts64->rdx = (uint64_t)user_func;
933		ts64->rcx = (uint64_t)user_funcarg;
934		ts64->r8 = (uint64_t)user_stacksize;
935		ts64->r9 = (uint64_t)uap->flags;
936		/*
937		 * set stack pointer aligned to 16 byte boundary
938		 */
939		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
940
941		thread_set_wq_state64(th, (thread_state_t)ts64);
942	}
943	}
944#else
945#error bsdthread_create  not defined for this architecture
946#endif
947	/* Set scheduling parameters if needed */
948	if ((flags & PTHREAD_START_SETSCHED) != 0) {
949		thread_extended_policy_data_t    extinfo;
950		thread_precedence_policy_data_t   precedinfo;
951
952		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
953		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
954
955		if (policy == SCHED_OTHER)
956			extinfo.timeshare = 1;
957		else
958			extinfo.timeshare = 0;
959		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
960
961#define BASEPRI_DEFAULT 31
962		precedinfo.importance = (importance - BASEPRI_DEFAULT);
963		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
964	}
965
966	kret = thread_resume(th);
967	if (kret != KERN_SUCCESS) {
968		error = EINVAL;
969		goto out1;
970	}
971	thread_deallocate(th);	/* drop the creator reference */
972#if 0
973	KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, (unsigned int)th_pthread, 0, 0, 0);
974#endif
975	*retval = th_pthread;
976
977	return(0);
978
979out1:
980	if (allocated != 0)
981		(void)mach_vm_deallocate(vmap,  stackaddr, th_allocsize);
982out:
983	(void)mach_port_deallocate(get_task_ipcspace(ctask), (mach_port_name_t)th_thport);
984	(void)thread_terminate(th);
985	(void)thread_deallocate(th);
986	return(error);
987}
988
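/*
 * Tear down the calling user thread: deallocate its stack region (if one was
 * passed in), terminate the thread, signal an optional semaphore, drop the
 * kernel thread port, and exit via thread_exception_return() -- this call
 * does not return to user space.
 */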
989int
990bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args  *uap, __unused register_t *retval)
991{
992	mach_vm_offset_t  freeaddr;
993	mach_vm_size_t freesize;
994	kern_return_t kret;
995	mach_port_name_t kthport = (mach_port_name_t)uap->port;
996	mach_port_name_t sem = (mach_port_name_t)uap->sem;
997
998	freeaddr = (mach_vm_offset_t)uap->stackaddr;
999	freesize = uap->freesize;
1000
1001#if 0
1002	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);
1003#endif
1004	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
1005		kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
1006		if (kret != KERN_SUCCESS) {
1007			return(EINVAL);
1008		}
1009	}
1010
1011	(void) thread_terminate(current_thread());
1012	if (sem != MACH_PORT_NULL) {
1013		 kret = semaphore_signal_internal_trap(sem);
1014		if (kret != KERN_SUCCESS) {
1015			return(EINVAL);
1016		}
1017	}
1018
1019	if (kthport != MACH_PORT_NULL)
1020			mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
1021	thread_exception_return();
1022	panic("bsdthread_terminate: still running\n");
1023#if 0
1024	KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0);
1025#endif
1026	return(0);
1027}
1028
1029
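/*
 * Record the user-space entry points and pthread structure size for this
 * process: p_threadstart is used by bsdthread_create() and p_wqthread by the
 * workqueue thread setup code.  Typically called once at process start by
 * user-space libpthread.
 */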
1030int
1031bsdthread_register(struct proc *p, struct bsdthread_register_args  *uap, __unused register_t *retval)
1032{
1033	/* syscall randomizer test can pass bogus values */
1034	if (uap->pthsize > MAX_PTHREAD_SIZE) {
1035		return(EINVAL);
1036	}
1037	p->p_threadstart = uap->threadstart;
1038	p->p_wqthread = uap->wqthread;
1039	p->p_pthsize = uap->pthsize;
1040
1041	return(0);
1042}
1043
1044
1045
1046
1047int wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
1048int wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
1049int wq_max_run_latency_usecs	= WQ_MAX_RUN_LATENCY_USECS;
1050int wq_timer_interval_msecs	= WQ_TIMER_INTERVAL_MSECS;
1051
1052
1053SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
1054	   &wq_stalled_window_usecs, 0, "");
1055
1056SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
1057	   &wq_reduce_pool_window_usecs, 0, "");
1058
1059SYSCTL_INT(_kern, OID_AUTO, wq_max_run_latency_usecs, CTLFLAG_RW,
1060	   &wq_max_run_latency_usecs, 0, "");
1061
1062SYSCTL_INT(_kern, OID_AUTO, wq_timer_interval_msecs, CTLFLAG_RW,
1063	   &wq_timer_interval_msecs, 0, "");
1064
1065
1066
1067
1068void
1069workqueue_init_lock(proc_t p)
1070{
1071        lck_mtx_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);
1072}
1073
1074void
1075workqueue_destroy_lock(proc_t p)
1076{
1077	lck_mtx_destroy(&p->p_wqlock, pthread_lck_grp);
1078}
1079
1080static void
1081workqueue_lock(proc_t p)
1082{
1083	lck_mtx_lock(&p->p_wqlock);
1084}
1085
1086static void
1087workqueue_lock_spin(proc_t p)
1088{
1089	lck_mtx_lock_spin(&p->p_wqlock);
1090}
1091
1092static void
1093workqueue_unlock(proc_t p)
1094{
1095	lck_mtx_unlock(&p->p_wqlock);
1096}
1097
1098
1099
1100static void
1101workqueue_interval_timer_start(thread_call_t call, int interval_in_ms)
1102{
1103        uint64_t deadline;
1104
1105	clock_interval_to_deadline(interval_in_ms, 1000 * 1000, &deadline);
1106
1107	thread_call_enter_delayed(call, deadline);
1108}
1109
1110
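/*
 * Periodic thread_call handler for a process workqueue: adds threads to the
 * pool when work has stalled (or when asked via WQ_ADD_TO_POOL), trims the
 * pool when it has been over-provisioned for a full sample window, and
 * re-arms itself only while it still has something to watch.
 */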
1111static void
1112workqueue_timer(struct workqueue *wq, __unused int param1)
1113{
1114        struct timeval tv, dtv;
1115        uint32_t i;
1116	boolean_t added_more_threads = FALSE;
1117	boolean_t reset_maxactive = FALSE;
1118	boolean_t restart_timer = FALSE;
1119
1120        microuptime(&tv);
1121
1122        KERNEL_DEBUG(0xefffd108, (int)wq, 0, 0, 0, 0);
1123
1124	/*
1125	 * check to see if the stall frequency was beyond our tolerance
1126	 * or we have work on the queue, but haven't scheduled any
1127	 * new work within our acceptable time interval because
1128	 * there were no idle threads left to schedule
1129	 *
1130	 * WQ_TIMER_WATCH will only be set if we have 1 or more affinity
1131	 * groups that have stalled (no active threads and no idle threads)...
1132	 * it will not be set if all affinity groups have at least 1 thread
1133	 * that is currently runnable... if all processors have a runnable
1134	 * thread, there is no need to add more threads even if we're not
1135	 * scheduling new work within our allowed window... it just means
1136	 * that the work items are taking a long time to complete.
1137	 */
1138	if (wq->wq_flags & (WQ_ADD_TO_POOL | WQ_TIMER_WATCH)) {
1139
1140		if (wq->wq_flags & WQ_ADD_TO_POOL)
1141		        added_more_threads = TRUE;
1142		else {
1143		        timersub(&tv, &wq->wq_lastran_ts, &dtv);
1144
1145			if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_stalled_window_usecs)
1146			        added_more_threads = TRUE;
1147		}
1148		if (added_more_threads == TRUE) {
1149		        for (i = 0; i < wq->wq_affinity_max && wq->wq_nthreads < WORKQUEUE_MAXTHREADS; i++) {
1150			        (void)workqueue_addnewthread(wq);
1151			}
1152		}
1153	}
1154	timersub(&tv, &wq->wq_reduce_ts, &dtv);
1155
1156	if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_reduce_pool_window_usecs)
1157	        reset_maxactive = TRUE;
1158
	/*
	 * if the pool size has grown beyond the minimum number
	 * of threads needed to keep all of the processors busy, and
	 * the maximum number of threads scheduled concurrently during
	 * the last sample period didn't exceed half the current pool
	 * size, then it's time to trim the pool size back
	 */
1166	if (added_more_threads == FALSE &&
1167	    reset_maxactive == TRUE &&
1168	    wq->wq_nthreads > wq->wq_affinity_max &&
1169	    wq->wq_max_threads_scheduled <= (wq->wq_nthreads / 2)) {
1170		uint32_t nthreads_to_remove;
1171
1172		if ((nthreads_to_remove = (wq->wq_nthreads / 4)) == 0)
1173			nthreads_to_remove = 1;
1174
1175	        for (i = 0; i < nthreads_to_remove && wq->wq_nthreads > wq->wq_affinity_max; i++)
1176		        workqueue_removethread(wq);
1177	}
1178	workqueue_lock_spin(wq->wq_proc);
1179
1180	if (reset_maxactive == TRUE) {
1181	        wq->wq_max_threads_scheduled = 0;
1182		microuptime(&wq->wq_reduce_ts);
1183	}
1184	if (added_more_threads) {
1185	        wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);
1186
	        /*
		 * since we added more threads, we should be
		 * able to run some work if it's still available
		 */
1191	        workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
1192		workqueue_lock_spin(wq->wq_proc);
1193	}
1194	if ((wq->wq_nthreads > wq->wq_affinity_max) ||
1195	    (wq->wq_flags & WQ_TIMER_WATCH)) {
1196	        restart_timer = TRUE;
1197	} else
1198	        wq->wq_flags &= ~WQ_TIMER_RUNNING;
1199
1200	workqueue_unlock(wq->wq_proc);
1201
1202	/*
1203	 * we needed to knock down the WQ_TIMER_RUNNING flag while behind
1204	 * the workqueue lock... however, we don't want to hold the lock
1205	 * while restarting the timer and we certainly don't want 2 or more
1206	 * instances of the timer... so set a local to indicate the need
1207	 * for a restart since the state of wq_flags may change once we
1208	 * drop the workqueue lock...
1209	 */
1210	if (restart_timer == TRUE)
1211	        workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
1212}
1213
1214
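/*
 * Scheduler callout attached to every workqueue thread.  On block it drops
 * the per-affinity active count and, if this was the last active thread for
 * its affinity group and work is queued, tries to dispatch more work; on
 * unblock it re-increments the active count (unless the thread is being
 * unparked, which is accounted for separately).
 */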
1215static void
1216workqueue_callback(
1217		   int		type,
1218		   thread_t	thread)
1219{
1220	struct uthread    *uth;
1221	struct threadlist *tl;
1222	struct workqueue  *wq;
1223
1224	uth = get_bsdthread_info(thread);
1225	tl  = uth->uu_threadlist;
1226	wq  = tl->th_workq;
1227
1228        switch (type) {
1229
1230	      case SCHED_CALL_BLOCK:
1231	        {
1232		uint32_t	old_activecount;
1233
1234		old_activecount = OSAddAtomic(-1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
1235
1236		if (old_activecount == 1 && wq->wq_itemcount) {
1237		        /*
1238			 * we were the last active thread on this affinity set
1239			 * and we've got work to do
1240			 */
1241		        workqueue_lock_spin(wq->wq_proc);
1242			/*
1243			 * if this thread is blocking (not parking)
1244			 * and the idle list is empty for this affinity group
1245			 * we'll count it as a 'stall'
1246			 */
1247			if ((tl->th_flags & TH_LIST_RUNNING) &&
1248			    TAILQ_EMPTY(&wq->wq_thidlelist[tl->th_affinity_tag]))
1249			        wq->wq_stalled_count++;
1250
1251			workqueue_run_nextitem(wq->wq_proc, THREAD_NULL);
1252			/*
1253			 * workqueue_run_nextitem will drop the workqueue
1254			 * lock before it returns
1255			 */
1256		}
1257	        KERNEL_DEBUG(0xefffd020, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
1258	        }
1259		break;
1260
1261	      case SCHED_CALL_UNBLOCK:
1262		/*
1263		 * we cannot take the workqueue_lock here...
1264		 * an UNBLOCK can occur from a timer event which
1265		 * is run from an interrupt context... if the workqueue_lock
1266		 * is already held by this processor, we'll deadlock...
1267		 * the thread lock for the thread being UNBLOCKED
1268		 * is also held
1269		 */
1270		if (tl->th_unparked)
1271		        OSAddAtomic(-1, (SInt32 *)&tl->th_unparked);
1272		else
1273		        OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
1274
1275		KERNEL_DEBUG(0xefffd024, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0);
1276		break;
1277	}
1278}
1279
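/*
 * Shrink the pool by one: pick an idle thread from some affinity group
 * (never the last thread in a group), remove it from the idle list, and
 * either wake it so it can exit normally or, if it was never used, clean up
 * its stack and port and terminate it directly.
 */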
1280static void
1281workqueue_removethread(struct workqueue *wq)
1282{
1283        struct threadlist *tl;
1284	uint32_t	i, affinity_tag = 0;
1285
1286	tl = NULL;
1287
1288	workqueue_lock_spin(wq->wq_proc);
1289
1290	for (i = 0; i < wq->wq_affinity_max; i++) {
1291
1292	        affinity_tag = wq->wq_nextaffinitytag;
1293
1294		if (affinity_tag == 0)
1295		        affinity_tag = wq->wq_affinity_max - 1;
1296		else
1297		        affinity_tag--;
1298		wq->wq_nextaffinitytag = affinity_tag;
1299
1300		/*
1301		 * look for an idle thread to steal from this affinity group
1302		 * but don't grab the only thread associated with it
1303		 */
1304		if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]) && wq->wq_thcount[affinity_tag] > 1) {
1305		        tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
1306			TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);
1307
1308			wq->wq_nthreads--;
1309			wq->wq_thcount[affinity_tag]--;
1310
1311			break;
1312		}
1313	}
1314	workqueue_unlock(wq->wq_proc);
1315
1316	if (tl != NULL) {
1317		thread_sched_call(tl->th_thread, NULL);
1318
1319		if ( (tl->th_flags & TH_LIST_BLOCKED) )
1320		        wakeup(tl);
1321		else {
1322			/*
1323			 * thread was created, but never used...
1324			 * need to clean up the stack and port ourselves
1325			 * since we're not going to spin up through the
1326			 * normal exit path triggered from Libc
1327			 */
1328		        (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
1329			(void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), (mach_port_name_t)tl->th_thport);
1330
1331		        thread_terminate(tl->th_thread);
1332		}
1333		KERNEL_DEBUG(0xefffd030, (int)tl->th_thread, wq->wq_nthreads, tl->th_flags & TH_LIST_BLOCKED, 0, 0);
1334		/*
1335		 * drop our ref on the thread
1336		 */
1337		thread_deallocate(tl->th_thread);
1338
1339		kfree(tl, sizeof(struct threadlist));
1340	}
1341}
1342
1343
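/*
 * Grow the pool by one: create a kernel thread, allocate its user stack,
 * guard page and pthread area, assign it the next affinity tag round-robin,
 * register the scheduler callback, and park it on the idle list before
 * re-evaluating whether queued work can now run.
 */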
1344static int
1345workqueue_addnewthread(struct workqueue *wq)
1346{
1347	struct threadlist *tl;
1348	struct uthread	*uth;
1349	kern_return_t	kret;
1350	thread_t	th;
1351	proc_t		p;
1352	void 	 	*sright;
1353	mach_vm_offset_t stackaddr;
1354	uint32_t	affinity_tag;
1355
1356	p = wq->wq_proc;
1357
1358	kret = thread_create(wq->wq_task, &th);
1359
1360 	if (kret != KERN_SUCCESS)
1361	        return(EINVAL);
1362
1363	tl = kalloc(sizeof(struct threadlist));
1364	bzero(tl, sizeof(struct threadlist));
1365
1366#if defined(__ppc__)
1367	stackaddr = 0xF0000000;
1368#elif defined(__i386__)
1369	stackaddr = 0xB0000000;
1370#else
1371#error Need to define a stack address hint for this architecture
1372#endif
1373	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;
1374
1375	kret = mach_vm_map(wq->wq_map, &stackaddr,
1376    			tl->th_allocsize,
1377    			page_size-1,
1378    			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
1379    			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
1380    			VM_INHERIT_DEFAULT);
1381
1382	if (kret != KERN_SUCCESS) {
1383	        kret = mach_vm_allocate(wq->wq_map,
1384    					&stackaddr, tl->th_allocsize,
1385    					VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
1386	}
1387	if (kret == KERN_SUCCESS) {
1388	        /*
1389		 * The guard page is at the lowest address
1390		 * The stack base is the highest address
1391		 */
1392	        kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);
1393
1394		if (kret != KERN_SUCCESS)
1395		        (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
1396	}
1397	if (kret != KERN_SUCCESS) {
1398		(void) thread_terminate(th);
1399
1400		kfree(tl, sizeof(struct threadlist));
1401
1402	        return(EINVAL);
1403	}
1404	thread_reference(th);
1405
1406	sright = (void *) convert_thread_to_port(th);
1407	tl->th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));
1408
1409	thread_static_param(th, TRUE);
1410
1411        workqueue_lock_spin(p);
1412
1413	affinity_tag = wq->wq_nextaffinitytag;
1414	wq->wq_nextaffinitytag = (affinity_tag + 1) % wq->wq_affinity_max;
1415
1416        workqueue_unlock(p);
1417
1418	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;
1419
1420	tl->th_thread = th;
1421	tl->th_workq = wq;
1422	tl->th_stackaddr = stackaddr;
1423	tl->th_affinity_tag = affinity_tag;
1424
1425#if defined(__ppc__)
1426	//ml_fp_setvalid(FALSE);
1427	thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
1428#endif /* __ppc__ */
1429	/*
1430	 * affinity tag of 0 means no affinity...
1431	 * but we want our tags to be 0 based because they
1432	 * are used to index arrays, so...
1433	 * keep it 0 based internally and bump by 1 when
1434	 * calling out to set it
1435	 */
1436	(void)thread_affinity_set(th, affinity_tag + 1);
1437	thread_sched_call(th, workqueue_callback);
1438
1439	uth = get_bsdthread_info(tl->th_thread);
1440	uth->uu_threadlist = (void *)tl;
1441
1442        workqueue_lock_spin(p);
1443
1444	TAILQ_INSERT_TAIL(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);
1445	wq->wq_nthreads++;
1446	wq->wq_thcount[affinity_tag]++;
1447
1448	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, (int)current_thread(), affinity_tag, wq->wq_nthreads, 0, (int)tl->th_thread);
1449
1450	/*
1451	 * work may have come into the queue while
1452	 * no threads were available to run... since
1453	 * we're adding a new thread, go evaluate the
1454	 * current state
1455	 */
1456	workqueue_run_nextitem(p, THREAD_NULL);
1457	/*
1458	 * workqueue_run_nextitem is responsible for
1459	 * dropping the workqueue lock in all cases
1460	 */
1461
1462	return(0);
1463}
1464
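/*
 * Lazily create the per-process workqueue on first use: one work item list
 * per priority level, per-affinity-group bookkeeping sized to the number of
 * CPUs, a timer thread_call, and an initial pool of one thread per affinity
 * group.
 */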
1465int
1466workq_open(__unused struct proc *p, __unused struct workq_open_args  *uap, __unused register_t *retval)
1467{
1468	struct workqueue * wq;
1469	int size;
1470	char * ptr;
1471	int j;
1472	uint32_t i;
1473	int error = 0;
1474	int num_cpus;
1475	struct workitem * witem;
1476	struct workitemlist *wl;
1477
1478	workqueue_lock(p);
1479
1480	if (p->p_wqptr == NULL) {
1481	        num_cpus = ml_get_max_cpus();
1482
1483		size = (sizeof(struct workqueue)) +
1484		       (num_cpus * sizeof(int *)) +
1485		       (num_cpus * sizeof(TAILQ_HEAD(, threadlist)));
1486
1487		ptr = (char *)kalloc(size);
1488		bzero(ptr, size);
1489
1490		wq = (struct workqueue *)ptr;
1491		wq->wq_flags = WQ_LIST_INITED;
1492		wq->wq_proc = p;
1493		wq->wq_affinity_max = num_cpus;
1494		wq->wq_task = current_task();
1495		wq->wq_map  = current_map();
1496
1497		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
1498		        wl = (struct workitemlist *)&wq->wq_list[i];
1499			TAILQ_INIT(&wl->wl_itemlist);
1500			TAILQ_INIT(&wl->wl_freelist);
1501
1502			for (j = 0; j < WORKITEM_SIZE; j++) {
1503			        witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
1504				TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
1505			}
1506		}
1507		wq->wq_thactivecount = (uint32_t *)((char *)ptr + sizeof(struct workqueue));
1508		wq->wq_thcount       = (uint32_t *)&wq->wq_thactivecount[wq->wq_affinity_max];
1509		wq->wq_thidlelist    = (struct wq_thidlelist *)&wq->wq_thcount[wq->wq_affinity_max];
1510
1511		for (i = 0; i < wq->wq_affinity_max; i++)
1512		        TAILQ_INIT(&wq->wq_thidlelist[i]);
1513
1514		TAILQ_INIT(&wq->wq_thrunlist);
1515
1516		p->p_wqptr = (void *)wq;
1517		p->p_wqsize = size;
1518
1519		workqueue_unlock(p);
1520
1521		wq->wq_timer_call = thread_call_allocate((thread_call_func_t)workqueue_timer, (thread_call_param_t)wq);
1522
1523		for (i = 0; i < wq->wq_affinity_max; i++) {
1524			(void)workqueue_addnewthread(wq);
1525		}
1526		/* If unable to create any threads, return error */
1527		if (wq->wq_nthreads == 0)
1528			error = EINVAL;
1529		workqueue_lock_spin(p);
1530
1531		microuptime(&wq->wq_reduce_ts);
1532		microuptime(&wq->wq_lastran_ts);
1533		wq->wq_max_threads_scheduled = 0;
1534		wq->wq_stalled_count = 0;
1535	}
1536	workqueue_unlock(p);
1537
1538	return(error);
1539}
1540
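/*
 * Entry point for the workq_ops() syscall: WQOPS_QUEUE_ADD and
 * WQOPS_QUEUE_REMOVE manage work items on the prioritized lists, while
 * WQOPS_THREAD_RETURN is issued by a worker that has finished an item.
 * Every successful path falls through to workqueue_run_nextitem(), which
 * drops the workqueue lock.
 */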
1541int
1542workq_ops(struct proc *p, struct workq_ops_args  *uap, __unused register_t *retval)
1543{
1544	int options	 = uap->options;
	int prio	 = uap->prio;	/* should be used to find the right workqueue */
1546	user_addr_t item = uap->item;
1547	int error = 0;
1548	thread_t th = THREAD_NULL;
1549        struct workqueue *wq;
1550
	prio += 2;	/* normalize prio from the -2..+2 range to 0..4 */
1552
1553	switch (options) {
1554
1555		case WQOPS_QUEUE_ADD: {
1556
1557		        KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0);
1558
1559			if ((prio < 0) || (prio >= 5))
1560				return (EINVAL);
1561
1562			workqueue_lock_spin(p);
1563
1564			if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
1565			        workqueue_unlock(p);
1566			        return (EINVAL);
1567			}
1568			error = workqueue_additem(wq, prio, item);
1569
1570		        }
1571			break;
1572		case WQOPS_QUEUE_REMOVE: {
1573
1574			if ((prio < 0) || (prio >= 5))
1575				return (EINVAL);
1576
1577			workqueue_lock_spin(p);
1578
1579			if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
1580			        workqueue_unlock(p);
1581			        return (EINVAL);
1582			}
1583		        error = workqueue_removeitem(wq, prio, item);
1584			}
1585			break;
1586		case WQOPS_THREAD_RETURN: {
1587
1588		        th = current_thread();
1589
1590		        KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, 0, 0, 0, 0, 0);
1591
1592			workqueue_lock_spin(p);
1593
1594			if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
1595			        workqueue_unlock(p);
1596			        return (EINVAL);
1597			}
1598		        }
1599			break;
1600		default:
1601		        return (EINVAL);
1602	}
1603	workqueue_run_nextitem(p, th);
1604	/*
1605	 * workqueue_run_nextitem is responsible for
1606	 * dropping the workqueue lock in all cases
1607	 */
1608	return(error);
1609}
1610
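/*
 * Called at process exit: detach the workqueue from the proc, cancel and
 * free the timer call, drop the scheduler callback and the last thread
 * reference for every running and idle worker, and free the workqueue
 * allocation itself.
 */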
1611void
1612workqueue_exit(struct proc *p)
1613{
1614	struct workqueue  * wq;
1615	struct threadlist  * tl, *tlist;
1616	uint32_t i;
1617
1618	if (p->p_wqptr != NULL) {
1619
1620	        workqueue_lock_spin(p);
1621
1622	        wq = (struct workqueue *)p->p_wqptr;
1623		p->p_wqptr = NULL;
1624
1625		workqueue_unlock(p);
1626
1627		if (wq == NULL)
1628		        return;
1629
1630		if (wq->wq_flags & WQ_TIMER_RUNNING)
1631		        thread_call_cancel(wq->wq_timer_call);
1632		thread_call_free(wq->wq_timer_call);
1633
1634		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
1635		        /*
1636			 * drop our last ref on the thread
1637			 */
1638		        thread_sched_call(tl->th_thread, NULL);
1639		        thread_deallocate(tl->th_thread);
1640
1641			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1642			kfree(tl, sizeof(struct threadlist));
1643		}
1644		for (i = 0; i < wq->wq_affinity_max; i++) {
1645		        TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist[i], th_entry, tlist) {
1646			        /*
1647				 * drop our last ref on the thread
1648				 */
1649			        thread_sched_call(tl->th_thread, NULL);
1650				thread_deallocate(tl->th_thread);
1651
1652				TAILQ_REMOVE(&wq->wq_thidlelist[i], tl, th_entry);
1653				kfree(tl, sizeof(struct threadlist));
1654			}
1655		}
1656		kfree(wq, p->p_wqsize);
1657	}
1658}
1659
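/*
 * Queue a work item at the given (already normalized) priority: grab a free
 * workitem record, record the user item pointer, and append it to that
 * priority's item list.  Caller holds the workqueue lock.
 */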
1660static int
1661workqueue_additem(struct workqueue *wq, int prio, user_addr_t item)
1662{
1663	struct workitem	*witem;
1664	struct workitemlist *wl;
1665
1666	wl = (struct workitemlist *)&wq->wq_list[prio];
1667
1668	if (TAILQ_EMPTY(&wl->wl_freelist))
1669		return (ENOMEM);
1670
1671	witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
1672	TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);
1673
1674	witem->wi_item = item;
1675	TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);
1676
1677	if (wq->wq_itemcount == 0) {
1678	        microuptime(&wq->wq_lastran_ts);
1679		wq->wq_stalled_count = 0;
1680	}
1681	wq->wq_itemcount++;
1682
1683	return (0);
1684}
1685
1686static int
1687workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
1688{
1689	struct workitem *witem;
1690	struct workitemlist *wl;
1691	int error = ESRCH;
1692
1693	wl = (struct workitemlist *)&wq->wq_list[prio];
1694
1695	TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
1696		if (witem->wi_item == item) {
1697			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
1698			wq->wq_itemcount--;
1699
1700			witem->wi_item = (user_addr_t)0;
1701			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
1702
1703			error = 0;
1704			break;
1705		}
1706	}
1707	if (wq->wq_itemcount == 0)
1708	        wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH);
1709
1710	return (error);
1711}
1712
1713/*
1714 * workqueue_run_nextitem:
1715 *   called with the workqueue lock held...
1716 *   responsible for dropping it in all cases
1717 */
1718static void
1719workqueue_run_nextitem(proc_t p, thread_t thread)
1720{
1721        struct workqueue *wq;
1722	struct workitem *witem = NULL;
1723	user_addr_t item = 0;
1724	thread_t th_to_run = THREAD_NULL;
1725	thread_t th_to_park = THREAD_NULL;
1726	int wake_thread = 0;
1727	int reuse_thread = 1;
1728	uint32_t stalled_affinity_count = 0;
1729	int i;
1730	uint32_t affinity_tag;
1731	struct threadlist *tl = NULL;
1732	struct uthread *uth = NULL;
1733	struct workitemlist *wl;
1734	boolean_t start_timer = FALSE;
1735	struct timeval tv, lat_tv;
1736
1737	wq = (struct workqueue *)p->p_wqptr;
1738
1739	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, (int)thread, wq->wq_threads_scheduled, wq->wq_stalled_count, 0, 0);
1740
1741	if (wq->wq_itemcount == 0) {
1742	        if ((th_to_park = thread) == THREAD_NULL)
1743		        goto out;
1744	        goto parkit;
1745	}
1746	if (thread != THREAD_NULL) {
1747	        /*
1748		 * we're a worker thread from the pool... currently we
1749		 * are considered 'active' which means we're counted
1750		 * in "wq_thactivecount"
1751		 */
1752	        uth = get_bsdthread_info(thread);
1753		tl = uth->uu_threadlist;
1754
1755		if (wq->wq_thactivecount[tl->th_affinity_tag] == 1) {
1756		        /*
1757			 * we're the only active thread associated with our
1758			 * affinity group, so pick up some work and keep going
1759			 */
1760		        th_to_run = thread;
1761			goto pick_up_work;
1762		}
1763	}
1764	for (affinity_tag = 0; affinity_tag < wq->wq_affinity_max; affinity_tag++) {
1765	        /*
1766		 * look for first affinity group that is currently not active
1767		 * and has at least 1 idle thread
1768		 */
1769	        if (wq->wq_thactivecount[affinity_tag] == 0) {
1770			if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]))
1771			        break;
1772		        stalled_affinity_count++;
1773		}
1774	}
1775	if (thread == THREAD_NULL) {
1776	        /*
1777		 * we're not one of the 'worker' threads
1778		 */
1779	        if (affinity_tag >= wq->wq_affinity_max) {
1780		        /*
1781			 * we've already got at least 1 thread per
1782			 * affinity group in the active state... or
1783			 * we've got no idle threads to play with
1784			 */
1785		        if (stalled_affinity_count) {
1786
1787				if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
1788				        wq->wq_flags |= WQ_TIMER_RUNNING;
1789					start_timer = TRUE;
1790				}
1791				wq->wq_flags |= WQ_TIMER_WATCH;
1792			}
1793			goto out;
1794		}
1795	} else {
1796	        /*
1797		 * we're overbooked on the affinity group we're associated with,
1798		 * so park this thread
1799		 */
1800	        th_to_park = thread;
1801
1802		if (affinity_tag >= wq->wq_affinity_max) {
1803		        /*
1804			 * all the affinity groups have active threads
1805			 * running, or there are no idle threads to
1806			 * schedule
1807			 */
1808		        if (stalled_affinity_count) {
1809
1810				if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) {
1811				        wq->wq_flags |= WQ_TIMER_RUNNING;
1812					start_timer = TRUE;
1813				}
1814				wq->wq_flags |= WQ_TIMER_WATCH;
1815			}
1816		        goto parkit;
1817		}
1818		/*
1819		 * we've got a candidate (affinity group with no currently
1820		 * active threads) to start a new thread on...
1821		 * we already know there is both work available
1822		 * and an idle thread with the correct affinity tag, so
1823		 * fall into the code that pulls a new thread and workitem...
1824		 * once we've kicked that thread off, we'll park this one
1825		 */
1826	}
1827	tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]);
1828	TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry);
1829
1830	th_to_run = tl->th_thread;
1831	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
1832
1833	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
1834	        tl->th_flags &= ~TH_LIST_SUSPENDED;
1835		reuse_thread = 0;
1836	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
1837	        tl->th_flags &= ~TH_LIST_BLOCKED;
1838		wake_thread = 1;
1839	}
1840	tl->th_flags |= TH_LIST_RUNNING;
1841
1842        wq->wq_threads_scheduled++;
1843
1844	if (wq->wq_threads_scheduled > wq->wq_max_threads_scheduled)
1845	        wq->wq_max_threads_scheduled = wq->wq_threads_scheduled;
1846
1847pick_up_work:
1848	for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
1849	        wl = (struct workitemlist *)&wq->wq_list[i];
1850
1851		if (!(TAILQ_EMPTY(&wl->wl_itemlist))) {
1852
1853		        witem = TAILQ_FIRST(&wl->wl_itemlist);
1854			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
1855			wq->wq_itemcount--;
1856
1857			item = witem->wi_item;
1858			witem->wi_item = (user_addr_t)0;
1859			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
1860
1861			break;
1862		}
1863	}
1864	if (witem == NULL)
1865	        panic("workq_run_nextitem: NULL witem");
1866
1867	if (thread != th_to_run) {
1868	        /*
1869		 * we're starting up a thread from a parked/suspended condition
1870		 */
1871	        OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]);
1872		OSAddAtomic(1, (SInt32 *)&tl->th_unparked);
1873	}
1874	if (wq->wq_itemcount == 0)
1875		wq->wq_flags &= ~WQ_TIMER_WATCH;
1876	else {
1877	        microuptime(&tv);
1878		/*
1879		 * if we had any affinity groups stall (no threads runnable)
1880		 * since we last scheduled an item... and
1881		 * the elapsed time since we last scheduled an item
1882		 * exceeds the latency tolerance...
1883		 * we ask the timer thread (which should already be running)
1884		 * to add some more threads to the pool
1885		 */
1886		if (wq->wq_stalled_count && !(wq->wq_flags & WQ_ADD_TO_POOL)) {
1887		        timersub(&tv, &wq->wq_lastran_ts, &lat_tv);
1888
1889			if (((lat_tv.tv_sec * 1000000) + lat_tv.tv_usec) > wq_max_run_latency_usecs)
1890			        wq->wq_flags |= WQ_ADD_TO_POOL;
1891
1892			KERNEL_DEBUG(0xefffd10c, wq->wq_stalled_count, lat_tv.tv_sec, lat_tv.tv_usec, wq->wq_flags, 0);
1893		}
1894		wq->wq_lastran_ts = tv;
1895	}
1896	wq->wq_stalled_count = 0;
1897        workqueue_unlock(p);
1898
1899        KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[0], wq->wq_thactivecount[1],
1900		     wq->wq_thactivecount[2], wq->wq_thactivecount[3], 0);
1901
1902        KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[4], wq->wq_thactivecount[5],
1903		     wq->wq_thactivecount[6], wq->wq_thactivecount[7], 0);
1904
	/*
	 * if the current thread is being reused for the work item, wq_runitem()
	 * does not return here; the thread exits via thread_exception_return()
	 */
1908	wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
1909
1910	if (th_to_park == THREAD_NULL) {
1911
1912	        KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, (int)item, wq->wq_flags, 1, 0);
1913
1914		return;
1915	}
1916	workqueue_lock_spin(p);
1917
1918parkit:
1919	wq->wq_threads_scheduled--;
1920	/*
1921	 * this is a workqueue thread with no more
1922	 * work to do... park it for now
1923	 */
1924	uth = get_bsdthread_info(th_to_park);
1925	tl = uth->uu_threadlist;
1926	if (tl == 0)
1927	        panic("wq thread with no threadlist ");
1928
1929	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1930	tl->th_flags &= ~TH_LIST_RUNNING;
1931
1932	tl->th_flags |= TH_LIST_BLOCKED;
1933	TAILQ_INSERT_HEAD(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry);
1934
1935	assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
1936
1937	workqueue_unlock(p);
1938
1939	if (start_timer)
1940		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
1941
1942	KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, (int)current_thread(), wq->wq_threads_scheduled, 0, 0, (int)th_to_park);
1943
1944	thread_block((thread_continue_t)thread_exception_return);
1945
1946	panic("unexpected return from thread_block");
1947
1948out:
1949	workqueue_unlock(p);
1950
1951	if (start_timer)
1952		workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs);
1953
1954	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, 0, wq->wq_flags, 2, 0);
1955
1956	return;
1957}
1958
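/*
 * Hand a work item to the chosen thread: set up its register state via
 * setup_wqthread(), then either return directly to user space on the current
 * thread (reuse case), wake a parked thread, or resume a freshly created,
 * still-suspended one.
 */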
1959static void
1960wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
1961	   int reuse_thread, int wake_thread, int return_directly)
1962{
1963	int ret = 0;
1964
1965	KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, (int)current_thread(), (int)item, wake_thread, tl->th_affinity_tag, (int)th);
1966
1967	ret = setup_wqthread(p, th, item, reuse_thread, tl);
1968
1969	if (ret != 0)
1970		panic("setup_wqthread failed  %x\n", ret);
1971
1972	if (return_directly) {
1973		thread_exception_return();
1974
1975		panic("wq_runitem: thread_exception_return returned ...\n");
1976	}
1977	if (wake_thread) {
1978		KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);
1979
1980		wakeup(tl);
1981	} else {
1982	        KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th);
1983
1984		thread_resume(th);
1985	}
1986}
1987
1988
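/*
 * Load the architecture-specific register state so that 'th' enters user
 * space at p->p_wqthread with the pthread self pointer, thread port, stack
 * base, work item and reuse flag as arguments, and a properly aligned stack
 * pointer.
 */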
1989int
1990setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
1991{
1992#if defined(__ppc__)
1993	/*
1994	 * Set up PowerPC registers...
1995	 * internally they are always kept as 64 bit and
1996	 * since the register set is the same between 32 and 64bit modes
1997	 * we don't need 2 different methods for setting the state
1998	 */
1999	{
2000	        ppc_thread_state64_t state64;
2001		ppc_thread_state64_t *ts64 = &state64;
2002
2003		ts64->srr0 = (uint64_t)p->p_wqthread;
2004		ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
2005		ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2006		ts64->r4 = (uint64_t)((unsigned int)tl->th_thport);
2007		ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2008		ts64->r6 = (uint64_t)item;
2009		ts64->r7 = (uint64_t)reuse_thread;
2010		ts64->r8 = (uint64_t)0;
2011
2012		thread_set_wq_state64(th, (thread_state_t)ts64);
2013	}
2014#elif defined(__i386__)
2015	int isLP64 = 0;
2016
2017	isLP64 = IS_64BIT_PROCESS(p);
2018        /*
2019         * Set up i386 registers & function call.
2020         */
2021	if (isLP64 == 0) {
2022		x86_thread_state32_t state;
2023		x86_thread_state32_t *ts = &state;
2024
2025        	ts->eip = (int)p->p_wqthread;
2026		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2027		ts->ebx = (unsigned int)tl->th_thport;
2028		ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2029		ts->edx = (unsigned int)item;
2030		ts->edi = (unsigned int)reuse_thread;
2031		ts->esi = (unsigned int)0;
2032		/*
2033		 * set stack pointer
2034		 */
2035        	ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));
2036
2037		thread_set_wq_state32(th, (thread_state_t)ts);
2038
2039	} else {
2040	        x86_thread_state64_t state64;
2041		x86_thread_state64_t *ts64 = &state64;
2042
2043        	ts64->rip = (uint64_t)p->p_wqthread;
2044		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
2045		ts64->rsi = (uint64_t)((unsigned int)(tl->th_thport));
2046		ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
2047		ts64->rcx = (uint64_t)item;
2048		ts64->r8 = (uint64_t)reuse_thread;
2049		ts64->r9 = (uint64_t)0;
2050
2051		/*
2052		 * set stack pointer aligned to 16 byte boundary
2053		 */
2054		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);
2055
2056		thread_set_wq_state64(th, (thread_state_t)ts64);
2057	}
2058#else
2059#error setup_wqthread  not defined for this architecture
2060#endif
2061	return(0);
2062}
2063
2064