1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29/*
30 *	pthread_synch.c
31 */
32
33#define  _PTHREAD_CONDATTR_T
34#define  _PTHREAD_COND_T
35#define _PTHREAD_MUTEXATTR_T
36#define _PTHREAD_MUTEX_T
37#define _PTHREAD_RWLOCKATTR_T
38#define _PTHREAD_RWLOCK_T
39
40#undef pthread_mutexattr_t
41#undef pthread_mutex_t
42#undef pthread_condattr_t
43#undef pthread_cond_t
44#undef pthread_rwlockattr_t
45#undef pthread_rwlock_t
46
47#include <sys/param.h>
48#include <sys/queue.h>
49#include <sys/resourcevar.h>
50//#include <sys/proc_internal.h>
51#include <sys/kauth.h>
52#include <sys/systm.h>
53#include <sys/timeb.h>
54#include <sys/times.h>
55#include <sys/acct.h>
56#include <sys/kernel.h>
57#include <sys/wait.h>
58#include <sys/signalvar.h>
59#include <sys/sysctl.h>
60#include <sys/syslog.h>
61#include <sys/stat.h>
62#include <sys/lock.h>
63#include <sys/kdebug.h>
64//#include <sys/sysproto.h>
65#include <sys/vm.h>
66#include <sys/user.h>		/* for coredump */
67#include <sys/proc_info.h>	/* for fill_procworkqueue */
68
69
70#include <mach/mach_port.h>
71#include <mach/mach_types.h>
72#include <mach/semaphore.h>
73#include <mach/sync_policy.h>
74#include <mach/task.h>
75#include <mach/vm_prot.h>
76#include <kern/kern_types.h>
77#include <kern/task.h>
78#include <kern/clock.h>
79#include <mach/kern_return.h>
80#include <kern/thread.h>
81#include <kern/sched_prim.h>
82#include <kern/kalloc.h>
83#include <kern/sched_prim.h>	/* for thread_exception_return */
84#include <kern/processor.h>
85#include <kern/assert.h>
86#include <mach/mach_vm.h>
87#include <mach/mach_param.h>
88#include <mach/thread_status.h>
89#include <mach/thread_policy.h>
90#include <mach/message.h>
91#include <mach/port.h>
92//#include <vm/vm_protos.h>
93#include <vm/vm_fault.h>
94#include <vm/vm_map.h>
95#include <mach/thread_act.h> /* for thread_resume */
96#include <machine/machine_routines.h>
97
98#include <libkern/OSAtomic.h>
99
100#include <sys/pthread_shims.h>
101#include "kern_internal.h"
102
103uint32_t pthread_debug_tracing = 0;
104
105SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
106		   &pthread_debug_tracing, 0, "")
107
108// XXX: Dirty import for sys/signarvar.h that's wrapped in BSD_KERNEL_PRIVATE
109#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
110
111lck_grp_attr_t   *pthread_lck_grp_attr;
112lck_grp_t    *pthread_lck_grp;
113lck_attr_t   *pthread_lck_attr;
114
115extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
116extern void workqueue_thread_yielded(void);
117
118static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
119					boolean_t  overcommit, pthread_priority_t oc_prio);
120
121static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);
122
123static void wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
124		       int reuse_thread, int wake_thread, int return_directly);
125
126static int _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl);
127
128static void wq_unpark_continue(void);
129static void wq_unsuspend_continue(void);
130
131static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
132static void workqueue_removethread(struct threadlist *tl, int fromexit);
133static void workqueue_lock_spin(proc_t);
134static void workqueue_unlock(proc_t);
135
136int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
137int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
138
139#define WQ_MAXPRI_MIN	0	/* low prio queue num */
140#define WQ_MAXPRI_MAX	2	/* max  prio queuenum */
141#define WQ_PRI_NUM	3	/* number of prio work queues */
142
143#define C_32_STK_ALIGN          16
144#define C_64_STK_ALIGN          16
145#define C_64_REDZONE_LEN        128
146#define TRUNC_DOWN32(a,c)       ((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
147#define TRUNC_DOWN64(a,c)       ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
148
149/*
150 * Flags filed passed to bsdthread_create and back in pthread_start
15131  <---------------------------------> 0
152_________________________________________
153| flags(8) | policy(8) | importance(16) |
154-----------------------------------------
155*/
156
157#define PTHREAD_START_CUSTOM	0x01000000
158#define PTHREAD_START_SETSCHED	0x02000000
159#define PTHREAD_START_DETACHED	0x04000000
160#define PTHREAD_START_QOSCLASS	0x08000000
161#define PTHREAD_START_QOSCLASS_MASK 0xffffff
162#define PTHREAD_START_POLICY_BITSHIFT 16
163#define PTHREAD_START_POLICY_MASK 0xff
164#define PTHREAD_START_IMPORTANCE_MASK 0xffff
165
166#define SCHED_OTHER      POLICY_TIMESHARE
167#define SCHED_FIFO       POLICY_FIFO
168#define SCHED_RR         POLICY_RR
169
170int
171_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
172{
173	kern_return_t kret;
174	void * sright;
175	int error = 0;
176	int allocated = 0;
177	mach_vm_offset_t stackaddr;
178	mach_vm_size_t th_allocsize = 0;
179	mach_vm_size_t user_stacksize;
180	mach_vm_size_t th_stacksize;
181	mach_vm_size_t th_guardsize;
182	mach_vm_offset_t th_stackaddr;
183	mach_vm_offset_t th_stack;
184	mach_vm_offset_t th_pthread;
185	mach_port_name_t th_thport;
186	thread_t th;
187	vm_map_t vmap = pthread_kern->current_map();
188	task_t ctask = current_task();
189	unsigned int policy, importance;
190
191	int isLP64 = 0;
192
193	if (pthread_kern->proc_get_register(p) == 0) {
194		return EINVAL;
195	}
196
197	PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);
198
199	isLP64 = proc_is64bit(p);
200	th_guardsize = vm_map_page_size(vmap);
201
202#if defined(__i386__) || defined(__x86_64__)
203	stackaddr = 0xB0000000;
204#else
205#error Need to define a stack address hint for this architecture
206#endif
207	kret = pthread_kern->thread_create(ctask, &th);
208	if (kret != KERN_SUCCESS)
209		return(ENOMEM);
210	thread_reference(th);
211
212	sright = (void *)pthread_kern->convert_thread_to_port(th);
213	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
214
215	if ((flags & PTHREAD_START_CUSTOM) == 0) {
216		th_stacksize = (mach_vm_size_t)user_stack;		/* if it is custom them it is stacksize */
217		th_allocsize = th_stacksize + th_guardsize + pthread_kern->proc_get_pthsize(p);
218
219		kret = mach_vm_map(vmap, &stackaddr,
220    				th_allocsize,
221    				page_size-1,
222    				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
223    				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
224    				VM_INHERIT_DEFAULT);
225    		if (kret != KERN_SUCCESS)
226    			kret = mach_vm_allocate(vmap,
227    					&stackaddr, th_allocsize,
228    					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
229    		if (kret != KERN_SUCCESS) {
230			error = ENOMEM;
231			goto out;
232    		}
233
234		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
235
236		th_stackaddr = stackaddr;
237		allocated = 1;
238     		/*
239		 * The guard page is at the lowest address
240     		 * The stack base is the highest address
241		 */
242		kret = mach_vm_protect(vmap,  stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
243
244    		if (kret != KERN_SUCCESS) {
245			error = ENOMEM;
246			goto out1;
247    		}
248		th_stack = (stackaddr + th_stacksize + th_guardsize);
249		th_pthread = (stackaddr + th_stacksize + th_guardsize);
250		user_stacksize = th_stacksize;
251
252	       /*
253		* Pre-fault the first page of the new thread's stack and the page that will
254		* contain the pthread_t structure.
255		*/
256		vm_fault( vmap,
257		  vm_map_trunc_page_mask(th_stack - PAGE_SIZE_64, vm_map_page_mask(vmap)),
258		  VM_PROT_READ | VM_PROT_WRITE,
259		  FALSE,
260		  THREAD_UNINT, NULL, 0);
261
262		vm_fault( vmap,
263		  vm_map_trunc_page_mask(th_pthread, vm_map_page_mask(vmap)),
264		  VM_PROT_READ | VM_PROT_WRITE,
265		  FALSE,
266		  THREAD_UNINT, NULL, 0);
267	} else {
268		th_stack = user_stack;
269		user_stacksize = user_stack;
270		th_pthread = user_pthread;
271
272		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
273	}
274
275#if defined(__i386__) || defined(__x86_64__)
276	/*
277	 * Set up i386 registers & function call.
278	 */
279	if (isLP64 == 0) {
280		x86_thread_state32_t state;
281		x86_thread_state32_t *ts = &state;
282
283		ts->eip = (unsigned int)pthread_kern->proc_get_threadstart(p);
284		ts->eax = (unsigned int)th_pthread;
285		ts->ebx = (unsigned int)th_thport;
286		ts->ecx = (unsigned int)user_func;
287		ts->edx = (unsigned int)user_funcarg;
288		ts->edi = (unsigned int)user_stacksize;
289		ts->esi = (unsigned int)flags;
290		/*
291		 * set stack pointer
292		 */
293		ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
294
295		error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
296		if (error != KERN_SUCCESS) {
297			error = EINVAL;
298			goto out;
299		}
300	} else {
301		x86_thread_state64_t state64;
302		x86_thread_state64_t *ts64 = &state64;
303
304		ts64->rip = (uint64_t)pthread_kern->proc_get_threadstart(p);
305		ts64->rdi = (uint64_t)th_pthread;
306		ts64->rsi = (uint64_t)(th_thport);
307		ts64->rdx = (uint64_t)user_func;
308		ts64->rcx = (uint64_t)user_funcarg;
309		ts64->r8 = (uint64_t)user_stacksize;
310		ts64->r9 = (uint64_t)flags;
311		/*
312		 * set stack pointer aligned to 16 byte boundary
313		 */
314		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
315
316		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
317		if (error != KERN_SUCCESS) {
318			error = EINVAL;
319			goto out;
320		}
321
322	}
323#elif defined(__arm__)
324	arm_thread_state_t state;
325	arm_thread_state_t *ts = &state;
326
327	ts->pc = (int)pthread_kern->proc_get_threadstart(p);
328	ts->r[0] = (unsigned int)th_pthread;
329	ts->r[1] = (unsigned int)th_thport;
330	ts->r[2] = (unsigned int)user_func;
331	ts->r[3] = (unsigned int)user_funcarg;
332	ts->r[4] = (unsigned int)user_stacksize;
333	ts->r[5] = (unsigned int)flags;
334
335	/* Set r7 & lr to 0 for better back tracing */
336	ts->r[7] = 0;
337	ts->lr = 0;
338
339	/*
340	 * set stack pointer
341	 */
342	ts->sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));
343
344	(void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
345
346#else
347#error bsdthread_create  not defined for this architecture
348#endif
349
350	if ((flags & PTHREAD_START_SETSCHED) != 0) {
351		/* Set scheduling parameters if needed */
352		thread_extended_policy_data_t    extinfo;
353		thread_precedence_policy_data_t   precedinfo;
354
355		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
356		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
357
358		if (policy == SCHED_OTHER) {
359			extinfo.timeshare = 1;
360		} else {
361			extinfo.timeshare = 0;
362		}
363
364		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
365
366#define BASEPRI_DEFAULT 31
367		precedinfo.importance = (importance - BASEPRI_DEFAULT);
368		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
369	} else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
370		/* Set thread QoS class if requested. */
371		pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
372
373		thread_qos_policy_data_t qos;
374		qos.qos_tier = pthread_priority_get_qos_class(priority);
375		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
376				_pthread_priority_get_relpri(priority);
377
378		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
379	}
380
381	kret = pthread_kern->thread_resume(th);
382	if (kret != KERN_SUCCESS) {
383		error = EINVAL;
384		goto out1;
385	}
386	thread_deallocate(th);	/* drop the creator reference */
387
388	PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
389
390	*retval = th_pthread;
391
392	return(0);
393
394out1:
395	if (allocated != 0) {
396		(void)mach_vm_deallocate(vmap,  stackaddr, th_allocsize);
397	}
398out:
399	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
400	(void)thread_terminate(th);
401	(void)thread_deallocate(th);
402	return(error);
403}
404
405int
406_bsdthread_terminate(__unused struct proc *p,
407		     user_addr_t stackaddr,
408		     size_t size,
409		     uint32_t kthport,
410		     uint32_t sem,
411		     __unused int32_t *retval)
412{
413	mach_vm_offset_t freeaddr;
414	mach_vm_size_t freesize;
415	kern_return_t kret;
416
417	freeaddr = (mach_vm_offset_t)stackaddr;
418	freesize = size;
419
420	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
421
422	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
423		kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
424		if (kret != KERN_SUCCESS) {
425			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
426			return(EINVAL);
427		}
428	}
429
430	(void) thread_terminate(current_thread());
431	if (sem != MACH_PORT_NULL) {
432		 kret = pthread_kern->semaphore_signal_internal_trap(sem);
433		if (kret != KERN_SUCCESS) {
434			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
435			return(EINVAL);
436		}
437	}
438
439	if (kthport != MACH_PORT_NULL) {
440		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
441	}
442
443	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
444
445	pthread_kern->thread_exception_return();
446	panic("bsdthread_terminate: still running\n");
447
448	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
449
450	return(0);
451}
452
453int
454_bsdthread_register(struct proc *p,
455		    user_addr_t threadstart,
456		    user_addr_t wqthread,
457		    int pthsize,
458		    user_addr_t pthread_init_data,
459		    user_addr_t targetconc_ptr,
460		    uint64_t dispatchqueue_offset,
461		    int32_t *retval)
462{
463	/* prevent multiple registrations */
464	if (pthread_kern->proc_get_register(p) != 0) {
465		return(EINVAL);
466	}
467	/* syscall randomizer test can pass bogus values */
468	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
469		return(EINVAL);
470	}
471	pthread_kern->proc_set_threadstart(p, threadstart);
472	pthread_kern->proc_set_wqthread(p, wqthread);
473	pthread_kern->proc_set_pthsize(p, pthsize);
474	pthread_kern->proc_set_register(p);
475
476	/* if we have pthread_init_data, then we use that and target_concptr (which is an offset) get data. */
477	if (pthread_init_data != 0) {
478		thread_qos_policy_data_t qos;
479
480		struct _pthread_registration_data data;
481		size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)targetconc_ptr);
482
483		kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
484		if (kr != KERN_SUCCESS) {
485			return EINVAL;
486		}
487
488		/* Incoming data from the data structure */
489		pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
490
491		/* Outgoing data that userspace expects as a reply */
492		if (pthread_kern->qos_main_thread_active()) {
493			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
494			boolean_t gd = FALSE;
495
496			kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
497			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
498				/* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
499				qos.qos_tier = THREAD_QOS_LEGACY;
500				qos.tier_importance = 0;
501
502				kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
503			}
504
505			if (kr == KERN_SUCCESS) {
506				data.main_qos = pthread_qos_class_get_priority(qos.qos_tier);
507			} else {
508				data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
509			}
510		} else {
511			data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
512		}
513
514		kr = copyout(&data, pthread_init_data, pthread_init_sz);
515		if (kr != KERN_SUCCESS) {
516			return EINVAL;
517		}
518	} else {
519		pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
520		pthread_kern->proc_set_targconc(p, targetconc_ptr);
521	}
522
523	/* return the supported feature set as the return value. */
524	*retval = PTHREAD_FEATURE_SUPPORTED;
525
526	return(0);
527}
528
529int
530_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
531{
532 	kern_return_t kr;
533	thread_t th;
534
535	pthread_priority_t priority;
536
537	/* Unused parameters must be zero. */
538	if (arg3 != 0) {
539		return EINVAL;
540	}
541
542	/* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
543	if (proc_is64bit(p)) {
544		uint64_t v;
545		kr = copyin(tsd_priority_addr, &v, sizeof(v));
546		if (kr != KERN_SUCCESS) {
547			return kr;
548		}
549		priority = (int)(v & 0xffffffff);
550	} else {
551		uint32_t v;
552		kr = copyin(tsd_priority_addr, &v, sizeof(v));
553		if (kr != KERN_SUCCESS) {
554			return kr;
555		}
556		priority = v;
557	}
558
559	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
560		return ESRCH;
561	}
562
563	/* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
564	if (th != current_thread()) {
565		thread_deallocate(th);
566		return EPERM;
567	}
568
569	int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
570
571	/* Static param the thread, we just set QoS on it, so its stuck in QoS land now. */
572	/* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
573
574	thread_deallocate(th);
575
576	return rv;
577}
578
579static inline struct threadlist *
580util_get_thread_threadlist_entry(thread_t th)
581{
582	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
583	if (uth) {
584		struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
585		return tl;
586	}
587	return NULL;
588}
589
590static inline void
591wq_thread_override_reset(thread_t th)
592{
593	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
594	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
595
596	if (tl) {
597		/*
598		 * Drop all outstanding overrides on this thread, done outside the wq lock
599		 * because proc_usynch_thread_qos_remove_override takes a spinlock that
600		 * could cause us to panic.
601		 */
602		uint32_t count = tl->th_dispatch_override_count;
603		while (!OSCompareAndSwap(count, 0, &tl->th_dispatch_override_count)) {
604			count = tl->th_dispatch_override_count;
605		}
606
607		PTHREAD_TRACE(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, count, 0, 0, 0);
608
609		for (int i=count; i>0; i--) {
610			pthread_kern->proc_usynch_thread_qos_remove_override(uth, 0);
611		}
612	}
613}
614
615int
616_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
617{
618	thread_qos_policy_data_t qos;
619	mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
620	boolean_t gd = FALSE;
621
622	kern_return_t kr;
623	int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
624
625	if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
626		kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
627		if (kr != KERN_SUCCESS) {
628			qos_rv = EINVAL;
629			goto voucher;
630		}
631
632		/* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
633		if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
634			qos_rv = EPERM;
635			goto voucher;
636		}
637
638		/* Get the work queue for tracing, also the threadlist for bucket manipluation. */
639		struct workqueue *wq = NULL;
640		struct threadlist *tl = util_get_thread_threadlist_entry(current_thread());
641		if (tl) {
642			wq = tl->th_workq;
643		}
644
645		PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
646
647		qos.qos_tier = pthread_priority_get_qos_class(priority);
648		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
649
650		kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
651		if (kr != KERN_SUCCESS) {
652			qos_rv = EINVAL;
653			goto voucher;
654		}
655
656		/* If we're a workqueue, the threadlist item priority needs adjusting, along with the bucket we were running in. */
657		if (tl) {
658			workqueue_lock_spin(p);
659
660			/* Fix up counters. */
661			uint8_t old_bucket = tl->th_priority;
662			uint8_t new_bucket = pthread_priority_get_class_index(priority);
663
664			uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
665			OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
666
667			wq->wq_thscheduled_count[old_bucket]--;
668			wq->wq_thscheduled_count[new_bucket]++;
669
670			tl->th_priority = new_bucket;
671
672			/* If we were at the ceiling of non-overcommitted threads for a given bucket, we have to
673			 * reevaluate whether we should start more work.
674			 */
675			if (old_active == wq->wq_reqconc[old_bucket]) {
676				/* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
677				(void)workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
678			} else {
679				workqueue_unlock(p);
680			}
681		}
682
683		PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
684	}
685
686voucher:
687	if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
688		kr = pthread_kern->thread_set_voucher_name(voucher);
689		if (kr != KERN_SUCCESS) {
690			voucher_rv = ENOENT;
691			goto fixedpri;
692		}
693	}
694
695fixedpri:
696	if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
697		thread_extended_policy_data_t extpol;
698		thread_t thread = current_thread();
699
700		extpol.timeshare = 0;
701
702		struct threadlist *tl = util_get_thread_threadlist_entry(thread);
703		if (tl) {
704			/* Not allowed on workqueue threads, since there is no symmetric clear function */
705			fixedpri_rv = ENOTSUP;
706			goto done;
707		}
708
709		kr = pthread_kern->thread_policy_set_internal(thread, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
710		if (kr != KERN_SUCCESS) {
711			fixedpri_rv = EINVAL;
712			goto done;
713		}
714	}
715
716done:
717	if (qos_rv && voucher_rv) {
718		/* Both failed, give that a unique error. */
719		return EBADMSG;
720	}
721
722	if (qos_rv) {
723		return qos_rv;
724	}
725
726	if (voucher_rv) {
727		return voucher_rv;
728	}
729
730	if (fixedpri_rv) {
731		return fixedpri_rv;
732	}
733
734	return 0;
735}
736
737int
738_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval)
739{
740	thread_t th;
741	int rv = 0;
742
743	if (arg3 != 0) {
744		return EINVAL;
745	}
746
747	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
748		return ESRCH;
749	}
750
751	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
752	int override_qos = pthread_priority_get_qos_class(priority);
753
754	struct threadlist *tl = util_get_thread_threadlist_entry(th);
755	if (tl) {
756		/* Workqueue threads count their overrides, so they can forcibly balance any outstanding
757		 * overrides when they return to the kernel.
758		 */
759		uint32_t o = OSAddAtomic(1, &tl->th_override_count);
760		PTHREAD_TRACE(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o+1, priority, 0);
761	}
762
763	/* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
764	pthread_kern->proc_usynch_thread_qos_add_override(uth, 0, override_qos, TRUE);
765
766	thread_deallocate(th);
767	return rv;
768}
769
770int
771_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t arg2, user_addr_t arg3, int __unused *retval)
772{
773	thread_t th;
774	int rv = 0;
775
776	if (arg2 != 0 || arg3 != 0) {
777		return EINVAL;
778	}
779
780	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
781		return ESRCH;
782	}
783
784	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
785
786	struct threadlist *tl = util_get_thread_threadlist_entry(th);
787	if (tl) {
788		uint32_t o = OSAddAtomic(-1, &tl->th_override_count);
789
790		PTHREAD_TRACE(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o-1, 0, 0);
791
792		if (o == 0) {
793			/* underflow! */
794			thread_deallocate(th);
795			return EFAULT;
796		}
797	}
798
799	pthread_kern->proc_usynch_thread_qos_remove_override(uth, 0);
800
801	thread_deallocate(th);
802	return rv;
803}
804
805int
806_bsdthread_ctl_qos_override_dispatch(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval)
807{
808	thread_t th;
809	int rv = 0;
810
811	if (arg3 != 0) {
812		return EINVAL;
813	}
814
815	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
816		return ESRCH;
817	}
818
819	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
820	int override_qos = pthread_priority_get_qos_class(priority);
821
822	struct threadlist *tl = util_get_thread_threadlist_entry(th);
823	if (!tl) {
824		thread_deallocate(th);
825		return EPERM;
826	}
827
828	/* Workqueue threads count their overrides, so they can forcibly balance any outstanding
829	 * overrides when they return to the kernel.
830	 */
831	uint32_t o = OSAddAtomic(1, &tl->th_dispatch_override_count);
832	PTHREAD_TRACE(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), o+1, priority, 0);
833
834	/* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
835	pthread_kern->proc_usynch_thread_qos_add_override(uth, 0, override_qos, TRUE);
836
837	thread_deallocate(th);
838	return rv;
839}
840
841int
842_bsdthread_ctl_qos_override_reset(struct proc __unused *p, user_addr_t __unused cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int __unused *retval)
843{
844	thread_t th;
845	struct threadlist *tl;
846	int rv = 0;
847
848	if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
849		return EINVAL;
850	}
851
852	th = current_thread();
853	tl = util_get_thread_threadlist_entry(th);
854
855	if (tl) {
856		wq_thread_override_reset(th);
857	} else {
858		rv = EPERM;
859	}
860
861	return rv;
862}
863
864int
865_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
866{
867	switch (cmd) {
868		case BSDTHREAD_CTL_SET_QOS:
869			return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
870		case BSDTHREAD_CTL_QOS_OVERRIDE_START:
871			return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
872		case BSDTHREAD_CTL_QOS_OVERRIDE_END:
873			return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
874		case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
875			return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
876		case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
877			return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
878		case BSDTHREAD_CTL_SET_SELF:
879			return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
880		default:
881			return EINVAL;
882	}
883}
884
885uint32_t wq_yielded_threshold		= WQ_YIELDED_THRESHOLD;
886uint32_t wq_yielded_window_usecs	= WQ_YIELDED_WINDOW_USECS;
887uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
888uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
889uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
890uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
891uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;
892
893
894SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
895	   &wq_yielded_threshold, 0, "");
896
897SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
898	   &wq_yielded_window_usecs, 0, "");
899
900SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
901	   &wq_stalled_window_usecs, 0, "");
902
903SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
904	   &wq_reduce_pool_window_usecs, 0, "");
905
906SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
907	   &wq_max_timer_interval_usecs, 0, "");
908
909SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
910	   &wq_max_threads, 0, "");
911
912SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
913	   &wq_max_constrained_threads, 0, "");
914
915
916static uint32_t wq_init_constrained_limit = 1;
917
918
919void
920_workqueue_init_lock(proc_t p)
921{
922	lck_spin_init(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp, pthread_lck_attr);
923	*(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
924}
925
926void
927_workqueue_destroy_lock(proc_t p)
928{
929	lck_spin_destroy(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp);
930}
931
932
933static void
934workqueue_lock_spin(proc_t p)
935{
936	lck_spin_lock(pthread_kern->proc_get_wqlockptr(p));
937}
938
939static void
940workqueue_unlock(proc_t p)
941{
942	lck_spin_unlock(pthread_kern->proc_get_wqlockptr(p));
943}
944
945
946static void
947workqueue_interval_timer_start(struct workqueue *wq)
948{
949	uint64_t deadline;
950
951	if (wq->wq_timer_interval == 0) {
952		wq->wq_timer_interval = wq_stalled_window_usecs;
953
954	} else {
955		wq->wq_timer_interval = wq->wq_timer_interval * 2;
956
957		if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
958			wq->wq_timer_interval = wq_max_timer_interval_usecs;
959		}
960	}
961	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
962
963	thread_call_enter_delayed(wq->wq_atimer_call, deadline);
964
965	PTHREAD_TRACE(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
966}
967
968
969static boolean_t
970wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
971{
972	clock_sec_t	secs;
973	clock_usec_t	usecs;
974	uint64_t lastblocked_ts;
975	uint64_t elapsed;
976
977	/*
978	 * the timestamp is updated atomically w/o holding the workqueue lock
979	 * so we need to do an atomic read of the 64 bits so that we don't see
980	 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
981	 * independent fashion by using OSCompareAndSwap64 to write back the
982	 * value we grabbed... if it succeeds, then we have a good timestamp to
983	 * evaluate... if it fails, we straddled grabbing the timestamp while it
984	 * was being updated... treat a failed update as a busy thread since
985	 * it implies we are about to see a really fresh timestamp anyway
986	 */
987	lastblocked_ts = *lastblocked_tsp;
988
989	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
990		return (TRUE);
991
992	if (lastblocked_ts >= cur_ts) {
993		/*
994		 * because the update of the timestamp when a thread blocks isn't
995		 * serialized against us looking at it (i.e. we don't hold the workq lock)
996		 * it's possible to have a timestamp that matches the current time or
997		 * that even looks to be in the future relative to when we grabbed the current
998		 * time... just treat this as a busy thread since it must have just blocked.
999		 */
1000		return (TRUE);
1001	}
1002	elapsed = cur_ts - lastblocked_ts;
1003
1004	pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1005
1006	if (secs == 0 && usecs < wq_stalled_window_usecs)
1007		return (TRUE);
1008	return (FALSE);
1009}
1010
1011
1012#define WQ_TIMER_NEEDED(wq, start_timer) do {		\
1013	int oldflags = wq->wq_flags;			\
1014							\
1015	if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) {	\
1016		if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
1017			start_timer = TRUE;			\
1018	}							\
1019} while (0)
1020
1021
1022
1023static void
1024workqueue_add_timer(struct workqueue *wq, __unused int param1)
1025{
1026	proc_t		p;
1027	boolean_t	start_timer = FALSE;
1028	boolean_t	retval;
1029	boolean_t	add_thread;
1030	uint32_t	busycount;
1031
1032	PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);
1033
1034	p = wq->wq_proc;
1035
1036	workqueue_lock_spin(p);
1037
1038	/*
1039	 * because workqueue_callback now runs w/o taking the workqueue lock
1040	 * we are unsynchronized w/r to a change in state of the running threads...
1041	 * to make sure we always evaluate that change, we allow it to start up
1042	 * a new timer if the current one is actively evalutating the state
1043	 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
1044	 * and we certainly do not want 2 active timers evaluating the state
1045	 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
1046	 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
1047	 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
1048	 * and set atomimcally since the callback function needs to manipulate it
1049	 * w/o holding the workq lock...
1050	 *
1051	 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   no pending timer, no active timer
1052	 * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY    ==   no pending timer, 1 active timer
1053	 * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY    ==   1 pending timer, no active timer
1054	 * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY     ==   1 pending timer, 1 active timer
1055	 */
1056	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
1057		wq->wq_lflags |= WQL_ATIMER_WAITING;
1058
1059		assert_wait((caddr_t)wq, (THREAD_UNINT));
1060		workqueue_unlock(p);
1061
1062		thread_block(THREAD_CONTINUE_NULL);
1063
1064		workqueue_lock_spin(p);
1065	}
1066	wq->wq_lflags |= WQL_ATIMER_BUSY;
1067
1068	/*
1069	 * the workq lock will protect us from seeing WQ_EXITING change state, but we
1070	 * still need to update this atomically in case someone else tries to start
1071	 * the timer just as we're releasing it
1072	 */
1073	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));
1074
1075again:
1076	retval = TRUE;
1077	add_thread = FALSE;
1078
1079	if ( !(wq->wq_flags & WQ_EXITING)) {
1080		/*
1081		 * check to see if the stall frequency was beyond our tolerance
1082		 * or we have work on the queue, but haven't scheduled any
1083		 * new work within our acceptable time interval because
1084		 * there were no idle threads left to schedule
1085		 */
1086		if (wq->wq_reqcount) {
1087			uint32_t	priclass;
1088			uint32_t	thactive_count;
1089			uint32_t	i;
1090			uint64_t	curtime;
1091
1092			for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1093				if (wq->wq_requests[priclass])
1094					break;
1095			}
1096			assert(priclass < WORKQUEUE_NUM_BUCKETS);
1097
1098			curtime = mach_absolute_time();
1099			busycount = 0;
1100			thactive_count = 0;
1101
1102			/*
1103			 * check for conditions under which we would not add a thread, either
1104			 *   a) we've got as many running threads as we want in this priority
1105			 *      band and the priority bands above it
1106			 *
1107			 *   b) check to see if the priority group has blocked threads, if the
1108			 *      last blocked timestamp is old enough, we will have already passed
1109			 *      (a) where we would have stopped if we had enough active threads.
1110			 */
1111			for (i = 0; i <= priclass; i++) {
1112
1113				thactive_count += wq->wq_thactive_count[i];
1114
1115				if (wq->wq_thscheduled_count[i]) {
1116					if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))
1117						busycount++;
1118				}
1119			}
1120			if (thactive_count + busycount < wq->wq_max_concurrency) {
1121
1122				if (wq->wq_thidlecount == 0) {
1123					/*
1124					 * if we have no idle threads, try to add one
1125					 */
1126					retval = workqueue_addnewthread(wq, FALSE);
1127				}
1128				add_thread = TRUE;
1129			}
1130
1131			if (wq->wq_reqcount) {
1132				/*
1133				 * as long as we have threads to schedule, and we successfully
1134				 * scheduled new work, keep trying
1135				 */
1136				while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
1137					/*
1138					 * workqueue_run_nextreq is responsible for
1139					 * dropping the workqueue lock in all cases
1140					 */
1141					retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
1142					workqueue_lock_spin(p);
1143
1144					if (retval == FALSE)
1145						break;
1146				}
1147				if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
1148
1149					if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
1150						goto again;
1151
1152					if (wq->wq_thidlecount == 0 || busycount)
1153						WQ_TIMER_NEEDED(wq, start_timer);
1154
1155					PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
1156				}
1157			}
1158		}
1159	}
1160	if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
1161		wq->wq_timer_interval = 0;
1162
1163	wq->wq_lflags &= ~WQL_ATIMER_BUSY;
1164
1165	if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
1166		/*
1167		 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
1168		 * to finish getting out of the way
1169		 */
1170		wq->wq_lflags &= ~WQL_ATIMER_WAITING;
1171		wakeup(wq);
1172	}
1173
1174	PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);
1175
1176	workqueue_unlock(p);
1177
1178        if (start_timer == TRUE)
1179	        workqueue_interval_timer_start(wq);
1180}
1181
1182
1183void
1184_workqueue_thread_yielded(void)
1185{
1186	struct workqueue *wq;
1187	proc_t p;
1188
1189	p = current_proc();
1190
1191	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)
1192		return;
1193
1194	workqueue_lock_spin(p);
1195
1196	if (wq->wq_reqcount) {
1197		uint64_t	curtime;
1198		uint64_t	elapsed;
1199		clock_sec_t	secs;
1200		clock_usec_t	usecs;
1201
1202		if (wq->wq_thread_yielded_count++ == 0)
1203			wq->wq_thread_yielded_timestamp = mach_absolute_time();
1204
1205		if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
1206			workqueue_unlock(p);
1207			return;
1208		}
1209
1210		PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
1211
1212		wq->wq_thread_yielded_count = 0;
1213
1214		curtime = mach_absolute_time();
1215		elapsed = curtime - wq->wq_thread_yielded_timestamp;
1216		pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1217
1218		if (secs == 0 && usecs < wq_yielded_window_usecs) {
1219
1220			if (wq->wq_thidlecount == 0) {
1221				workqueue_addnewthread(wq, TRUE);
1222				/*
1223				 * 'workqueue_addnewthread' drops the workqueue lock
1224				 * when creating the new thread and then retakes it before
1225				 * returning... this window allows other threads to process
1226				 * requests, so we need to recheck for available work
1227				 * if none found, we just return...  the newly created thread
1228				 * will eventually get used (if it hasn't already)...
1229				 */
1230				if (wq->wq_reqcount == 0) {
1231					workqueue_unlock(p);
1232					return;
1233				}
1234			}
1235			if (wq->wq_thidlecount) {
1236				uint32_t	priority;
1237				boolean_t	overcommit = FALSE;
1238				boolean_t	force_oc = FALSE;
1239
1240				for (priority = 0; priority < WORKQUEUE_NUM_BUCKETS; priority++) {
1241					if (wq->wq_requests[priority]) {
1242						break;
1243					}
1244				}
1245				assert(priority < WORKQUEUE_NUM_BUCKETS);
1246
1247				wq->wq_reqcount--;
1248				wq->wq_requests[priority]--;
1249
1250				if (wq->wq_ocrequests[priority]) {
1251					wq->wq_ocrequests[priority]--;
1252					overcommit = TRUE;
1253				} else
1254					force_oc = TRUE;
1255
1256				(void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, pthread_priority_from_class_index(priority));
1257				/*
1258				 * workqueue_run_nextreq is responsible for
1259				 * dropping the workqueue lock in all cases
1260				 */
1261				PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
1262
1263				return;
1264			}
1265		}
1266		PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
1267	}
1268	workqueue_unlock(p);
1269}
1270
1271
1272
1273static void
1274workqueue_callback(int type, thread_t thread)
1275{
1276	struct uthread    *uth;
1277	struct threadlist *tl;
1278	struct workqueue  *wq;
1279
1280	uth = pthread_kern->get_bsdthread_info(thread);
1281	tl = pthread_kern->uthread_get_threadlist(uth);
1282	wq = tl->th_workq;
1283
1284	switch (type) {
1285	case SCHED_CALL_BLOCK: {
1286		uint32_t	old_activecount;
1287		boolean_t	start_timer = FALSE;
1288
1289		old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
1290
1291		if (old_activecount == wq->wq_reqconc[tl->th_priority]) {
1292			uint64_t	curtime;
1293			UInt64		*lastblocked_ptr;
1294
1295			/*
1296			 * the number of active threads at this priority
1297			 * has fallen below the maximum number of concurrent
1298			 * threads that we're allowed to run
1299			 */
1300			lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
1301			curtime = mach_absolute_time();
1302
1303			/*
1304			 * if we collide with another thread trying to update the last_blocked (really unlikely
1305			 * since another thread would have to get scheduled and then block after we start down
1306			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
1307			 */
1308
1309			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
1310
1311			if (wq->wq_reqcount) {
1312				/*
1313				 * we have work to do so start up the timer
1314				 * if it's not running... we'll let it sort
1315				 * out whether we really need to start up
1316				 * another thread
1317				 */
1318				WQ_TIMER_NEEDED(wq, start_timer);
1319			}
1320
1321			if (start_timer == TRUE) {
1322				workqueue_interval_timer_start(wq);
1323			}
1324		}
1325		PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));
1326		break;
1327	}
1328	case SCHED_CALL_UNBLOCK:
1329		/*
1330		 * we cannot take the workqueue_lock here...
1331		 * an UNBLOCK can occur from a timer event which
1332		 * is run from an interrupt context... if the workqueue_lock
1333		 * is already held by this processor, we'll deadlock...
1334		 * the thread lock for the thread being UNBLOCKED
1335		 * is also held
1336		 */
1337		OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);
1338
1339		PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));
1340
1341		break;
1342	}
1343}
1344
1345sched_call_t
1346_workqueue_get_sched_callback(void)
1347{
1348	return workqueue_callback;
1349}
1350
1351static void
1352workqueue_removethread(struct threadlist *tl, int fromexit)
1353{
1354	struct workqueue *wq;
1355	struct uthread * uth;
1356
1357	/*
1358	 * If fromexit is set, the call is from workqueue_exit(,
1359	 * so some cleanups are to be avoided.
1360	 */
1361	wq = tl->th_workq;
1362
1363	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
1364
1365	if (fromexit == 0) {
1366		wq->wq_nthreads--;
1367		wq->wq_thidlecount--;
1368	}
1369
1370	/*
1371	 * Clear the threadlist pointer in uthread so
1372	 * blocked thread on wakeup for termination will
1373	 * not access the thread list as it is going to be
1374	 * freed.
1375	 */
1376	pthread_kern->thread_sched_call(tl->th_thread, NULL);
1377
1378	uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1379	if (uth != (struct uthread *)0) {
1380		pthread_kern->uthread_set_threadlist(uth, NULL);
1381	}
1382	if (fromexit == 0) {
1383		/* during exit the lock is not held */
1384		workqueue_unlock(wq->wq_proc);
1385	}
1386
1387	if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
1388		/*
1389		 * thread was created, but never used...
1390		 * need to clean up the stack and port ourselves
1391		 * since we're not going to spin up through the
1392		 * normal exit path triggered from Libc
1393		 */
1394		if (fromexit == 0) {
1395			/* vm map is already deallocated when this is called from exit */
1396			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
1397		}
1398		(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
1399
1400		PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
1401	} else {
1402
1403		PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
1404	}
1405	/*
1406	 * drop our ref on the thread
1407	 */
1408	thread_deallocate(tl->th_thread);
1409
1410	kfree(tl, sizeof(struct threadlist));
1411}
1412
1413
1414/*
1415 * called with workq lock held
1416 * dropped and retaken around thread creation
1417 * return with workq lock held
1418 */
1419static boolean_t
1420workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
1421{
1422	struct threadlist *tl;
1423	struct uthread	*uth;
1424	kern_return_t	kret;
1425	thread_t	th;
1426	proc_t		p;
1427	void 	 	*sright;
1428	mach_vm_offset_t stackaddr;
1429	mach_vm_size_t guardsize;
1430
1431	if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)
1432		return (FALSE);
1433
1434	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (pthread_kern->config_thread_max - 20)) {
1435		wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
1436		return (FALSE);
1437	}
1438	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
1439
1440	if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
1441		/*
1442		 * if we're not creating this thread to service an overcommit request,
1443		 * then check the size of the constrained thread pool...  if we've already
1444		 * reached our max for threads scheduled from this pool, don't create a new
1445		 * one... the callers of this function are prepared for failure.
1446		 */
1447		wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
1448		return (FALSE);
1449	}
1450	if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
1451		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
1452
1453	wq->wq_nthreads++;
1454
1455	p = wq->wq_proc;
1456	workqueue_unlock(p);
1457
1458	kret = pthread_kern->thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
1459 	if (kret != KERN_SUCCESS) {
1460		goto failed;
1461	}
1462
1463	tl = kalloc(sizeof(struct threadlist));
1464	bzero(tl, sizeof(struct threadlist));
1465
1466#if defined(__i386__) || defined(__x86_64__)
1467	stackaddr = 0xB0000000;
1468#else
1469#error Need to define a stack address hint for this architecture
1470#endif
1471
1472	guardsize = vm_map_page_size(wq->wq_map);
1473	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + guardsize + pthread_kern->proc_get_pthsize(p);
1474
1475	kret = mach_vm_map(wq->wq_map, &stackaddr,
1476    			tl->th_allocsize,
1477    			page_size-1,
1478    			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
1479    			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
1480    			VM_INHERIT_DEFAULT);
1481
1482	if (kret != KERN_SUCCESS) {
1483	        kret = mach_vm_allocate(wq->wq_map,
1484    					&stackaddr, tl->th_allocsize,
1485    					VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
1486	}
1487	if (kret == KERN_SUCCESS) {
1488	        /*
1489		 * The guard page is at the lowest address
1490		 * The stack base is the highest address
1491		 */
1492	        kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
1493
1494		if (kret != KERN_SUCCESS)
1495		        (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
1496	}
1497	if (kret != KERN_SUCCESS) {
1498		(void) thread_terminate(th);
1499		thread_deallocate(th);
1500
1501		kfree(tl, sizeof(struct threadlist));
1502		goto failed;
1503	}
1504	thread_reference(th);
1505
1506	sright = (void *)pthread_kern->convert_thread_to_port(th);
1507	tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));
1508
1509	pthread_kern->thread_static_param(th, TRUE);
1510
1511	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;
1512
1513	tl->th_thread = th;
1514	tl->th_workq = wq;
1515	tl->th_stackaddr = stackaddr;
1516	tl->th_priority = WORKQUEUE_NUM_BUCKETS;
1517	tl->th_policy = -1;
1518
1519	uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1520
1521	workqueue_lock_spin(p);
1522
1523	pthread_kern->uthread_set_threadlist(uth, tl);
1524	TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
1525
1526	wq->wq_thidlecount++;
1527
1528	PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));
1529
1530	return (TRUE);
1531
1532failed:
1533	workqueue_lock_spin(p);
1534	wq->wq_nthreads--;
1535
1536	return (FALSE);
1537}
1538
1539
1540int
1541_workq_open(struct proc *p, __unused int32_t *retval)
1542{
1543	struct workqueue * wq;
1544	int wq_size;
1545	char * ptr;
1546	uint32_t i;
1547	uint32_t num_cpus;
1548	int error = 0;
1549	boolean_t need_wakeup = FALSE;
1550
1551	if (pthread_kern->proc_get_register(p) == 0) {
1552		return EINVAL;
1553	}
1554
1555	num_cpus = pthread_kern->ml_get_max_cpus();
1556
1557	if (wq_init_constrained_limit) {
1558		uint32_t limit;
1559		/*
1560		 * set up the limit for the constrained pool
1561		 * this is a virtual pool in that we don't
1562		 * maintain it on a separate idle and run list
1563		 */
1564		limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
1565
1566		if (limit > wq_max_constrained_threads)
1567			wq_max_constrained_threads = limit;
1568
1569		wq_init_constrained_limit = 0;
1570	}
1571	workqueue_lock_spin(p);
1572
1573	if (pthread_kern->proc_get_wqptr(p) == NULL) {
1574
1575		while (*pthread_kern->proc_get_wqinitingptr(p) == TRUE) {
1576
1577			assert_wait((caddr_t)pthread_kern->proc_get_wqinitingptr(p), THREAD_UNINT);
1578			workqueue_unlock(p);
1579
1580			thread_block(THREAD_CONTINUE_NULL);
1581
1582			workqueue_lock_spin(p);
1583		}
1584		if (pthread_kern->proc_get_wqptr(p) != NULL) {
1585			goto out;
1586		}
1587
1588		*(pthread_kern->proc_get_wqinitingptr(p)) = TRUE;
1589
1590		workqueue_unlock(p);
1591
1592		wq_size = sizeof(struct workqueue);
1593
1594		ptr = (char *)kalloc(wq_size);
1595		bzero(ptr, wq_size);
1596
1597		wq = (struct workqueue *)ptr;
1598		wq->wq_flags = WQ_LIST_INITED;
1599		wq->wq_proc = p;
1600		wq->wq_max_concurrency = num_cpus;
1601		wq->wq_task = current_task();
1602		wq->wq_map  = pthread_kern->current_map();
1603
1604		for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
1605			wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;
1606
1607		TAILQ_INIT(&wq->wq_thrunlist);
1608		TAILQ_INIT(&wq->wq_thidlelist);
1609
1610		wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
1611
1612		workqueue_lock_spin(p);
1613
1614		pthread_kern->proc_set_wqptr(p, wq);
1615		pthread_kern->proc_set_wqsize(p, wq_size);
1616
1617		*(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;
1618		need_wakeup = TRUE;
1619	}
1620out:
1621	workqueue_unlock(p);
1622
1623	if (need_wakeup == TRUE) {
1624		wakeup(pthread_kern->proc_get_wqinitingptr(p));
1625	}
1626	return(error);
1627}
1628
1629
1630int
1631_workq_kernreturn(struct proc *p,
1632		  int options,
1633		  __unused user_addr_t item,
1634		  int arg2,
1635		  int arg3,
1636		  __unused int32_t *retval)
1637{
1638	struct workqueue *wq;
1639	int error	= 0;
1640
1641	if (pthread_kern->proc_get_register(p) == 0) {
1642		return EINVAL;
1643	}
1644
1645	switch (options) {
1646	case WQOPS_QUEUE_NEWSPISUPP: {
1647		/*
1648		 * arg2 = offset of serialno into dispatch queue
1649		 */
1650		int offset = arg2;
1651
1652		pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
1653		break;
1654	}
1655	case WQOPS_QUEUE_REQTHREADS: {
1656		/*
1657		 * arg2 = number of threads to start
1658		 * arg3 = priority
1659		 */
1660		boolean_t overcommit = FALSE;
1661		int reqcount	     = arg2;
1662		pthread_priority_t priority = arg3;
1663		int class;
1664
1665		overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
1666		class = pthread_priority_get_class_index(priority);
1667
1668		if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS)) {
1669			error = EINVAL;
1670			break;
1671		}
1672
1673		workqueue_lock_spin(p);
1674
1675		if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
1676			workqueue_unlock(p);
1677
1678			error = EINVAL;
1679			break;
1680		}
1681
1682		if (!overcommit) {
1683			wq->wq_reqcount += reqcount;
1684			wq->wq_requests[class] += reqcount;
1685
1686			PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1687
1688			while (wq->wq_reqcount) {
1689				if (!workqueue_run_one(p, wq, overcommit, priority))
1690					break;
1691			}
1692		} else {
1693			PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1694
1695			while (reqcount) {
1696				if (!workqueue_run_one(p, wq, overcommit, priority))
1697					break;
1698				reqcount--;
1699			}
1700			if (reqcount) {
1701				/*
1702				 * we need to delay starting some of the overcommit requests...
1703				 * we should only fail to create the overcommit threads if
1704				 * we're at the max thread limit... as existing threads
1705				 * return to the kernel, we'll notice the ocrequests
1706				 * and spin them back to user space as the overcommit variety
1707				 */
1708				wq->wq_reqcount += reqcount;
1709				wq->wq_requests[class] += reqcount;
1710				wq->wq_ocrequests[class] += reqcount;
1711
1712				PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
1713			}
1714		}
1715		workqueue_unlock(p);
1716		break;
1717	}
1718
1719	case WQOPS_THREAD_RETURN: {
1720		thread_t th = current_thread();
1721		struct uthread *uth = pthread_kern->get_bsdthread_info(th);
1722		struct threadlist *tl = util_get_thread_threadlist_entry(th);
1723
1724		/* reset signal mask on the workqueue thread to default state */
1725		if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
1726			pthread_kern->proc_lock(p);
1727			pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
1728			pthread_kern->proc_unlock(p);
1729		}
1730
1731		/* dropping WQ override counts has to be done outside the wq lock. */
1732		wq_thread_override_reset(th);
1733
1734		workqueue_lock_spin(p);
1735
1736		if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL || !tl) {
1737			workqueue_unlock(p);
1738
1739			error = EINVAL;
1740			break;
1741		}
1742		PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);
1743
1744
1745		(void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0);
1746		/*
1747		 * workqueue_run_nextreq is responsible for
1748		 * dropping the workqueue lock in all cases
1749		 */
1750		break;
1751	}
1752
1753	default:
1754		error = EINVAL;
1755		break;
1756	}
1757	return (error);
1758}
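
/*
 * Usage sketch (illustrative only; the userspace side lives in the
 * libpthread project and may differ between releases): the options above
 * are normally exercised through the __workq_kernreturn() syscall stub.
 * A request for additional worker threads looks roughly like
 *
 *	__workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, nthreads, priority);
 *
 * where 'priority' is a pthread_priority_t that may carry
 * _PTHREAD_PRIORITY_OVERCOMMIT_FLAG, and a worker thread coming back to
 * the kernel for more work uses WQOPS_THREAD_RETURN with the remaining
 * arguments ignored.
 */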
1759
1760/*
1761 * Routine:	workqueue_mark_exiting
1762 *
1763 * Function:	Mark the work queue such that new threads will not be added to the
1764 *		work queue after we return.
1765 *
1766 * Conditions:	Called against the current process.
1767 */
1768void
1769_workqueue_mark_exiting(struct proc *p)
1770{
1771	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
1772
1773	if (wq != NULL) {
1774
1775		PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
1776
1777		workqueue_lock_spin(p);
1778
		/*
		 * we now arm the timer in the callback function w/o holding the workq lock...
		 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
		 * ensure only a single timer is running and to notice that WQ_EXITING has
		 * been set (we don't want to start a timer once WQ_EXITING is posted)
		 *
		 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
		 * therefore there is no need to clear the timer state atomically from the flags
		 *
		 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING,
		 * the check for it (and the sleep until it clears) is protected
		 */
1791		while (!(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));
1792
1793		if (wq->wq_flags & WQ_ATIMER_RUNNING) {
1794			if (thread_call_cancel(wq->wq_atimer_call) == TRUE) {
1795				wq->wq_flags &= ~WQ_ATIMER_RUNNING;
1796			}
1797		}
1798		while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {
1799			assert_wait((caddr_t)wq, (THREAD_UNINT));
1800			workqueue_unlock(p);
1801
1802			thread_block(THREAD_CONTINUE_NULL);
1803
1804			workqueue_lock_spin(p);
1805		}
1806		workqueue_unlock(p);
1807
1808		PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1809	}
1810}
1811
1812/*
1813 * Routine:	workqueue_exit
1814 *
1815 * Function:	clean up the work queue structure(s) now that there are no threads
1816 *		left running inside the work queue (except possibly current_thread).
1817 *
1818 * Conditions:	Called by the last thread in the process.
1819 *		Called against current process.
1820 */
1821void
1822_workqueue_exit(struct proc *p)
1823{
1824	struct workqueue  * wq;
1825	struct threadlist  * tl, *tlist;
1826	struct uthread	*uth;
1827	int wq_size = 0;
1828
1829	wq = pthread_kern->proc_get_wqptr(p);
1830	if (wq != NULL) {
1831
1832		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
1833
1834		wq_size = pthread_kern->proc_get_wqsize(p);
1835		pthread_kern->proc_set_wqptr(p, NULL);
1836		pthread_kern->proc_set_wqsize(p, 0);
1837
1838		/*
1839		 * Clean up workqueue data structures for threads that exited and
1840		 * didn't get a chance to clean up after themselves.
1841		 */
1842		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
1843			pthread_kern->thread_sched_call(tl->th_thread, NULL);
1844
1845			uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1846			if (uth != (struct uthread *)0) {
1847				pthread_kern->uthread_set_threadlist(uth, NULL);
1848			}
1849			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1850
1851			/*
1852			 * drop our last ref on the thread
1853			 */
1854			thread_deallocate(tl->th_thread);
1855
1856			kfree(tl, sizeof(struct threadlist));
1857		}
1858		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
1859			workqueue_removethread(tl, 1);
1860		}
1861		thread_call_free(wq->wq_atimer_call);
1862
1863		kfree(wq, wq_size);
1864
1865		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1866	}
1867}
1868
1869
1870static boolean_t
1871workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)
1872{
1873	boolean_t	ran_one;
1874
1875	if (wq->wq_thidlecount == 0) {
1876		if (overcommit == FALSE) {
1877			if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
1878				workqueue_addnewthread(wq, overcommit);
1879		} else {
1880			workqueue_addnewthread(wq, overcommit);
1881
1882			if (wq->wq_thidlecount == 0)
1883				return (FALSE);
1884		}
1885	}
1886	ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority);
1887	/*
1888	 * workqueue_run_nextreq is responsible for
1889	 * dropping the workqueue lock in all cases
1890	 */
1891	workqueue_lock_spin(p);
1892
1893	return (ran_one);
1894}
1895
1896
1897
1898/*
1899 * workqueue_run_nextreq:
1900 *   called with the workqueue lock held...
1901 *   responsible for dropping it in all cases
1902 */
1903static boolean_t
1904workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
1905		      boolean_t force_oc, boolean_t overcommit, pthread_priority_t oc_prio)
1906{
1907	thread_t th_to_run = THREAD_NULL;
1908	thread_t th_to_park = THREAD_NULL;
1909	int wake_thread = 0;
1910	int reuse_thread = WQ_FLAG_THREAD_REUSE;
1911	uint32_t priclass, orig_class;
1912	uint32_t us_to_wait;
1913	struct threadlist *tl = NULL;
1914	struct uthread *uth = NULL;
1915	boolean_t start_timer = FALSE;
1916	boolean_t adjust_counters = TRUE;
1917	uint64_t	curtime;
1918	uint32_t	thactive_count;
1919	uint32_t	busycount;
1920
1921	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0);
1922
1923	if (thread != THREAD_NULL) {
1924		uth = pthread_kern->get_bsdthread_info(thread);
1925
1926		if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) {
1927			panic("wq thread with no threadlist");
1928		}
1929	}
1930
	/*
	 * from here until we drop the workq lock
	 * we can't be preempted since we hold
	 * the lock in spin mode... this is important
	 * since we have to independently update the priority that
	 * the thread is associated with and the priority-based
	 * counters that "workqueue_callback" also changes and bases
	 * decisions on.
	 */
1940dispatch_overcommit:
1941
1942	if (overcommit || force_oc) {
1943		priclass = pthread_priority_get_class_index(oc_prio);
1944
1945		if (thread != THREAD_NULL) {
1946			th_to_run = thread;
1947			goto pick_up_work;
1948		}
1949		goto grab_idle_thread;
1950	}
1951	if (wq->wq_reqcount) {
1952		for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1953			if (wq->wq_requests[priclass])
1954				break;
1955		}
1956		assert(priclass < WORKQUEUE_NUM_BUCKETS);
1957
1958		if (wq->wq_ocrequests[priclass] && (thread != THREAD_NULL || wq->wq_thidlecount)) {
1959			/*
			 * handle delayed overcommit requests...
1961			 * they have priority over normal requests
1962			 * within a given priority level
1963			 */
1964			wq->wq_reqcount--;
1965			wq->wq_requests[priclass]--;
1966			wq->wq_ocrequests[priclass]--;
1967
1968			oc_prio = pthread_priority_from_class_index(priclass);
1969			overcommit = TRUE;
1970
1971			goto dispatch_overcommit;
1972		}
1973	}
1974	/*
1975	 * if we get here, the work should be handled by a constrained thread
1976	 */
1977	if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
1978		/*
1979		 * no work to do, or we're already at or over the scheduling limit for
1980		 * constrained threads...  just return or park the thread...
1981		 * do not start the timer for this condition... if we don't have any work,
1982		 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
1983		 * constrained threads to return to the kernel before we can dispatch additional work
1984		 */
		if ((th_to_park = thread) == THREAD_NULL)
			goto out_of_work;
1987		goto parkit;
1988	}
1989
1990	thactive_count = 0;
1991	busycount = 0;
1992
1993	curtime = mach_absolute_time();
1994
1995	thactive_count += wq->wq_thactive_count[priclass];
1996
1997	if (wq->wq_thscheduled_count[priclass]) {
1998		if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[priclass])) {
1999			busycount++;
2000		}
2001	}
2002
2003	if (thread != THREAD_NULL) {
2004		if (tl->th_priority == priclass) {
2005			/*
			 * don't count this thread as currently active
2007			 */
2008			thactive_count--;
2009		}
2010	}
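	/*
	 * constrained-thread throttle: only dispatch this request if the number
	 * of threads currently active at this priority class, plus any
	 * recently-blocked ("busy") thread at that class, is still below
	 * wq_max_concurrency.  For example, with wq_max_concurrency == 4,
	 * 3 active threads plus 1 busy thread at this class is enough to
	 * defer the request; the busy case also arms the delayed timer so the
	 * request gets another look.
	 */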
2011	if (thactive_count + busycount >= wq->wq_max_concurrency) {
2012		if (busycount) {
			/*
			 * we found at least 1 thread in the
			 * 'busy' state... make sure we start
			 * the timer because if they are the only
			 * threads keeping us from scheduling
			 * this work request, we won't get a callback
			 * to kick off the timer... we need to
			 * start it now...
			 */
			WQ_TIMER_NEEDED(wq, start_timer);
2023		}
2024
2025		PTHREAD_TRACE(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, (start_timer ? 1<<7 : 0) | pthread_priority_from_class_index(priclass), thactive_count, busycount, 0);
2026
2027		if ((th_to_park = thread) == THREAD_NULL) {
2028			goto out_of_work;
2029		}
2030
2031		goto parkit;
2032	}
2033
2034	if (thread != THREAD_NULL) {
2035		/*
2036		 * thread is non-NULL here when we return from userspace
2037		 * in workq_kernreturn, rather than trying to find a thread
2038		 * we pick up new work for this specific thread.
2039		 */
2040		th_to_run = thread;
2041		goto pick_up_work;
2042	}
2043
2044grab_idle_thread:
2045	if (wq->wq_thidlecount == 0) {
		/*
		 * we have no additional threads waiting to pick up
		 * work; however, there is additional work to do.
		 */
2050		WQ_TIMER_NEEDED(wq, start_timer);
2051
2052		PTHREAD_TRACE(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);
2053
2054		goto no_thread_to_run;
2055	}
2056
2057	/*
2058	 * we already know there is both work available
2059	 * and an idle thread, so activate a thread and then
2060	 * fall into the code that pulls a new work request...
2061	 */
2062	tl = TAILQ_FIRST(&wq->wq_thidlelist);
2063	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
2064	wq->wq_thidlecount--;
2065
2066	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
2067
2068	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
2069		tl->th_flags &= ~TH_LIST_SUSPENDED;
2070		reuse_thread = 0;
2071
2072	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
2073		tl->th_flags &= ~TH_LIST_BLOCKED;
2074		wake_thread = 1;
2075	}
2076	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
2077
2078	wq->wq_threads_scheduled++;
2079	wq->wq_thscheduled_count[priclass]++;
2080	OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2081
2082	adjust_counters = FALSE;
2083	th_to_run = tl->th_thread;
2084
2085pick_up_work:
2086	if (!overcommit && !force_oc) {
2087		wq->wq_reqcount--;
2088		wq->wq_requests[priclass]--;
2089
2090		if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
2091			wq->wq_constrained_threads_scheduled++;
2092			tl->th_flags |= TH_LIST_CONSTRAINED;
2093		}
2094	} else {
2095		if (tl->th_flags & TH_LIST_CONSTRAINED) {
2096			wq->wq_constrained_threads_scheduled--;
2097			tl->th_flags &= ~TH_LIST_CONSTRAINED;
2098		}
2099	}
2100
2101	orig_class = tl->th_priority;
2102	tl->th_priority = (uint8_t)priclass;
2103
2104	if (adjust_counters && (orig_class != priclass)) {
2105		/*
2106		 * we need to adjust these counters based on this
2107		 * thread's new disposition w/r to priority
2108		 */
2109		OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
2110		OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2111
2112		wq->wq_thscheduled_count[orig_class]--;
2113		wq->wq_thscheduled_count[priclass]++;
2114	}
2115	wq->wq_thread_yielded_count = 0;
2116
2117	workqueue_unlock(p);
2118
2119	if (orig_class != priclass) {
2120		pthread_priority_t pri = pthread_priority_from_class_index(priclass);
2121
2122		thread_qos_policy_data_t qosinfo;
2123
2124		/* Set the QoS tier on the thread, along with the ceiling of max importance for this class. */
2125		qosinfo.qos_tier = pthread_priority_get_qos_class(pri);
2126		qosinfo.tier_importance = 0;
2127
2128		PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(orig_class), 0, 0);
2129
2130		/* All the previous implementation here now boils down to setting the QoS policy on the thread. */
2131		pthread_kern->thread_policy_set_internal(th_to_run, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT);
2132
2133		PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(priclass), qosinfo.qos_tier, 0);
2134	}
2135
	/*
	 * if the current thread is being reused for this work request,
	 * wq_runreq re-enters userspace directly and does not return here via unix_syscall
	 */
2139	wq_runreq(p, overcommit, pthread_priority_from_class_index(priclass), th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
2140
2141	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);
2142
2143	return (TRUE);
2144
2145out_of_work:
2146	/*
2147	 * we have no work to do or we are fully booked
2148	 * w/r to running threads...
2149	 */
2150no_thread_to_run:
2151	workqueue_unlock(p);
2152
2153	if (start_timer)
2154		workqueue_interval_timer_start(wq);
2155
2156	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(thread), start_timer, 2, 0);
2157
2158	return (FALSE);
2159
2160parkit:
2161	/*
2162	 * this is a workqueue thread with no more
2163	 * work to do... park it for now
2164	 */
2165	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
2166	tl->th_flags &= ~TH_LIST_RUNNING;
2167
2168	tl->th_flags |= TH_LIST_BLOCKED;
2169	TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
2170
2171	pthread_kern->thread_sched_call(th_to_park, NULL);
2172
2173	OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
2174	wq->wq_thscheduled_count[tl->th_priority]--;
2175	wq->wq_threads_scheduled--;
2176
2177	if (tl->th_flags & TH_LIST_CONSTRAINED) {
2178		wq->wq_constrained_threads_scheduled--;
2179		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
2180		tl->th_flags &= ~TH_LIST_CONSTRAINED;
2181	}
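	/*
	 * park with a staggered timeout: the first thread to go idle waits the
	 * full wq_reduce_pool_window_usecs, and each additional idle thread
	 * waits 1% less (clamped at 1% of the window once 100 or more threads
	 * are idle), so a large idle pool drains quickly while a small one
	 * lingers.  For example, with a 5 second window and 20 threads already
	 * idle, this thread would wait 5,000,000 - 20 * 50,000 = 4,000,000
	 * usecs (4 seconds) before waking up and, if still unused, tearing
	 * itself down in wq_unpark_continue.
	 */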
2182	if (wq->wq_thidlecount < 100)
2183		us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
2184	else
2185		us_to_wait = wq_reduce_pool_window_usecs / 100;
2186
2187	wq->wq_thidlecount++;
2188	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
2189
2190	assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
2191			TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
2192			wq_reduce_pool_window_usecs, NSEC_PER_USEC);
2193
2194	workqueue_unlock(p);
2195
2196	if (start_timer)
2197		workqueue_interval_timer_start(wq);
2198
2199	PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
2200	PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);
2201
2202	thread_block((thread_continue_t)wq_unpark_continue);
2203	/* NOT REACHED */
2204
2205	return (FALSE);
2206}
2207
2208
2209static void
2210wq_unsuspend_continue(void)
2211{
2212	struct uthread *uth = NULL;
2213	thread_t th_to_unsuspend;
2214	struct threadlist *tl;
2215	proc_t	p;
2216
2217	th_to_unsuspend = current_thread();
2218	uth = pthread_kern->get_bsdthread_info(th_to_unsuspend);
2219
2220	if (uth != NULL && (tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {
2221
2222		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
2223			/*
2224			 * most likely a normal resume of this thread occurred...
2225			 * it's also possible that the thread was aborted after we
2226			 * finished setting it up so that it could be dispatched... if
2227			 * so, thread_bootstrap_return will notice the abort and put
2228			 * the thread on the path to self-destruction
2229			 */
2230normal_resume_to_user:
2231			pthread_kern->thread_sched_call(th_to_unsuspend, workqueue_callback);
2232			pthread_kern->thread_bootstrap_return();
2233		}
2234		/*
2235		 * if we get here, it's because we've been resumed due to
2236		 * an abort of this thread (process is crashing)
2237		 */
2238		p = current_proc();
2239
2240		workqueue_lock_spin(p);
2241
2242		if (tl->th_flags & TH_LIST_SUSPENDED) {
2243			/*
2244			 * thread has been aborted while still on our idle
2245			 * queue... remove it from our domain...
2246			 * workqueue_removethread consumes the lock
2247			 */
2248			workqueue_removethread(tl, 0);
2249			pthread_kern->thread_bootstrap_return();
2250		}
2251		while ((tl->th_flags & TH_LIST_BUSY)) {
2252			/*
2253			 * this thread was aborted after we started making
2254			 * it runnable, but before we finished dispatching it...
2255			 * we need to wait for that process to finish,
2256			 * and we need to ask for a wakeup instead of a
2257			 * thread_resume since the abort has already resumed us
2258			 */
2259			tl->th_flags |= TH_LIST_NEED_WAKEUP;
2260
2261			assert_wait((caddr_t)tl, (THREAD_UNINT));
2262
2263			workqueue_unlock(p);
2264			thread_block(THREAD_CONTINUE_NULL);
2265			workqueue_lock_spin(p);
2266		}
2267		workqueue_unlock(p);
2268		/*
2269		 * we have finished setting up the thread's context...
2270		 * thread_bootstrap_return will take us through the abort path
2271		 * where the thread will self destruct
2272		 */
2273		goto normal_resume_to_user;
2274	}
2275	pthread_kern->thread_bootstrap_return();
2276}
2277
2278
2279static void
2280wq_unpark_continue(void)
2281{
2282	struct uthread *uth = NULL;
2283	struct threadlist *tl;
2284	thread_t th_to_unpark;
2285	proc_t 	p;
2286
2287	th_to_unpark = current_thread();
2288	uth = pthread_kern->get_bsdthread_info(th_to_unpark);
2289
2290	if (uth != NULL) {
2291		if ((tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {
2292
2293			if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
2294				/*
2295				 * a normal wakeup of this thread occurred... no need
2296				 * for any synchronization with the timer and wq_runreq
2297				 */
2298normal_return_to_user:
2299				pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback);
2300
2301				PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);
2302
2303				pthread_kern->thread_exception_return();
2304			}
2305			p = current_proc();
2306
2307			workqueue_lock_spin(p);
2308
2309			if ( !(tl->th_flags & TH_LIST_RUNNING)) {
2310				/*
2311				 * the timer popped us out and we've not
2312				 * been moved off of the idle list
2313				 * so we should now self-destruct
2314				 *
2315				 * workqueue_removethread consumes the lock
2316				 */
2317				workqueue_removethread(tl, 0);
2318				pthread_kern->thread_exception_return();
2319			}
			/*
			 * the timer woke us up, but we have already
			 * started to make this a runnable thread and
			 * have not yet finished that process...
			 * so wait for the normal wakeup
			 */
2326			while ((tl->th_flags & TH_LIST_BUSY)) {
2327
2328				assert_wait((caddr_t)tl, (THREAD_UNINT));
2329
2330				workqueue_unlock(p);
2331
2332				thread_block(THREAD_CONTINUE_NULL);
2333
2334				workqueue_lock_spin(p);
2335			}
2336			/*
2337			 * we have finished setting up the thread's context
2338			 * now we can return as if we got a normal wakeup
2339			 */
2340			workqueue_unlock(p);
2341
2342			goto normal_return_to_user;
2343		}
2344	}
2345	pthread_kern->thread_exception_return();
2346}
2347
2348
2349
2350static void
2351wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
2352	   int reuse_thread, int wake_thread, int return_directly)
2353{
2354	int ret = 0;
2355	boolean_t need_resume = FALSE;
2356
2357	PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, overcommit, priority, thread_tid(current_thread()), thread_tid(th));
2358
2359	ret = _setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);
2360
2361	if (ret != 0)
2362		panic("setup_wqthread failed  %x\n", ret);
2363
2364	if (return_directly) {
2365		PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);
2366
2367		pthread_kern->thread_exception_return();
2368		panic("wq_runreq: thread_exception_return returned ...\n");
2369	}
2370	if (wake_thread) {
2371		workqueue_lock_spin(p);
2372
2373		tl->th_flags &= ~TH_LIST_BUSY;
2374		wakeup(tl);
2375
2376		workqueue_unlock(p);
2377	} else {
		PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
2379
2380		workqueue_lock_spin(p);
2381
2382		if (tl->th_flags & TH_LIST_NEED_WAKEUP) {
2383			wakeup(tl);
2384		} else {
2385			need_resume = TRUE;
2386		}
2387
2388		tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);
2389
2390		workqueue_unlock(p);
2391
2392		if (need_resume) {
2393			/*
2394			 * need to do this outside of the workqueue spin lock
2395			 * since thread_resume locks the thread via a full mutex
2396			 */
2397			pthread_kern->thread_resume(th);
2398		}
2399	}
2400}
2401
2402
2403int
2404_setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl)
2405{
2406	uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
2407	mach_vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
2408	int error = 0;
2409
2410	if (overcommit) {
2411		flags |= WQ_FLAG_THREAD_OVERCOMMIT;
2412	}
2413
	/*
	 * Put the QoS class value into the lower bits of the reuse_thread register;
	 * this is where the thread priority used to be stored anyway.
	 */
2417	flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
2418
2419#if defined(__i386__) || defined(__x86_64__)
2420	int isLP64 = proc_is64bit(p);
2421
2422	/*
2423	 * Set up i386 registers & function call.
2424	 */
2425	if (isLP64 == 0) {
2426		x86_thread_state32_t state;
2427		x86_thread_state32_t *ts = &state;
2428
2429		ts->eip = (unsigned int)pthread_kern->proc_get_wqthread(p);
2430		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
2431		ts->ebx = (unsigned int)tl->th_thport;
2432		ts->ecx = (unsigned int)(tl->th_stackaddr + guardsize);
2433		ts->edx = (unsigned int)0;
2434		ts->edi = (unsigned int)flags;
2435		ts->esi = (unsigned int)0;
2436		/*
2437		 * set stack pointer
2438		 */
2439		ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_32_STK_ALIGN));
2440
2441		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
2442
2443	} else {
2444		x86_thread_state64_t state64;
2445		x86_thread_state64_t *ts64 = &state64;
2446
2447		ts64->rip = (uint64_t)pthread_kern->proc_get_wqthread(p);
2448		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
2449		ts64->rsi = (uint64_t)(tl->th_thport);
2450		ts64->rdx = (uint64_t)(tl->th_stackaddr + guardsize);
2451		ts64->rcx = (uint64_t)0;
2452		ts64->r8 = (uint64_t)flags;
2453		ts64->r9 = (uint64_t)0;
2454
2455		/*
2456		 * set stack pointer aligned to 16 byte boundary
2457		 */
2458		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_64_REDZONE_LEN);
2459
2460		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
2461		if (error != KERN_SUCCESS) {
2462			error = EINVAL;
2463		}
2464	}
2465#else
2466#error setup_wqthread  not defined for this architecture
2467#endif
2468
2469	return error;
2470}
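
/*
 * Note on the register setup above (descriptive, not a contract):
 * proc_get_wqthread(p) is the userspace workqueue entry point the process
 * registered earlier (typically via bsdthread_register()).  The values
 * loaded into eax/ebx/ecx/edi (32-bit) or rdi/rsi/rdx/r8 (64-bit) arrive
 * there as roughly (stack top, thread kernel port, stack base, flags),
 * with the flags word carrying the reuse/overcommit/newspi bits and the
 * QoS class packed at the top of this function.  The exact prototype of
 * that entry point belongs to the userspace half of libpthread.
 */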
2471
2472int
2473_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
2474{
2475	struct workqueue * wq;
2476	int error = 0;
2477	int	activecount;
2478	uint32_t pri;
2479
2480	workqueue_lock_spin(p);
2481	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
2482		error = EINVAL;
2483		goto out;
2484	}
2485	activecount = 0;
2486
2487	for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
2488		activecount += wq->wq_thactive_count[pri];
2489	}
2490	pwqinfo->pwq_nthreads = wq->wq_nthreads;
2491	pwqinfo->pwq_runthreads = activecount;
2492	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
2493	pwqinfo->pwq_state = 0;
2494
2495	if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT) {
2496		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
2497	}
2498
2499	if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT) {
2500		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
2501	}
2502
2503out:
2504	workqueue_unlock(p);
2505	return(error);
2506}
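
/*
 * Illustrative consumer (userspace, not part of this file): the counts
 * filled in here are what proc_pidinfo() reports for a process'
 * workqueue, along the lines of
 *
 *	struct proc_workqueueinfo wqinfo;
 *	proc_pidinfo(pid, PROC_PIDWORKQUEUEINFO, 0, &wqinfo, sizeof(wqinfo));
 *
 * The flavor constant and the wrapper live in the proc_info/libproc
 * headers and may vary by release.
 */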
2507
2508int
2509_thread_selfid(__unused struct proc *p, uint64_t *retval)
2510{
2511	thread_t thread = current_thread();
2512	*retval = thread_tid(thread);
2513	return KERN_SUCCESS;
2514}
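
/*
 * Illustrative note: this is the backend for the thread_selfid syscall,
 * which userspace routines such as pthread_threadid_np() typically fall
 * back to for the current thread when the 64-bit thread id is not already
 * cached in the pthread_t structure.
 */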
2515
2516void
2517_pthread_init(void)
2518{
2519	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
2520	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
2521
2522	/*
2523	 * allocate the lock attribute for pthread synchronizers
2524	 */
2525	pthread_lck_attr = lck_attr_alloc_init();
2526
2527	_workqueue_init_lock((proc_t)get_bsdtask_info(kernel_task));
2528	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
2529
2530	pth_global_hashinit();
2531	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
2532	psynch_zoneinit();
2533
2534	/*
2535	 * register sysctls
2536	 */
2537	sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
2538	sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
2539	sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
2540	sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
2541	sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
2542	sysctl_register_oid(&sysctl__kern_wq_max_threads);
2543	sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
2544	sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
2545}
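
/*
 * The sysctls registered above surface the workqueue tunables under the
 * "kern" namespace; assuming the usual sysctl(8) tooling, they can be
 * inspected (or, where the OID was declared writable, adjusted) from
 * userspace with, for example,
 *
 *	sysctl kern.wq_max_threads
 *	sysctl -w kern.wq_reduce_pool_window_usecs=2000000
 *
 * subject to the normal privilege requirements for writable sysctls.
 */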
2546