1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/types.h>
30#include <sys/proc.h>
31#include <sys/proc_internal.h>
32#include <sys/systm.h>
33#include <sys/user.h>
34#include <sys/dtrace_ptss.h>
35
36#include <mach/vm_param.h>
37#include <mach/mach_vm.h>
38
39#include <kern/task.h>
40
41#include <vm/vm_map.h>
42
43/*
44 * This function requires the sprlock to be held
45 *
46 * In general, it will not block. If it needs to allocate a new
47 * page of memory, the underlying kernel _MALLOC may block.
48 */
49struct dtrace_ptss_page_entry*
50dtrace_ptss_claim_entry_locked(struct proc* p) {
51	lck_mtx_assert(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
52
53	struct dtrace_ptss_page_entry* entry = NULL;
54
55	while (TRUE) {
56		struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
57
58		if (temp == NULL) {
59			// Nothing on the free list. Allocate a new page, its okay if multiple threads race here.
60			struct dtrace_ptss_page* page = dtrace_ptss_allocate_page(p);
61
62			// Make sure we actually got a page
63			if (page == NULL)
64				return NULL;
65
66			// Add the page to the page list
67			page->next = p->p_dtrace_ptss_pages;
68			p->p_dtrace_ptss_pages = page;
69
70			// CAS the entries onto the free list.
71			do {
72				page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next = p->p_dtrace_ptss_free_list;
73			} while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next,
74						   (void *)&page->entries[0],
75						   (void * volatile *)&p->p_dtrace_ptss_free_list));
76
77			// Now that we've added to the free list, try again.
78			continue;
79		}
80
81		// Claim temp
82		if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list))
83			continue;
84
85		// At this point, we own temp.
86		entry = temp;
87
88		break;
89	}
90
91	return entry;
92}
93
94/*
95 * This function does not require any locks to be held on entry.
96 */
97struct dtrace_ptss_page_entry*
98dtrace_ptss_claim_entry(struct proc* p) {
99	// Verify no locks held on entry
100	lck_mtx_assert(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
101	lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
102
103	struct dtrace_ptss_page_entry* entry = NULL;
104
105	while (TRUE) {
106		struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
107
108		if (temp == NULL) {
109			lck_mtx_lock(&p->p_dtrace_sprlock);
110			temp = dtrace_ptss_claim_entry_locked(p);
111			lck_mtx_unlock(&p->p_dtrace_sprlock);
112			return temp;
113		}
114
115		// Claim temp
116		if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list))
117			continue;
118
119		// At this point, we own temp.
120		entry = temp;
121
122		break;
123	}
124
125	return entry;
126}
127
128/*
129 * This function does not require any locks to be held on entry.
130 *
131 * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can
132 * no longer be referenced safely. When found in this state, the chore
133 * of releasing an entry to the free list is ignored.
134 */
135void
136dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e) {
137	if (p && p->p_dtrace_ptss_pages && e) {
138		do {
139			e->next = p->p_dtrace_ptss_free_list;
140		} while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list));
141	}
142}
143
144/*
145 * This function allocates a new page in the target process's address space.
146 *
147 * It returns a dtrace_ptss_page that has its entries chained, with the last
148 * entries next field set to NULL. It does not add the page or the entries to
149 * the process's page/entry lists.
150 *
151 * This function does not require that any locks be held when it is invoked.
152 */
153struct dtrace_ptss_page*
154dtrace_ptss_allocate_page(struct proc* p)
155{
156	// Allocate the kernel side data
157	struct dtrace_ptss_page* ptss_page = _MALLOC(sizeof(struct dtrace_ptss_page), M_TEMP, M_ZERO | M_WAITOK);
158	if (ptss_page == NULL)
159		return NULL;
160
161	// Now allocate a page in user space and set its protections to allow execute.
162	task_t task = p->task;
163	vm_map_t map = get_task_map_reference(task);
164	if (map == NULL)
165	  goto err;
166
167	vm_prot_t cur_protection = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
168	vm_prot_t max_protection = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
169
170	mach_vm_offset_t addr = 0;
171	mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!!
172	kern_return_t kr = mach_vm_map(map, &addr, size, 0, VM_FLAGS_ANYWHERE, IPC_PORT_NULL, 0, FALSE, cur_protection, max_protection, VM_INHERIT_DEFAULT);
173	if (kr != KERN_SUCCESS) {
174		goto err;
175	}
176
177	// Chain the page entries.
178	int i;
179	for (i=0; i<DTRACE_PTSS_ENTRIES_PER_PAGE; i++) {
180		ptss_page->entries[i].addr = addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
181		ptss_page->entries[i].next = &ptss_page->entries[i+1];
182	}
183
184	// The last entry should point to NULL
185	ptss_page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next = NULL;
186
187	vm_map_deallocate(map);
188
189	return ptss_page;
190
191err:
192	_FREE(ptss_page, M_TEMP);
193
194	if (map)
195	  vm_map_deallocate(map);
196
197	return NULL;
198}
199
200/*
201 * This function frees an existing page in the target process's address space.
202 *
203 * It does not alter any of the process's page/entry lists.
204 *
205 * TODO: Inline in dtrace_ptrace_exec_exit?
206 */
207void
208dtrace_ptss_free_page(struct proc* p, struct dtrace_ptss_page* ptss_page)
209{
210	// Grab the task and get a reference to its vm_map
211	task_t task = p->task;
212	vm_map_t map = get_task_map_reference(task);
213
214	mach_vm_address_t addr = ptss_page->entries[0].addr;
215	mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!!
216
217	// Silent failures, no point in checking return code.
218	mach_vm_deallocate(map, addr, size);
219
220	vm_map_deallocate(map);
221}
222
223/*
224 * This function assumes that the target process has been
225 * suspended, and the proc_lock & sprlock is held
226 */
227void
228dtrace_ptss_enable(struct proc* p) {
229	lck_mtx_assert(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
230	lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_OWNED);
231
232	struct uthread* uth;
233	/*
234	 * XXX There has been a concern raised about holding the proc_lock
235	 * while calling dtrace_ptss_claim_entry(), due to the fact
236	 * that dtrace_ptss_claim_entry() can potentially malloc.
237	 */
238	TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
239		uth->t_dtrace_scratch = dtrace_ptss_claim_entry_locked(p);
240	}
241}
242
243/*
244 * This function is not thread safe.
245 *
246 * It assumes the sprlock is held, and the proc_lock is not.
247 */
248void
249dtrace_ptss_exec_exit(struct proc* p) {
250	/*
251	 * Should hold sprlock to touch the pages list. Must not
252	 * hold the proc lock to avoid deadlock.
253	 */
254	lck_mtx_assert(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
255	lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
256
257	p->p_dtrace_ptss_free_list = NULL;
258
259	struct dtrace_ptss_page* temp = p->p_dtrace_ptss_pages;
260	p->p_dtrace_ptss_pages = NULL;
261
262	while (temp != NULL) {
263		struct dtrace_ptss_page* next = temp->next;
264
265		// Do we need to specifically mach_vm_deallocate the user pages?
266		// This can be called when the process is exiting, I believe the proc's
267		// vm_map_t may already be toast.
268
269		// Must be certain to free the kernel memory!
270		_FREE(temp, M_TEMP);
271		temp = next;
272	}
273}
274
275/*
276 * This function is not thread safe. It is not used for vfork.
277 *
278 * The child proc ptss fields are initialized to NULL at fork time.
279 * Pages allocated in the parent are copied as part of the vm_map copy, though.
280 * We need to deallocate those pages.
281 *
282 * Parent and child sprlock should be held, and proc_lock must NOT be held.
283 */
284void
285dtrace_ptss_fork(struct proc* parent, struct proc* child) {
286	// The child should not have any pages/entries allocated at this point.
287	// ASSERT(child->p_dtrace_ptss_pages == NULL);
288	// ASSERT(child->p_dtrace_ptss_free_list == NULL);
289
290	/*
291	 * The parent's sprlock should be held, to protect its pages list
292	 * from changing while the child references it. The child's sprlock
293	 * must also be held, because we are modifying its pages list.
294	 * Finally, to prevent a deadlock with the fasttrap cleanup code,
295	 * neither the parent or child proc_lock should be held.
296	 */
297	lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
298	lck_mtx_assert(&parent->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
299	lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
300	lck_mtx_assert(&child->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
301
302	// Get page list from *PARENT*
303	struct dtrace_ptss_page* temp = parent->p_dtrace_ptss_pages;
304
305	while (temp != NULL) {
306		// Freeing the page in the *CHILD*
307		dtrace_ptss_free_page(child, temp);
308
309		// Do not free the kernel memory, it belong to the parent.
310		temp = temp->next;
311	}
312}
313