1/*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	ipc/ipc_entry.c
60 *	Author:	Rich Draves
61 *	Date:	1989
62 *
63 *	Primitive functions to manipulate translation entries.
64 */
65
66#include <mach_debug.h>
67
68#include <mach/kern_return.h>
69#include <mach/port.h>
70#include <kern/assert.h>
71#include <kern/sched_prim.h>
72#include <kern/zalloc.h>
73#include <kern/misc_protos.h>
74#include <ipc/port.h>
75#include <ipc/ipc_entry.h>
76#include <ipc/ipc_space.h>
77#include <ipc/ipc_object.h>
78#include <ipc/ipc_hash.h>
79#include <ipc/ipc_table.h>
80#include <ipc/ipc_port.h>
81#include <string.h>
82
83/*
84 *	Routine:	ipc_entry_lookup
85 *	Purpose:
86 *		Searches for an entry, given its name.
87 *	Conditions:
88 *		The space must be read or write locked throughout.
89 *		The space must be active.
90 */
91
92ipc_entry_t
93ipc_entry_lookup(
94	ipc_space_t		space,
95	mach_port_name_t	name)
96{
97	mach_port_index_t index;
98	ipc_entry_t entry;
99
100	assert(is_active(space));
101
102
103	index = MACH_PORT_INDEX(name);
104	if (index <  space->is_table_size) {
105                entry = &space->is_table[index];
106		if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) ||
107		    IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
108			entry = IE_NULL;
109	}
110	else {
111		entry = IE_NULL;
112	}
113
114	assert((entry == IE_NULL) || IE_BITS_TYPE(entry->ie_bits));
115	return entry;
116}
117
118/*
119 *	Routine:	ipc_entry_get
120 *	Purpose:
121 *		Tries to allocate an entry out of the space.
122 *	Conditions:
123 *		The space is write-locked and active throughout.
124 *		An object may be locked.  Will not allocate memory.
125 *	Returns:
126 *		KERN_SUCCESS		A free entry was found.
127 *		KERN_NO_SPACE		No entry allocated.
128 */
129
130kern_return_t
131ipc_entry_get(
132	ipc_space_t		space,
133	mach_port_name_t	*namep,
134	ipc_entry_t		*entryp)
135{
136	ipc_entry_t table;
137	mach_port_index_t first_free;
138	ipc_entry_t free_entry;
139
140	assert(is_active(space));
141
142	{
143		table = space->is_table;
144		first_free = table->ie_next;
145
146		if (first_free == 0)
147			return KERN_NO_SPACE;
148
149		assert(first_free < space->is_table_size);
150		free_entry = &table[first_free];
151		table->ie_next = free_entry->ie_next;
152	}
153
154	/*
155	 *	Initialize the new entry.  We need only
156	 *	increment the generation number and clear ie_request.
157	 */
158	{
159		mach_port_name_t new_name;
160		mach_port_gen_t gen;
161
162		gen = IE_BITS_NEW_GEN(free_entry->ie_bits);
163		free_entry->ie_bits = gen;
164		free_entry->ie_request = IE_REQ_NONE;
165
166		/*
167		 *	The new name can't be MACH_PORT_NULL because index
168		 *	is non-zero.  It can't be MACH_PORT_DEAD because
169		 *	the table isn't allowed to grow big enough.
170		 *	(See comment in ipc/ipc_table.h.)
171		 */
172		new_name = MACH_PORT_MAKE(first_free, gen);
173		assert(MACH_PORT_VALID(new_name));
174		*namep = new_name;
175	}
176
177	assert(free_entry->ie_object == IO_NULL);
178
179	*entryp = free_entry;
180	return KERN_SUCCESS;
181}
182
183/*
184 *	Routine:	ipc_entry_alloc
185 *	Purpose:
186 *		Allocate an entry out of the space.
187 *	Conditions:
188 *		The space is not locked before, but it is write-locked after
189 *		if the call is successful.  May allocate memory.
190 *	Returns:
191 *		KERN_SUCCESS		An entry was allocated.
192 *		KERN_INVALID_TASK	The space is dead.
193 *		KERN_NO_SPACE		No room for an entry in the space.
194 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory for an entry.
195 */
196
197kern_return_t
198ipc_entry_alloc(
199	ipc_space_t		space,
200	mach_port_name_t	*namep,
201	ipc_entry_t		*entryp)
202{
203	kern_return_t kr;
204
205	is_write_lock(space);
206
207	for (;;) {
208		if (!is_active(space)) {
209			is_write_unlock(space);
210			return KERN_INVALID_TASK;
211		}
212
213		kr = ipc_entry_get(space, namep, entryp);
214		if (kr == KERN_SUCCESS)
215			return kr;
216
217		kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
218		if (kr != KERN_SUCCESS)
219			return kr; /* space is unlocked */
220	}
221}
222
223/*
224 *	Routine:	ipc_entry_alloc_name
225 *	Purpose:
226 *		Allocates/finds an entry with a specific name.
227 *		If an existing entry is returned, its type will be nonzero.
228 *	Conditions:
229 *		The space is not locked before, but it is write-locked after
230 *		if the call is successful.  May allocate memory.
231 *	Returns:
232 *		KERN_SUCCESS		Found existing entry with same name.
233 *		KERN_SUCCESS		Allocated a new entry.
234 *		KERN_INVALID_TASK	The space is dead.
235 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
236 *		KERN_FAILURE		Couldn't allocate requested name.
237 */
238
kern_return_t
ipc_entry_alloc_name(
	ipc_space_t		space,
	mach_port_name_t	name,
	ipc_entry_t		*entryp)
{
	mach_port_index_t index = MACH_PORT_INDEX(name);
	mach_port_gen_t gen = MACH_PORT_GEN(name);

	assert(MACH_PORT_VALID(name));


	is_write_lock(space);

	for (;;) {
		ipc_entry_t entry;

		/* the space may die while the lock was dropped for growing */
		if (!is_active(space)) {
			is_write_unlock(space);
			return KERN_INVALID_TASK;
		}

		/*
		 *	If we are under the table cutoff,
		 *	there are usually four cases:
		 *		1) The entry is reserved (index 0)
		 *		2) The entry is inuse, for the same name
		 *		3) The entry is inuse, for a different name
		 *		4) The entry is free
		 *	For a task with a "fast" IPC space, we disallow
		 *	cases 1) and 3), because ports cannot be renamed.
		 */
		if (index < space->is_table_size) {
			ipc_entry_t table = space->is_table;

			entry = &table[index];

			if (index == 0) {
				/* case #1 - the entry is reserved */
				assert(!IE_BITS_TYPE(entry->ie_bits));
				assert(!IE_BITS_GEN(entry->ie_bits));
				is_write_unlock(space);
				return KERN_FAILURE;
			} else if (IE_BITS_TYPE(entry->ie_bits)) {
				if (IE_BITS_GEN(entry->ie_bits) == gen) {
					/* case #2 -- the entry is inuse, for the same name */
					/* NOTE: space remains write-locked on this success path */
					*entryp = entry;
					return KERN_SUCCESS;
				} else {
					/* case #3 -- the entry is inuse, for a different name. */
					/* Collisions are not allowed */
					is_write_unlock(space);
					return KERN_FAILURE;
				}
			} else {
				mach_port_index_t free_index, next_index;

				/*
				 *      case #4 -- the entry is free
				 *	Rip the entry out of the free list.
				 */

				/*
				 * Walk the free list (singly linked through
				 * ie_next, head in slot 0) until the link
				 * pointing at our index is found.  O(list
				 * length); the loop relies on index being
				 * present, which case #4 guarantees.
				 */
				for (free_index = 0;
				     (next_index = table[free_index].ie_next)
							!= index;
				     free_index = next_index)
					continue;

				/* unlink: predecessor skips over our entry */
				table[free_index].ie_next =
					table[next_index].ie_next;

				/* mark the previous entry modified - reconstructing the name */
				ipc_entry_modified(space,
						   MACH_PORT_MAKE(free_index,
						   	IE_BITS_GEN(table[free_index].ie_bits)),
						   &table[free_index]);

				/* claim the entry with the caller's generation */
				entry->ie_bits = gen;
				entry->ie_request = IE_REQ_NONE;
				*entryp = entry;

				assert(entry->ie_object == IO_NULL);
				return KERN_SUCCESS;
			}
		}

		/*
		 *      We grow the table so that the name
		 *	index fits in the array space.
		 *      Because the space will be unlocked,
		 *      we must restart.
		 */
                kern_return_t kr;
		kr = ipc_entry_grow_table(space, index + 1);
		assert(kr != KERN_NO_SPACE);
		if (kr != KERN_SUCCESS) {
			/* space is unlocked */
			return kr;
		}
		continue;
	}
}
341
342/*
343 *	Routine:	ipc_entry_dealloc
344 *	Purpose:
345 *		Deallocates an entry from a space.
346 *	Conditions:
347 *		The space must be write-locked throughout.
348 *		The space must be active.
349 */
350
void
ipc_entry_dealloc(
	ipc_space_t		space,
	mach_port_name_t	name,
	ipc_entry_t		entry)
{
	ipc_entry_t table;
	ipc_entry_num_t size;
	mach_port_index_t index;

	assert(is_active(space));
	assert(entry->ie_object == IO_NULL);
	assert(entry->ie_request == IE_REQ_NONE);

	/*
	 * Defensive duplicate of the assert above: asserts compile out
	 * in release kernels, so panic here keeps the check in all
	 * builds (deallocating with a live request would corrupt the
	 * request table).
	 */
#if 1
	if (entry->ie_request != IE_REQ_NONE)
		panic("ipc_entry_dealloc()\n");
#endif

	index = MACH_PORT_INDEX(name);
	table = space->is_table;
	size = space->is_table_size;

	if ((index < size) && (entry == &table[index])) {
		assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
		/*
		 * Keep only the generation bits, then push the entry
		 * onto the head of the free list (head lives in
		 * slot 0's ie_next).
		 */
		entry->ie_bits &= IE_BITS_GEN_MASK;
		entry->ie_next = table->ie_next;
		table->ie_next = index;
	} else {
		/*
		 * Nothing to do.  The entry does not match
		 * so there is nothing to deallocate.
		 *
		 * These asserts always fail when this branch is taken;
		 * in debug builds reaching here is itself the bug.
		 */
                assert(index < size);
		assert(entry == &table[index]);
		assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
	}
	/* record the change for any in-progress table grow */
	ipc_entry_modified(space, name, entry);
}
390
391/*
392 *	Routine:	ipc_entry_modified
393 *	Purpose:
394 *		Note that an entry was modified in a space.
395 *	Conditions:
396 *		Assumes exclusive write access to the space,
397 *		either through a write lock or being the cleaner
398 *		on an inactive space.
399 */
400
401void
402ipc_entry_modified(
403	ipc_space_t		space,
404	mach_port_name_t	name,
405	__assert_only ipc_entry_t entry)
406{
407	ipc_entry_t table;
408	ipc_entry_num_t size;
409	mach_port_index_t index;
410
411	index = MACH_PORT_INDEX(name);
412	table = space->is_table;
413	size = space->is_table_size;
414
415	assert(index < size);
416	assert(entry == &table[index]);
417
418	assert(space->is_low_mod <= size);
419	assert(space->is_high_mod < size);
420
421	if (index < space->is_low_mod)
422		space->is_low_mod = index;
423	if (index > space->is_high_mod)
424		space->is_high_mod = index;
425}
426
#define IPC_ENTRY_GROW_STATS 1
#if IPC_ENTRY_GROW_STATS
/*
 * Instrumentation for ipc_entry_grow_table(): total grows, rescan
 * passes forced by concurrent modification (count / max per grow /
 * entries re-copied), and lengths of the free-list walk.
 */
static uint64_t ipc_entry_grow_count = 0;
static uint64_t ipc_entry_grow_rescan = 0;
static uint64_t ipc_entry_grow_rescan_max = 0;
static uint64_t ipc_entry_grow_rescan_entries = 0;
static uint64_t ipc_entry_grow_rescan_entries_max = 0;
static uint64_t	ipc_entry_grow_freelist_entries = 0;
static uint64_t	ipc_entry_grow_freelist_entries_max = 0;
#endif
437
438/*
439 *	Routine:	ipc_entry_grow_table
440 *	Purpose:
441 *		Grows the table in a space.
442 *	Conditions:
443 *		The space must be write-locked and active before.
444 *		If successful, the space is also returned locked.
445 *		On failure, the space is returned unlocked.
446 *		Allocates memory.
447 *	Returns:
448 *		KERN_SUCCESS		Grew the table.
449 *		KERN_SUCCESS		Somebody else grew the table.
450 *		KERN_SUCCESS		The space died.
451 *		KERN_NO_SPACE		Table has maximum size already.
452 *		KERN_RESOURCE_SHORTAGE	Couldn't allocate a new table.
453 */
454
kern_return_t
ipc_entry_grow_table(
	ipc_space_t		space,
	ipc_table_elems_t	target_size)
{
	ipc_entry_num_t osize, size, nsize, psize;

	ipc_entry_t otable, table;
	ipc_table_size_t oits, its, nits;
	mach_port_index_t i, free_index;
	mach_port_index_t low_mod, hi_mod;
	ipc_table_index_t sanity;
#if IPC_ENTRY_GROW_STATS
	uint64_t rescan_count = 0;
#endif
	assert(is_active(space));

	if (is_growing(space)) {
		/*
		 *	Somebody else is growing the table.
		 *	We just wait for them to finish.
		 */

		is_write_sleep(space);
		return KERN_SUCCESS;
	}

	otable = space->is_table;

	its = space->is_table_next;
	size = its->its_size;

	/*
	 * Since is_table_next points to the next natural size
	 * we can identify the current size entry.
	 */
	oits = its - 1;
	osize = oits->its_size;

	/*
	 * If there is no target size, then the new size is simply
	 * specified by is_table_next.  If there is a target
	 * size, then search for the next entry.
	 */
	if (target_size != ITS_SIZE_NONE) {
		if (target_size <= osize) {
			/* the space is locked */
			return KERN_SUCCESS;
		}

		/* advance through the size table until it covers target_size */
		psize = osize;
		while ((psize != size) && (target_size > size)) {
			psize = size;
			its++;
			size = its->its_size;
		}
		if (psize == size) {
			/* size table exhausted: target cannot be satisfied */
			is_write_unlock(space);
			return KERN_NO_SPACE;
		}
	}

	if (osize == size) {
		/* already at maximum size */
		is_write_unlock(space);
		return KERN_NO_SPACE;
	}

	nits = its + 1;
	nsize = nits->its_size;
	assert((osize < size) && (size <= nsize));

	/*
	 * We'll attempt to grow the table.
	 *
	 * Because we will be copying without the space lock, reset
	 * the lowest_mod index to just beyond the end of the current
	 * table.  Modification of entries (other than hashes) will
	 * bump this downward, and we only have to reprocess entries
	 * above that mark.  Eventually, we'll get done.
	 */
	is_start_growing(space);
	space->is_low_mod = osize;
	space->is_high_mod = 0;
#if IPC_ENTRY_GROW_STATS
	ipc_entry_grow_count++;
#endif
	is_write_unlock(space);

	table = it_entries_alloc(its);
	if (table == IE_NULL) {
		/* allocation failed: clear the growing flag and wake waiters */
		is_write_lock(space);
		is_done_growing(space);
		is_write_unlock(space);
		thread_wakeup((event_t) space);
		return KERN_RESOURCE_SHORTAGE;
	}

	/* initialize new entries (free chain in backwards order) */
	for (i = osize; i < size; i++) {
		table[i].ie_object = IO_NULL;
		table[i].ie_bits = IE_BITS_GEN_MASK;
		table[i].ie_index = 0;
		table[i].ie_next = i + 1;
	}
	/* terminate the chain of brand-new free entries */
	table[size-1].ie_next = 0;

	/* clear out old entries in new table */
	memset((void *)table, 0, osize * sizeof(*table));

	/* first pass covers the whole old table; rescans narrow the range */
	low_mod = 0;
	hi_mod = osize - 1;
 rescan:
	/*
	 * Within the range of the table that changed, determine what we
	 * have to take action on. For each entry, take a snapshot of the
	 * corresponding entry in the old table (so it won't change
	 * during this iteration). The snapshot may not be self-consistent
	 * (if we caught it in the middle of being changed), so be very
	 * cautious with the values.
	 */
	for (i = low_mod; i <= hi_mod; i++) {
		ipc_entry_t entry = &table[i];
		struct ipc_entry osnap = otable[i];

		if (entry->ie_object != osnap.ie_object ||
		    IE_BITS_TYPE(entry->ie_bits) != IE_BITS_TYPE(osnap.ie_bits)) {

			/*
			 * The entry changed since our last copy: fix up the
			 * reverse hash (only send-right entries are hashed)
			 * before and after overwriting with the snapshot.
			 */
			if (entry->ie_object != IO_NULL &&
			    IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND)
				ipc_hash_table_delete(table, size, entry->ie_object, i, entry);

			entry->ie_object = osnap.ie_object;
			entry->ie_bits = osnap.ie_bits;
			entry->ie_request = osnap.ie_request; /* or ie_next */

			if (entry->ie_object != IO_NULL &&
			    IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND)
				ipc_hash_table_insert(table, size, entry->ie_object, i, entry);
		} else {
			/* object/type unchanged: refresh bits and request only */
			assert(entry->ie_object == osnap.ie_object);
			entry->ie_bits = osnap.ie_bits;
			entry->ie_request = osnap.ie_request; /* or ie_next */
		}

	}
	table[0].ie_next = otable[0].ie_next;  /* always rebase the freelist */

	/*
	 * find the end of the freelist (should be short). But be careful,
	 * the list items can change so only follow through truly free entries
	 * (no problem stopping short in those cases, because we'll rescan).
	 */
	free_index = 0;
	for (sanity = 0; sanity < osize; sanity++) {
		if (table[free_index].ie_object != IPC_OBJECT_NULL)
			break;
		i = table[free_index].ie_next;
		if (i == 0 || i >= osize)
			break;
		free_index = i;
	}
#if IPC_ENTRY_GROW_STATS
	ipc_entry_grow_freelist_entries += sanity;
	if (sanity > ipc_entry_grow_freelist_entries_max)
		ipc_entry_grow_freelist_entries_max = sanity;
#endif

	is_write_lock(space);

	/*
	 *	We need to do a wakeup on the space,
	 *	to rouse waiting threads.  We defer
	 *	this until the space is unlocked,
	 *	because we don't want them to spin.
	 */

	if (!is_active(space)) {
		/*
		 *	The space died while it was unlocked.
		 */

		is_done_growing(space);
		is_write_unlock(space);
		thread_wakeup((event_t) space);
		it_entries_free(its, table);
		/* contract: return with the space locked, even on this path */
		is_write_lock(space);
		return KERN_SUCCESS;
	}

	/* If the space changed while unlocked, go back and process the changes */
	if (space->is_low_mod < osize) {
		assert(space->is_high_mod > 0);
		/* take the dirty window, then reset it for the next pass */
		low_mod = space->is_low_mod;
		space->is_low_mod = osize;
		hi_mod = space->is_high_mod;
		space->is_high_mod = 0;
		is_write_unlock(space);
#if IPC_ENTRY_GROW_STATS
		rescan_count++;
		if (rescan_count > ipc_entry_grow_rescan_max)
			ipc_entry_grow_rescan_max = rescan_count;

		ipc_entry_grow_rescan++;
		ipc_entry_grow_rescan_entries += hi_mod - low_mod + 1;
		if (hi_mod - low_mod + 1 > ipc_entry_grow_rescan_entries_max)
			ipc_entry_grow_rescan_entries_max = hi_mod - low_mod + 1;
#endif
		goto rescan;
	}

	/* link new free entries onto the rest of the freelist */
	assert(table[free_index].ie_next == 0 &&
	       table[free_index].ie_object == IO_NULL);
	table[free_index].ie_next = osize;

	assert(space->is_table == otable);
	assert((space->is_table_next == its) ||
	    (target_size != ITS_SIZE_NONE));
	assert(space->is_table_size == osize);

	/* publish the new table while holding the write lock */
	space->is_table = table;
	space->is_table_size = size;
	space->is_table_next = nits;

	is_done_growing(space);
	is_write_unlock(space);

	thread_wakeup((event_t) space);

	/*
	 *	Now we need to free the old table.
	 */
	it_entries_free(oits, otable);
	is_write_lock(space);

	return KERN_SUCCESS;
}
692