subr_extent.c revision 1.43
1/*	$OpenBSD: subr_extent.c,v 1.43 2010/06/20 17:57:09 phessler Exp $	*/
2/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Matthias Drochner.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * General purpose extent manager.
35 */
36
37#ifdef _KERNEL
38#include <sys/param.h>
39#include <sys/extent.h>
40#include <sys/malloc.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/proc.h>
44#include <sys/queue.h>
45#include <sys/pool.h>
46#include <ddb/db_output.h>
47#else
48/*
49 * user-land definitions, so it can fit into a testing harness.
50 */
51#include <sys/param.h>
52#include <sys/pool.h>
53#include <sys/extent.h>
54#include <sys/queue.h>
55#include <errno.h>
56#include <stdlib.h>
57#include <stdio.h>
58#include <string.h>
59
60#define	malloc(s, t, flags)		malloc(s)
61#define	free(p, t)			free(p)
62#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
63#define	wakeup(chan)			((void)0)
64#define	pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
65#define	pool_init(a, b, c, d, e, f, g)	(a)->pr_size = (b)
66#define	pool_setipl(pool, ipl)		/* nothing */
67#define	pool_put(pool, rp)		free((rp), 0)
68#define	panic				printf
69#endif
70
71#if defined(DIAGNOSTIC) || defined(DDB)
72void	extent_print1(struct extent *, int (*)(const char *, ...));
73#endif
74
75static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
76	    struct extent_region *, struct extent_region *);
77static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
78static	void extent_free_region_descriptor(struct extent *,
79	    struct extent_region *);
80
81/*
82 * Shortcut to align to an arbitrary power-of-two boundary.
83 */
84static __inline__ u_long
85extent_align(u_long start, u_long align, u_long skew)
86{
87	return ((((start - skew) + (align - 1)) & (-align)) + skew);
88}
89
90
91#if defined(DIAGNOSTIC) || defined(DDB)
92/*
93 * Register the extent on a doubly linked list.
94 * Should work, no?
95 */
96static LIST_HEAD(listhead, extent) ext_list;
97static	void extent_register(struct extent *);
98
99static void
100extent_register(struct extent *ex)
101{
102#ifdef DIAGNOSTIC
103	struct extent *ep;
104#endif
105	static int initialized;
106
107	if (!initialized){
108		LIST_INIT(&ext_list);
109		initialized = 1;
110	}
111
112#ifdef DIAGNOSTIC
113	LIST_FOREACH(ep, &ext_list, ex_link) {
114		if (ep == ex)
115			panic("extent_register: already registered");
116	}
117#endif
118
119	/* Insert into list */
120	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
121}
122#endif	/* DIAGNOSTIC || DDB */
123
124struct pool ex_region_pl;
125
126static void
127extent_pool_init(void)
128{
129	static int inited;
130
131	if (!inited) {
132		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
133		    "extentpl", NULL);
134		pool_setipl(&ex_region_pl, IPL_VM);
135		inited = 1;
136	}
137}
138
139#ifdef DDB
140/*
141 * Print out all extents registered.  This is used in
142 * DDB show extents
143 */
144void
145extent_print_all(void)
146{
147	struct extent *ep;
148
149	LIST_FOREACH(ep, &ext_list, ex_link) {
150		extent_print1(ep, db_printf);
151	}
152}
153#endif
154
/*
 * Allocate and initialize an extent map.
 *
 * "name" is used in diagnostics; the string is referenced, not
 * copied, so it must outlive the extent.  "start"/"end" are the
 * inclusive bounds of the managed space.  "mtype" is the malloc
 * type used for the extent descriptor; "flags" is EX_* flags.
 *
 * If "storage" is non-NULL, the caller supplies "storagesize" bytes
 * which are carved into the extent descriptor plus pre-allocated
 * region descriptors ("fixed" extent — presumably for use before
 * dynamic allocation is available; confirm with callers).
 * Returns the new extent, or NULL on allocation failure.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);	/* caller-provided storage? */

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("extent_create: name == NULL");
	if (end < start) {
		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
		    name, start, end);
		panic("extent_create: end < start");
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("extent_create: fixed extent, bad storagesize 0x%lx",
		    (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("extent_create: storage provided for non-fixed");
#endif

	/* Make sure the descriptor pool exists before first use. */
	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		bzero(storage, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	/*
	 * EX_FILLED: start with the whole range allocated as one
	 * region, so callers can carve holes with extent_free().
	 */
	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
249
250/*
251 * Destroy an extent map.
252 */
253void
254extent_destroy(struct extent *ex)
255{
256	struct extent_region *rp, *orp;
257
258#ifdef DIAGNOSTIC
259	/* Check arguments. */
260	if (ex == NULL)
261		panic("extent_destroy: NULL extent");
262#endif
263
264	/* Free all region descriptors in extent. */
265	for (rp = LIST_FIRST(&ex->ex_regions);
266	    rp != LIST_END(&ex->ex_regions); ) {
267		orp = rp;
268		rp = LIST_NEXT(rp, er_link);
269		LIST_REMOVE(orp, er_link);
270		extent_free_region_descriptor(ex, orp);
271	}
272
273#if defined(DIAGNOSTIC) || defined(DDB)
274	/* Remove from the list of all extents. */
275	LIST_REMOVE(ex, ex_link);
276#endif
277
278	/* If we're not a fixed extent, free the extent descriptor itself. */
279	if ((ex->ex_flags & EXF_FIXED) == 0)
280		free(ex, ex->ex_mtype);
281}
282
/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 *
 * "start"/"size" describe the new allocation; "after" must be the
 * region immediately preceding it in sorted order.  Unless
 * EXF_NOCOALESCE is set, the new allocation is merged with an
 * adjacent region on either side where possible, and any descriptor
 * made redundant by merging is released.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;	/* did we merge into "after"? */

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
391
392/*
393 * Allocate a specific region in an extent map.
394 */
395int
396extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
397{
398	struct extent_region *rp, *last, *myrp;
399	u_long end = start + (size - 1);
400	int error;
401
402#ifdef DIAGNOSTIC
403	/* Check arguments. */
404	if (ex == NULL)
405		panic("extent_alloc_region: NULL extent");
406	if (size < 1) {
407		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
408		    ex->ex_name, size);
409		panic("extent_alloc_region: bad size");
410	}
411	if (end < start) {
412		printf(
413		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
414		 ex->ex_name, start, size);
415		panic("extent_alloc_region: overflow");
416	}
417	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
418		panic("extent_alloc_region: EX_CONFLICTOK and EX_WAITSPACE "
419		    "are mutually exclusive");
420#endif
421
422	/*
423	 * Make sure the requested region lies within the
424	 * extent.
425	 */
426	if ((start < ex->ex_start) || (end > ex->ex_end)) {
427#ifdef DIAGNOSTIC
428		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
429		    ex->ex_name, ex->ex_start, ex->ex_end);
430		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
431		    start, end);
432		panic("extent_alloc_region: region lies outside extent");
433#else
434		return (EINVAL);
435#endif
436	}
437
438	/*
439	 * Allocate the region descriptor.  It will be freed later
440	 * if we can coalesce with another region.
441	 */
442	myrp = extent_alloc_region_descriptor(ex, flags);
443	if (myrp == NULL) {
444#ifdef DIAGNOSTIC
445		printf(
446		    "extent_alloc_region: can't allocate region descriptor\n");
447#endif
448		return (ENOMEM);
449	}
450
451 alloc_start:
452	/*
453	 * Attempt to place ourselves in the desired area of the
454	 * extent.  We save ourselves some work by keeping the list sorted.
455	 * In other words, if the start of the current region is greater
456	 * than the end of our region, we don't have to search any further.
457	 */
458
459	/*
460	 * Keep a pointer to the last region we looked at so
461	 * that we don't have to traverse the list again when
462	 * we insert ourselves.  If "last" is NULL when we
463	 * finally insert ourselves, we go at the head of the
464	 * list.  See extent_insert_and_optimize() for details.
465	 */
466	last = NULL;
467
468	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
469		if (rp->er_start > end) {
470			/*
471			 * We lie before this region and don't
472			 * conflict.
473			 */
474			break;
475		}
476
477		/*
478		 * The current region begins before we end.
479		 * Check for a conflict.
480		 */
481		if (rp->er_end >= start) {
482			/*
483			 * We conflict.  If we can (and want to) wait,
484			 * do so.
485			 */
486			if (flags & EX_WAITSPACE) {
487				ex->ex_flags |= EXF_WANTED;
488				error = tsleep(ex,
489				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
490				    "extnt", 0);
491				if (error)
492					return (error);
493				goto alloc_start;
494			}
495
496			/*
497			 * If we tolerate conflicts adjust things such
498			 * that all space in the requested region is
499			 * allocated.
500			 */
501			if (flags & EX_CONFLICTOK) {
502				/*
503				 * There are four possibilities:
504				 *
505				 * 1. The current region overlaps with
506				 *    the start of the requested region.
507				 *    Adjust the requested region to
508				 *    start at the end of the current
509				 *    region and try again.
510				 *
511				 * 2. The current region falls
512				 *    completely within the requested
513				 *    region.  Free the current region
514				 *    and try again.
515				 *
516				 * 3. The current region overlaps with
517				 *    the end of the requested region.
518				 *    Adjust the requested region to
519				 *    end at the start of the current
520				 *    region and try again.
521				 *
522				 * 4. The requested region falls
523				 *    completely within the current
524				 *    region.  We're done.
525				 */
526				if (rp->er_start <= start) {
527					if (rp->er_end < ex->ex_end) {
528						start = rp->er_end + 1;
529						size = end - start + 1;
530						goto alloc_start;
531					}
532				} else if (rp->er_end < end) {
533					LIST_REMOVE(rp, er_link);
534					extent_free_region_descriptor(ex, rp);
535					goto alloc_start;
536				} else if (rp->er_start < end) {
537					if (rp->er_start > ex->ex_start) {
538						end = rp->er_start - 1;
539						size = end - start + 1;
540						goto alloc_start;
541					}
542				}
543				return (0);
544			}
545
546			extent_free_region_descriptor(ex, myrp);
547			return (EAGAIN);
548		}
549		/*
550		 * We don't conflict, but this region lies before
551		 * us.  Keep a pointer to this region, and keep
552		 * trying.
553		 */
554		last = rp;
555	}
556
557	/*
558	 * We don't conflict with any regions.  "last" points
559	 * to the region we fall after, or is NULL if we belong
560	 * at the beginning of the region list.  Insert ourselves.
561	 */
562	extent_insert_and_optimize(ex, start, size, last, myrp);
563	return (0);
564}
565
566/*
567 * Macro to check (x + y) <= z.  This check is designed to fail
568 * if an overflow occurs.
569 */
570#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
571
572/*
573 * Allocate a region in an extent map subregion.
574 *
575 * If EX_FAST is specified, we return the first fit in the map.
576 * Otherwise, we try to minimize fragmentation by finding the
577 * smallest gap that will hold the request.
578 *
579 * The allocated region is aligned to "alignment", which must be
580 * a power of 2.
581 */
582int
583extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
584    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
585    u_long *result)
586{
587	struct extent_region *rp, *myrp, *last, *bestlast;
588	u_long newstart, newend, exend, beststart, bestovh, ovh;
589	u_long dontcross;
590	int error;
591
592#ifdef DIAGNOSTIC
593	/* Check arguments. */
594	if (ex == NULL)
595		panic("extent_alloc_subregion: NULL extent");
596	if (result == NULL)
597		panic("extent_alloc_subregion: NULL result pointer");
598	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
599	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
600  printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
601		    ex->ex_name, ex->ex_start, ex->ex_end);
602		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
603		    substart, subend);
604		panic("extent_alloc_subregion: bad subregion");
605	}
606	if ((size < 1) || ((size - 1) > (subend - substart))) {
607		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
608		    ex->ex_name, size);
609		panic("extent_alloc_subregion: bad size");
610	}
611	if (alignment == 0)
612		panic("extent_alloc_subregion: bad alignment");
613	if (boundary && (boundary < size)) {
614		printf(
615		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
616		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
617		panic("extent_alloc_subregion: bad boundary");
618	}
619#endif
620
621	/*
622	 * Allocate the region descriptor.  It will be freed later
623	 * if we can coalesce with another region.
624	 */
625	myrp = extent_alloc_region_descriptor(ex, flags);
626	if (myrp == NULL) {
627#ifdef DIAGNOSTIC
628		printf(
629		 "extent_alloc_subregion: can't allocate region descriptor\n");
630#endif
631		return (ENOMEM);
632	}
633
634 alloc_start:
635	/*
636	 * Keep a pointer to the last region we looked at so
637	 * that we don't have to traverse the list again when
638	 * we insert ourselves.  If "last" is NULL when we
639	 * finally insert ourselves, we go at the head of the
640	 * list.  See extent_insert_and_optimize() for details.
641	 */
642	last = NULL;
643
644	/*
645	 * Keep track of size and location of the smallest
646	 * chunk we fit in.
647	 *
648	 * Since the extent can be as large as the numeric range
649	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
650	 * best overhead value can be the maximum unsigned integer.
651	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
652	 * into the region list immediately on an exact match (which
653	 * is the only case where "bestovh" would be set to 0).
654	 */
655	bestovh = 0;
656	beststart = 0;
657	bestlast = NULL;
658
659	/*
660	 * Keep track of end of free region.  This is either the end of extent
661	 * or the start of a region past the subend.
662	 */
663	exend = ex->ex_end;
664
665	/*
666	 * For N allocated regions, we must make (N + 1)
667	 * checks for unallocated space.  The first chunk we
668	 * check is the area from the beginning of the subregion
669	 * to the first allocated region after that point.
670	 */
671	newstart = extent_align(substart, alignment, skew);
672	if (newstart < ex->ex_start) {
673#ifdef DIAGNOSTIC
674		printf(
675      "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
676		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
677		panic("extent_alloc_subregion: overflow after alignment");
678#else
679		extent_free_region_descriptor(ex, myrp);
680		return (EINVAL);
681#endif
682	}
683
684	/*
685	 * Find the first allocated region that begins on or after
686	 * the subregion start, advancing the "last" pointer along
687	 * the way.
688	 */
689	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
690		if (rp->er_start >= newstart)
691			break;
692		last = rp;
693	}
694
695	/*
696	 * Relocate the start of our candidate region to the end of
697	 * the last allocated region (if there was one overlapping
698	 * our subrange).
699	 */
700	if (last != NULL && last->er_end >= newstart)
701		newstart = extent_align((last->er_end + 1), alignment, skew);
702
703	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
704		/*
705		 * If the region pasts the subend, bail out and see
706		 * if we fit against the subend.
707		 */
708		if (rp->er_start > subend) {
709			exend = rp->er_start;
710			break;
711		}
712
713		/*
714		 * Check the chunk before "rp".  Note that our
715		 * comparison is safe from overflow conditions.
716		 */
717		if (LE_OV(newstart, size, rp->er_start)) {
718			/*
719			 * Do a boundary check, if necessary.  Note
720			 * that a region may *begin* on the boundary,
721			 * but it must end before the boundary.
722			 */
723			if (boundary) {
724				newend = newstart + (size - 1);
725
726				/*
727				 * Calculate the next boundary after the start
728				 * of this region.
729				 */
730				dontcross = extent_align(newstart+1, boundary,
731				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
732				    - 1;
733
734#if 0
735				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
736				    newstart, newend, ex->ex_start, ex->ex_end,
737				    boundary, dontcross);
738#endif
739
740				/* Check for overflow */
741				if (dontcross < ex->ex_start)
742					dontcross = ex->ex_end;
743				else if (newend > dontcross) {
744					/*
745					 * Candidate region crosses boundary.
746					 * Throw away the leading part and see
747					 * if we still fit.
748					 */
749					newstart = dontcross + 1;
750					newend = newstart + (size - 1);
751					dontcross += boundary;
752					if (!LE_OV(newstart, size, rp->er_start))
753						goto skip;
754				}
755
756				/*
757				 * If we run past the end of
758				 * the extent or the boundary
759				 * overflows, then the request
760				 * can't fit.
761				 */
762				if (newstart + size - 1 > ex->ex_end ||
763				    dontcross < newstart)
764					goto fail;
765			}
766
767			/*
768			 * We would fit into this space.  Calculate
769			 * the overhead (wasted space).  If we exactly
770			 * fit, or we're taking the first fit, insert
771			 * ourselves into the region list.
772			 */
773			ovh = rp->er_start - newstart - size;
774			if ((flags & EX_FAST) || (ovh == 0))
775				goto found;
776
777			/*
778			 * Don't exactly fit, but check to see
779			 * if we're better than any current choice.
780			 */
781			if ((bestovh == 0) || (ovh < bestovh)) {
782				bestovh = ovh;
783				beststart = newstart;
784				bestlast = last;
785			}
786		}
787
788skip:
789		/*
790		 * Skip past the current region and check again.
791		 */
792		newstart = extent_align((rp->er_end + 1), alignment, skew);
793		if (newstart < rp->er_end) {
794			/*
795			 * Overflow condition.  Don't error out, since
796			 * we might have a chunk of space that we can
797			 * use.
798			 */
799			goto fail;
800		}
801
802		last = rp;
803	}
804
805	/*
806	 * The final check is from the current starting point to the
807	 * end of the subregion.  If there were no allocated regions,
808	 * "newstart" is set to the beginning of the subregion, or
809	 * just past the end of the last allocated region, adjusted
810	 * for alignment in either case.
811	 */
812	if (LE_OV(newstart, (size - 1), subend)) {
813		/*
814		 * Do a boundary check, if necessary.  Note
815		 * that a region may *begin* on the boundary,
816		 * but it must end before the boundary.
817		 */
818		if (boundary) {
819			newend = newstart + (size - 1);
820
821			/*
822			 * Calculate the next boundary after the start
823			 * of this region.
824			 */
825			dontcross = extent_align(newstart+1, boundary,
826			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
827			    - 1;
828
829#if 0
830			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
831			    newstart, newend, ex->ex_start, ex->ex_end,
832			    boundary, dontcross);
833#endif
834
835			/* Check for overflow */
836			if (dontcross < ex->ex_start)
837				dontcross = ex->ex_end;
838			else if (newend > dontcross) {
839				/*
840				 * Candidate region crosses boundary.
841				 * Throw away the leading part and see
842				 * if we still fit.
843				 */
844				newstart = dontcross + 1;
845				newend = newstart + (size - 1);
846				dontcross += boundary;
847				if (!LE_OV(newstart, (size - 1), subend))
848					goto fail;
849			}
850
851			/*
852			 * If we run past the end of
853			 * the extent or the boundary
854			 * overflows, then the request
855			 * can't fit.
856			 */
857			if (newstart + size - 1 > ex->ex_end ||
858			    dontcross < newstart)
859				goto fail;
860		}
861
862		/*
863		 * We would fit into this space.  Calculate
864		 * the overhead (wasted space).  If we exactly
865		 * fit, or we're taking the first fit, insert
866		 * ourselves into the region list.
867		 */
868		ovh = exend - newstart - (size - 1);
869		if ((flags & EX_FAST) || (ovh == 0))
870			goto found;
871
872		/*
873		 * Don't exactly fit, but check to see
874		 * if we're better than any current choice.
875		 */
876		if ((bestovh == 0) || (ovh < bestovh)) {
877			bestovh = ovh;
878			beststart = newstart;
879			bestlast = last;
880		}
881	}
882
883 fail:
884	/*
885	 * One of the following two conditions have
886	 * occurred:
887	 *
888	 *	There is no chunk large enough to hold the request.
889	 *
890	 *	If EX_FAST was not specified, there is not an
891	 *	exact match for the request.
892	 *
893	 * Note that if we reach this point and EX_FAST is
894	 * set, then we know there is no space in the extent for
895	 * the request.
896	 */
897	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
898		/*
899		 * We have a match that's "good enough".
900		 */
901		newstart = beststart;
902		last = bestlast;
903		goto found;
904	}
905
906	/*
907	 * No space currently available.  Wait for it to free up,
908	 * if possible.
909	 */
910	if (flags & EX_WAITSPACE) {
911		ex->ex_flags |= EXF_WANTED;
912		error = tsleep(ex,
913		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
914		if (error)
915			return (error);
916		goto alloc_start;
917	}
918
919	extent_free_region_descriptor(ex, myrp);
920	return (EAGAIN);
921
922 found:
923	/*
924	 * Insert ourselves into the region list.
925	 */
926	extent_insert_and_optimize(ex, newstart, size, last, myrp);
927	*result = newstart;
928	return (0);
929}
930
/*
 * Free a previously allocated range [start, start + size - 1],
 * adjusting, splitting, or removing the region descriptor that
 * covers it (see the case analysis below).  Returns 0 on success,
 * ENOMEM if a descriptor needed for a split cannot be allocated;
 * panics if no allocated region covers the range.  Wakes any
 * threads sleeping in extent_alloc_*() under EX_WAITSPACE.
 */
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);	/* inclusive end of range */
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_free: NULL extent");
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: extent `%s', region not within extent",
		    ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: overflow");
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/*
		 * Case 4.  "nrp" is non-NULL here: EXF_NOCOALESCE was
		 * not set, so a descriptor was pre-allocated above.
		 */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
	panic("extent_free: region not found");

 done:
	/* Release the pre-allocated descriptor if it went unused. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	/* Wake anyone blocked in extent_alloc_*() waiting for space. */
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}
1070
/*
 * Allocate a region descriptor for an extent.
 *
 * For a fixed extent the descriptor normally comes from the extent's
 * pre-carved freelist.  If the freelist is empty we either fall
 * through to the dynamic pool (EX_MALLOCOK), sleep until one is
 * freed (EX_WAITOK, interruptible with EX_CATCH), or fail.
 * Non-fixed extents always allocate from the pool.  Returns NULL
 * on failure.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			/*
			 * NOTE(review): EXF_FLWANTED is not cleared when
			 * tsleep() fails here; a later wakeup may then be
			 * spurious but appears harmless -- confirm.
			 */
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK : 0);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;	/* mark as pool-allocated */

	return (rp);
}
1111
/*
 * Return a region descriptor to its home.
 *
 * For a fixed extent: descriptors carved from the extent's own
 * storage (no ER_ALLOC flag) always go back on the freelist.  A
 * pool-allocated descriptor (ER_ALLOC) is normally returned to the
 * pool, but if a thread is sleeping in
 * extent_alloc_region_descriptor() (EXF_FLWANTED) we donate it to
 * the freelist instead so the waiter can make progress.
 * Descriptors of non-fixed extents always came from the pool.
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}
1152
1153
1154#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1155
/*
 * Print an extent and its allocated regions to the console.
 * Thin wrapper around extent_print1() using printf.
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1161
1162void
1163extent_print1(struct extent *ex, int (*pr)(const char *, ...))
1164{
1165	struct extent_region *rp;
1166
1167	if (ex == NULL)
1168		panic("extent_print: NULL extent");
1169
1170#ifdef _KERNEL
1171	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1172	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1173#else
1174	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1175	    ex->ex_start, ex->ex_end, ex->ex_flags);
1176#endif
1177
1178	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1179		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1180}
1181#endif
1182