/*	subr_extent.c revision 1.54	*/
1/*	$OpenBSD: subr_extent.c,v 1.54 2014/09/14 14:17:25 jsg Exp $	*/
2/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Matthias Drochner.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * General purpose extent manager.
35 */
36
37#ifdef _KERNEL
38#include <sys/param.h>
39#include <sys/extent.h>
40#include <sys/malloc.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/queue.h>
44#include <sys/pool.h>
45#include <ddb/db_output.h>
46#else
47/*
48 * user-land definitions, so it can fit into a testing harness.
49 */
50#include <sys/param.h>
51#include <sys/extent.h>
52#include <sys/queue.h>
53#include <errno.h>
54#include <err.h>
55#include <stdlib.h>
56#include <stdio.h>
57#include <string.h>
58
59#define	malloc(s, t, flags)		malloc(s)
60#define	free(p, t, s)			free(p)
61
62#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
63#define	wakeup(chan)			((void)0)
64
65struct pool {
66	size_t pr_size;
67};
68
69#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
70#define	pool_setipl(pp, ipl)		/* nothing */
71#define pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
72#define	pool_put(pp, rp)		free((rp), 0, 0)
73
74#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
75#endif
76
77#if defined(DIAGNOSTIC) || defined(DDB)
78void	extent_print1(struct extent *, int (*)(const char *, ...)
79	    __attribute__((__format__(__kprintf__,1,2))));
80#endif
81
82static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
83	    struct extent_region *, struct extent_region *);
84static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
85static	void extent_free_region_descriptor(struct extent *,
86	    struct extent_region *);
87int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
88	    u_long, u_long, int, struct extent_region *, u_long *);
89
90/*
91 * Shortcut to align to an arbitrary power-of-two boundary.
92 */
93static __inline__ u_long
94extent_align(u_long start, u_long align, u_long skew)
95{
96	return ((((start - skew) + (align - 1)) & (-align)) + skew);
97}
98
99
100#if defined(DIAGNOSTIC) || defined(DDB)
101/*
102 * Register the extent on a doubly linked list.
103 * Should work, no?
104 */
105static LIST_HEAD(listhead, extent) ext_list;
106static	void extent_register(struct extent *);
107
/*
 * Record a newly created extent on the global ext_list so that DDB's
 * "show extents" and the DIAGNOSTIC double-registration check can
 * find it.  The first call initializes the list head.
 */
static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	if (!initialized){
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	/* Registering the same extent twice would corrupt the list. */
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
131#endif	/* DIAGNOSTIC || DDB */
132
133struct pool ex_region_pl;
134
/*
 * One-time initialization of the pool from which dynamically
 * allocated region descriptors are drawn.  Safe to call repeatedly;
 * only the first call has any effect.
 */
static void
extent_pool_init(void)
{
	static int inited;

	if (!inited) {
		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
		    "extentpl", NULL);
		pool_setipl(&ex_region_pl, IPL_VM);
		inited = 1;
	}
}
147
148#ifdef DDB
149/*
150 * Print out all extents registered.  This is used in
151 * DDB show extents
152 */
void
extent_print_all(void)
{
	struct extent *ep;

	/* Walk the registration list built up by extent_register(). */
	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
162#endif
163
/*
 * Allocate and initialize an extent map.
 *
 * name         - printable name for diagnostics (referenced, not copied;
 *                must remain valid for the extent's lifetime)
 * start, end   - inclusive range managed by the extent
 * mtype        - malloc type for the descriptor (non-fixed extents only)
 * storage      - if non-NULL, caller-supplied memory making this a
 *                "fixed" extent: it holds the extent_fixed header plus
 *                as many pre-allocated region descriptors as fit, so
 *                later alloc/free never has to allocate
 * storagesize  - size of "storage" in bytes
 * flags        - EX_WAITOK/EX_NOWAIT for allocations made here,
 *                EX_NOCOALESCE, EX_FILLED (create fully allocated)
 *
 * Returns the new extent, or NULL on allocation failure.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	/* Make sure the region-descriptor pool exists. */
	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	if (flags & EX_FILLED) {
		/* Mark the entire range as allocated up front. */
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, 0);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
258
259/*
260 * Destroy an extent map.
261 */
262void
263extent_destroy(struct extent *ex)
264{
265	struct extent_region *rp, *orp;
266
267#ifdef DIAGNOSTIC
268	/* Check arguments. */
269	if (ex == NULL)
270		panic("%s: NULL extent", __func__);
271#endif
272
273	/* Free all region descriptors in extent. */
274	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
275		orp = rp;
276		rp = LIST_NEXT(rp, er_link);
277		LIST_REMOVE(orp, er_link);
278		extent_free_region_descriptor(ex, orp);
279	}
280
281#if defined(DIAGNOSTIC) || defined(DDB)
282	/* Remove from the list of all extents. */
283	LIST_REMOVE(ex, ex_link);
284#endif
285
286	/* If we're not a fixed extent, free the extent descriptor itself. */
287	if ((ex->ex_flags & EXF_FIXED) == 0)
288		free(ex, ex->ex_mtype, 0);
289}
290
291/*
292 * Insert a region descriptor into the sorted region list after the
293 * entry "after" or at the head of the list (if "after" is NULL).
294 * The region descriptor we insert is passed in "rp".  We must
295 * allocate the region descriptor before calling this function!
296 * If we don't need the region descriptor, it will be freed here.
297 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;	/* set once we've merged into "after" */

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		/* Our descriptor was absorbed into a neighbor; release it. */
		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
399
400/*
401 * Allocate a specific region in an extent map.
402 */
403int
404extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
405{
406	struct extent_region *rp, *last, *myrp;
407	u_long end = start + (size - 1);
408	int error;
409
410#ifdef DIAGNOSTIC
411	/* Check arguments. */
412	if (ex == NULL)
413		panic("%s: NULL extent", __func__);
414	if (size < 1) {
415		printf("%s: extent `%s', size 0x%lx\n",
416		    __func__, ex->ex_name, size);
417		panic("%s: bad size", __func__);
418	}
419	if (end < start) {
420		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
421		    __func__, ex->ex_name, start, size);
422		panic("%s: overflow", __func__);
423	}
424	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
425		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
426		    "are mutually exclusive", __func__);
427#endif
428
429	/*
430	 * Make sure the requested region lies within the
431	 * extent.
432	 */
433	if ((start < ex->ex_start) || (end > ex->ex_end)) {
434#ifdef DIAGNOSTIC
435		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
436		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
437		printf("%s: start 0x%lx, end 0x%lx\n",
438		    __func__, start, end);
439		panic("%s: region lies outside extent", __func__);
440#else
441		return (EINVAL);
442#endif
443	}
444
445	/*
446	 * Allocate the region descriptor.  It will be freed later
447	 * if we can coalesce with another region.
448	 */
449	myrp = extent_alloc_region_descriptor(ex, flags);
450	if (myrp == NULL) {
451#ifdef DIAGNOSTIC
452		printf(
453		    "%s: can't allocate region descriptor\n", __func__);
454#endif
455		return (ENOMEM);
456	}
457
458 alloc_start:
459	/*
460	 * Attempt to place ourselves in the desired area of the
461	 * extent.  We save ourselves some work by keeping the list sorted.
462	 * In other words, if the start of the current region is greater
463	 * than the end of our region, we don't have to search any further.
464	 */
465
466	/*
467	 * Keep a pointer to the last region we looked at so
468	 * that we don't have to traverse the list again when
469	 * we insert ourselves.  If "last" is NULL when we
470	 * finally insert ourselves, we go at the head of the
471	 * list.  See extent_insert_and_optimize() for details.
472	 */
473	last = NULL;
474
475	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
476		if (rp->er_start > end) {
477			/*
478			 * We lie before this region and don't
479			 * conflict.
480			 */
481			break;
482		}
483
484		/*
485		 * The current region begins before we end.
486		 * Check for a conflict.
487		 */
488		if (rp->er_end >= start) {
489			/*
490			 * We conflict.  If we can (and want to) wait,
491			 * do so.
492			 */
493			if (flags & EX_WAITSPACE) {
494				ex->ex_flags |= EXF_WANTED;
495				error = tsleep(ex,
496				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
497				    "extnt", 0);
498				if (error)
499					return (error);
500				goto alloc_start;
501			}
502
503			/*
504			 * If we tolerate conflicts adjust things such
505			 * that all space in the requested region is
506			 * allocated.
507			 */
508			if (flags & EX_CONFLICTOK) {
509				/*
510				 * There are four possibilities:
511				 *
512				 * 1. The current region overlaps with
513				 *    the start of the requested region.
514				 *    Adjust the requested region to
515				 *    start at the end of the current
516				 *    region and try again.
517				 *
518				 * 2. The current region falls
519				 *    completely within the requested
520				 *    region.  Free the current region
521				 *    and try again.
522				 *
523				 * 3. The current region overlaps with
524				 *    the end of the requested region.
525				 *    Adjust the requested region to
526				 *    end at the start of the current
527				 *    region and try again.
528				 *
529				 * 4. The requested region falls
530				 *    completely within the current
531				 *    region.  We're done.
532				 */
533				if (rp->er_start <= start) {
534					if (rp->er_end < ex->ex_end) {
535						start = rp->er_end + 1;
536						size = end - start + 1;
537						goto alloc_start;
538					}
539				} else if (rp->er_end < end) {
540					LIST_REMOVE(rp, er_link);
541					extent_free_region_descriptor(ex, rp);
542					goto alloc_start;
543				} else if (rp->er_start < end) {
544					if (rp->er_start > ex->ex_start) {
545						end = rp->er_start - 1;
546						size = end - start + 1;
547						goto alloc_start;
548					}
549				}
550				return (0);
551			}
552
553			extent_free_region_descriptor(ex, myrp);
554			return (EAGAIN);
555		}
556		/*
557		 * We don't conflict, but this region lies before
558		 * us.  Keep a pointer to this region, and keep
559		 * trying.
560		 */
561		last = rp;
562	}
563
564	/*
565	 * We don't conflict with any regions.  "last" points
566	 * to the region we fall after, or is NULL if we belong
567	 * at the beginning of the region list.  Insert ourselves.
568	 */
569	extent_insert_and_optimize(ex, start, size, last, myrp);
570	return (0);
571}
572
/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs: if the unsigned sum x + y wraps around,
 * it is smaller than x and the first comparison rejects it.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
578
579/*
580 * Allocate a region in an extent map subregion.
581 *
582 * If EX_FAST is specified, we return the first fit in the map.
583 * Otherwise, we try to minimize fragmentation by finding the
584 * smallest gap that will hold the request.
585 *
586 * The allocated region is aligned to "alignment", which must be
587 * a power of 2.
588 */
589int
590extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
591    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
592    struct extent_region *myrp, u_long *result)
593{
594	struct extent_region *rp, *last, *bestlast;
595	u_long newstart, newend, exend, beststart, bestovh, ovh;
596	u_long dontcross;
597	int error;
598
599#ifdef DIAGNOSTIC
600	/* Check arguments. */
601	if (ex == NULL)
602		panic("%s: NULL extent", __func__);
603	if (myrp == NULL)
604		panic("%s: NULL region descriptor", __func__);
605	if (result == NULL)
606		panic("%s: NULL result pointer", __func__);
607	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
608	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
609		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
610		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
611		printf("%s: substart 0x%lx, subend 0x%lx\n",
612		    __func__, substart, subend);
613		panic("%s: bad subregion", __func__);
614	}
615	if ((size < 1) || ((size - 1) > (subend - substart))) {
616		printf("%s: extent `%s', size 0x%lx\n",
617		    __func__, ex->ex_name, size);
618		panic("%s: bad size", __func__);
619	}
620	if (alignment == 0)
621		panic("%s: bad alignment", __func__);
622	if (boundary && (boundary < size)) {
623		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
624		    __func__, ex->ex_name, size, boundary);
625		panic("%s: bad boundary", __func__);
626	}
627#endif
628
629 alloc_start:
630	/*
631	 * Keep a pointer to the last region we looked at so
632	 * that we don't have to traverse the list again when
633	 * we insert ourselves.  If "last" is NULL when we
634	 * finally insert ourselves, we go at the head of the
635	 * list.  See extent_insert_and_optimize() for details.
636	 */
637	last = NULL;
638
639	/*
640	 * Keep track of size and location of the smallest
641	 * chunk we fit in.
642	 *
643	 * Since the extent can be as large as the numeric range
644	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
645	 * best overhead value can be the maximum unsigned integer.
646	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
647	 * into the region list immediately on an exact match (which
648	 * is the only case where "bestovh" would be set to 0).
649	 */
650	bestovh = 0;
651	beststart = 0;
652	bestlast = NULL;
653
654	/*
655	 * Keep track of end of free region.  This is either the end of extent
656	 * or the start of a region past the subend.
657	 */
658	exend = ex->ex_end;
659
660	/*
661	 * For N allocated regions, we must make (N + 1)
662	 * checks for unallocated space.  The first chunk we
663	 * check is the area from the beginning of the subregion
664	 * to the first allocated region after that point.
665	 */
666	newstart = extent_align(substart, alignment, skew);
667	if (newstart < ex->ex_start) {
668#ifdef DIAGNOSTIC
669		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
670		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
671		    alignment);
672		panic("%s: overflow after alignment", __func__);
673#else
674		extent_free_region_descriptor(ex, myrp);
675		return (EINVAL);
676#endif
677	}
678
679	/*
680	 * Find the first allocated region that begins on or after
681	 * the subregion start, advancing the "last" pointer along
682	 * the way.
683	 */
684	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
685		if (rp->er_start >= newstart)
686			break;
687		last = rp;
688	}
689
690	/*
691	 * Relocate the start of our candidate region to the end of
692	 * the last allocated region (if there was one overlapping
693	 * our subrange).
694	 */
695	if (last != NULL && last->er_end >= newstart)
696		newstart = extent_align((last->er_end + 1), alignment, skew);
697
698	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
699		/*
700		 * If the region pasts the subend, bail out and see
701		 * if we fit against the subend.
702		 */
703		if (rp->er_start > subend) {
704			exend = rp->er_start;
705			break;
706		}
707
708		/*
709		 * Check the chunk before "rp".  Note that our
710		 * comparison is safe from overflow conditions.
711		 */
712		if (LE_OV(newstart, size, rp->er_start)) {
713			/*
714			 * Do a boundary check, if necessary.  Note
715			 * that a region may *begin* on the boundary,
716			 * but it must end before the boundary.
717			 */
718			if (boundary) {
719				newend = newstart + (size - 1);
720
721				/*
722				 * Calculate the next boundary after the start
723				 * of this region.
724				 */
725				dontcross = extent_align(newstart+1, boundary,
726				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
727				    - 1;
728
729#if 0
730				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
731				    newstart, newend, ex->ex_start, ex->ex_end,
732				    boundary, dontcross);
733#endif
734
735				/* Check for overflow */
736				if (dontcross < ex->ex_start)
737					dontcross = ex->ex_end;
738				else if (newend > dontcross) {
739					/*
740					 * Candidate region crosses boundary.
741					 * Throw away the leading part and see
742					 * if we still fit.
743					 */
744					newstart = dontcross + 1;
745					newend = newstart + (size - 1);
746					dontcross += boundary;
747					if (!LE_OV(newstart, size, rp->er_start))
748						goto skip;
749				}
750
751				/*
752				 * If we run past the end of
753				 * the extent or the boundary
754				 * overflows, then the request
755				 * can't fit.
756				 */
757				if (newstart + size - 1 > ex->ex_end ||
758				    dontcross < newstart)
759					goto fail;
760			}
761
762			/*
763			 * We would fit into this space.  Calculate
764			 * the overhead (wasted space).  If we exactly
765			 * fit, or we're taking the first fit, insert
766			 * ourselves into the region list.
767			 */
768			ovh = rp->er_start - newstart - size;
769			if ((flags & EX_FAST) || (ovh == 0))
770				goto found;
771
772			/*
773			 * Don't exactly fit, but check to see
774			 * if we're better than any current choice.
775			 */
776			if ((bestovh == 0) || (ovh < bestovh)) {
777				bestovh = ovh;
778				beststart = newstart;
779				bestlast = last;
780			}
781		}
782
783skip:
784		/*
785		 * Skip past the current region and check again.
786		 */
787		newstart = extent_align((rp->er_end + 1), alignment, skew);
788		if (newstart < rp->er_end) {
789			/*
790			 * Overflow condition.  Don't error out, since
791			 * we might have a chunk of space that we can
792			 * use.
793			 */
794			goto fail;
795		}
796
797		last = rp;
798	}
799
800	/*
801	 * The final check is from the current starting point to the
802	 * end of the subregion.  If there were no allocated regions,
803	 * "newstart" is set to the beginning of the subregion, or
804	 * just past the end of the last allocated region, adjusted
805	 * for alignment in either case.
806	 */
807	if (LE_OV(newstart, (size - 1), subend)) {
808		/*
809		 * Do a boundary check, if necessary.  Note
810		 * that a region may *begin* on the boundary,
811		 * but it must end before the boundary.
812		 */
813		if (boundary) {
814			newend = newstart + (size - 1);
815
816			/*
817			 * Calculate the next boundary after the start
818			 * of this region.
819			 */
820			dontcross = extent_align(newstart+1, boundary,
821			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
822			    - 1;
823
824#if 0
825			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
826			    newstart, newend, ex->ex_start, ex->ex_end,
827			    boundary, dontcross);
828#endif
829
830			/* Check for overflow */
831			if (dontcross < ex->ex_start)
832				dontcross = ex->ex_end;
833			else if (newend > dontcross) {
834				/*
835				 * Candidate region crosses boundary.
836				 * Throw away the leading part and see
837				 * if we still fit.
838				 */
839				newstart = dontcross + 1;
840				newend = newstart + (size - 1);
841				dontcross += boundary;
842				if (!LE_OV(newstart, (size - 1), subend))
843					goto fail;
844			}
845
846			/*
847			 * If we run past the end of
848			 * the extent or the boundary
849			 * overflows, then the request
850			 * can't fit.
851			 */
852			if (newstart + size - 1 > ex->ex_end ||
853			    dontcross < newstart)
854				goto fail;
855		}
856
857		/*
858		 * We would fit into this space.  Calculate
859		 * the overhead (wasted space).  If we exactly
860		 * fit, or we're taking the first fit, insert
861		 * ourselves into the region list.
862		 */
863		ovh = exend - newstart - (size - 1);
864		if ((flags & EX_FAST) || (ovh == 0))
865			goto found;
866
867		/*
868		 * Don't exactly fit, but check to see
869		 * if we're better than any current choice.
870		 */
871		if ((bestovh == 0) || (ovh < bestovh)) {
872			bestovh = ovh;
873			beststart = newstart;
874			bestlast = last;
875		}
876	}
877
878 fail:
879	/*
880	 * One of the following two conditions have
881	 * occurred:
882	 *
883	 *	There is no chunk large enough to hold the request.
884	 *
885	 *	If EX_FAST was not specified, there is not an
886	 *	exact match for the request.
887	 *
888	 * Note that if we reach this point and EX_FAST is
889	 * set, then we know there is no space in the extent for
890	 * the request.
891	 */
892	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
893		/*
894		 * We have a match that's "good enough".
895		 */
896		newstart = beststart;
897		last = bestlast;
898		goto found;
899	}
900
901	/*
902	 * No space currently available.  Wait for it to free up,
903	 * if possible.
904	 */
905	if (flags & EX_WAITSPACE) {
906		ex->ex_flags |= EXF_WANTED;
907		error = tsleep(ex,
908		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
909		if (error)
910			return (error);
911		goto alloc_start;
912	}
913
914	extent_free_region_descriptor(ex, myrp);
915	return (EAGAIN);
916
917 found:
918	/*
919	 * Insert ourselves into the region list.
920	 */
921	extent_insert_and_optimize(ex, newstart, size, last, myrp);
922	*result = newstart;
923	return (0);
924}
925
926int
927extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
928    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
929    u_long *result)
930{
931	struct extent_region *rp;
932
933	/*
934	 * Allocate the region descriptor.  It will be freed later
935	 * if we can coalesce with another region.
936	 */
937	rp = extent_alloc_region_descriptor(ex, flags);
938	if (rp == NULL) {
939#ifdef DIAGNOSTIC
940		printf("%s: can't allocate region descriptor\n", __func__);
941#endif
942		return (ENOMEM);
943	}
944
945	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
946	    boundary, flags, rp, result);
947}
948
/*
 * Like extent_alloc_subregion(), but use the caller-supplied region
 * descriptor "rp" instead of allocating one.  The descriptor is
 * marked ER_DISCARD so extent_free_region_descriptor() never frees
 * it.  Only valid on EX_NOCOALESCE extents, since coalescing could
 * try to release the descriptor.
 */
int
extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
    u_long subend, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, struct extent_region *rp, u_long *result)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

	rp->er_flags = ER_DISCARD;
	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}
963
/*
 * Free the region [start, start + size - 1] of the extent.  The range
 * must exactly match an allocated region or (when coalescing is
 * enabled) a piece of one.  Returns 0 on success, ENOMEM if a needed
 * region descriptor could not be allocated; panics if the region is
 * not found or lies outside the extent.
 */
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor (needed for case 4 below). */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4: split the region in two; nrp is non-NULL here. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	/* Return the spare descriptor if it wasn't consumed by case 4. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	/* Wake anyone sleeping in an EX_WAITSPACE allocation. */
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}
1103
/*
 * Obtain a region descriptor for "ex".
 *
 * Fixed extents draw descriptors from their pre-allocated freelist;
 * when it is empty we may fall through to the pool if EX_MALLOCOK is
 * set, sleep for one to be returned if EX_WAITOK is set, or fail
 * with NULL.  Non-fixed extents always allocate from ex_region_pl.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;	/* pool-backed; freed to pool */

	return (rp);
}
1145
/*
 * Release a region descriptor.
 *
 * ER_DISCARD descriptors are caller-owned (see
 * extent_alloc_subregion_with_descr()) and are never freed here.
 * For fixed extents, a pool-backed (ER_ALLOC) descriptor is handed
 * to a waiting thread via the freelist when one exists, otherwise
 * returned to the pool; descriptors carved from the fixed storage
 * always go back on the freelist.  Sleepers in
 * extent_alloc_region_descriptor() are woken up.
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}
1189
1190
1191#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1192
/*
 * Dump an extent's layout to the console via printf.
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1198
/*
 * Worker for extent_print() and extent_print_all(): print the
 * extent's name, range and flags, followed by every allocated
 * region, through "pr" (printf or db_printf).
 */
void
extent_print1(struct extent *ex,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	/* %b pretty-prints flag bits using the EXF_BITS format string. */
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
1219#endif
1220