subr_extent.c revision 1.56
/*	$OpenBSD: subr_extent.c,v 1.56 2015/03/14 03:38:50 jsg Exp $	*/
/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <ddb/db_output.h>
#else
/*
 * User-land definitions, so this code can be built in a testing harness.
 */
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/queue.h>
#include <errno.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define	malloc(s, t, flags)		malloc(s)
#define	free(p, t, s)			free(p)

#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
#define	wakeup(chan)			((void)0)
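
/*
 * In the user-land build, tsleep() is stubbed to return EWOULDBLOCK and
 * wakeup() is a no-op, so code paths that would sleep in the kernel
 * (EX_WAITOK descriptor allocation, EX_WAITSPACE) fail immediately
 * instead of blocking the test harness.
 */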

struct pool {
	size_t pr_size;
};

#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
#define	pool_setipl(pp, ipl)		/* nothing */
#define pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
#define	pool_put(pp, rp)		free((rp), 0, 0)

#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
#endif

#if defined(DIAGNOSTIC) || defined(DDB)
void	extent_print1(struct extent *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
#endif

static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
	    struct extent_region *, struct extent_region *);
static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
static	void extent_free_region_descriptor(struct extent *,
	    struct extent_region *);
int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
	    u_long, u_long, int, struct extent_region *, u_long *);

/*
 * Shortcut to align to an arbitrary power-of-two boundary.
 */
static __inline__ u_long
extent_align(u_long start, u_long align, u_long skew)
{
	return ((((start - skew) + (align - 1)) & (-align)) + skew);
}
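
/*
 * Note that for a power-of-two "align", -align equals ~(align - 1), so the
 * expression above rounds (start - skew) up to the next multiple of align
 * and then adds the skew back.  For example,
 * extent_align(0x1003, 0x10, 0) == 0x1010 and
 * extent_align(0x1003, 0x10, 4) == 0x1004 (0x1004 % 0x10 == 4).
 */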


#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Register the extent on a doubly linked list.
 * Should work, no?
 */
static LIST_HEAD(listhead, extent) ext_list;
static	void extent_register(struct extent *);

static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	if (!initialized){
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
#endif	/* DIAGNOSTIC || DDB */

struct pool ex_region_pl;

static void
extent_pool_init(void)
{
	static int inited;

	if (!inited) {
		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
		    "extentpl", NULL);
		pool_setipl(&ex_region_pl, IPL_VM);
		inited = 1;
	}
}

#ifdef DDB
/*
 * Print out all extents registered.  This is used in
 * DDB show extents
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, sizeof(struct extent));
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
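
/*
 * Usage sketch (not part of the original file; the name "iomem_ex" and the
 * range are made up for illustration).  A caller managing a small address
 * range would typically do:
 *
 *	struct extent *iomem_ex;
 *
 *	iomem_ex = extent_create("iomem", 0x100000, 0x1fffff, M_DEVBUF,
 *	    NULL, 0, EX_NOWAIT);
 *
 * For a fixed extent, the caller supplies static storage large enough for
 * the extent descriptor plus pre-allocated region descriptors (see the
 * EXTENT_FIXED_STORAGE_SIZE() helper in <sys/extent.h>), e.g.:
 *
 *	static long iomem_storage[EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)];
 *
 *	iomem_ex = extent_create("iomem", 0x100000, 0x1fffff, M_DEVBUF,
 *	    (caddr_t)iomem_storage, sizeof(iomem_storage),
 *	    EX_NOWAIT | EX_NOCOALESCE);
 */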

/*
 * Destroy an extent map.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	/* Remove from the list of all extents. */
	LIST_REMOVE(ex, ex_link);
#endif

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		free(ex, ex->ex_mtype, 0);
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
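/*
 * For example, with an existing region list [0x10-0x1f], [0x30-0x3f],
 * inserting 0x20-0x2f after the first entry closes the gap exactly and
 * the list collapses to the single region [0x10-0x3f]; the caller's
 * descriptor and the now-redundant second descriptor are both freed.
 */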
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}

/*
 * Allocate a specific region in an extent map.
 */
int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *last, *myrp;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (size < 1) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
		    "are mutually exclusive", __func__);
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: start 0x%lx, end 0x%lx\n",
		    __func__, start, end);
		panic("%s: region lies outside extent", __func__);
#else
		return (EINVAL);
#endif
	}

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		    "%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

 alloc_start:
	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				ex->ex_flags |= EXF_WANTED;
				error = tsleep(ex,
				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
				    "extnt", 0);
				if (error)
					return (error);
				goto alloc_start;
			}

			/*
			 * If we tolerate conflicts adjust things such
			 * that all space in the requested region is
			 * allocated.
			 */
			if (flags & EX_CONFLICTOK) {
				/*
				 * There are four possibilities:
				 *
				 * 1. The current region overlaps with
				 *    the start of the requested region.
				 *    Adjust the requested region to
				 *    start at the end of the current
				 *    region and try again.
				 *
				 * 2. The current region falls
				 *    completely within the requested
				 *    region.  Free the current region
				 *    and try again.
				 *
				 * 3. The current region overlaps with
				 *    the end of the requested region.
				 *    Adjust the requested region to
				 *    end at the start of the current
				 *    region and try again.
				 *
				 * 4. The requested region falls
				 *    completely within the current
				 *    region.  We're done.
				 */
				if (rp->er_start <= start) {
					if (rp->er_end < ex->ex_end) {
						start = rp->er_end + 1;
						size = end - start + 1;
						goto alloc_start;
					}
				} else if (rp->er_end < end) {
					LIST_REMOVE(rp, er_link);
					extent_free_region_descriptor(ex, rp);
					goto alloc_start;
				} else if (rp->er_start < end) {
					if (rp->er_start > ex->ex_start) {
						end = rp->er_start - 1;
						size = end - start + 1;
						goto alloc_start;
					}
				}
				return (0);
			}

			extent_free_region_descriptor(ex, myrp);
			return (EAGAIN);
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, last, myrp);
	return (0);
}
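
/*
 * Usage sketch (illustrative only; "iomem_ex" and the range are made up):
 * reserving and later releasing a specific range looks like
 *
 *	error = extent_alloc_region(iomem_ex, 0x3f8, 8, EX_NOWAIT);
 *	...
 *	extent_free(iomem_ex, 0x3f8, 8, EX_NOWAIT);
 *
 * EAGAIN indicates a conflict with an already-allocated region; ENOMEM
 * indicates that no region descriptor could be allocated.
 */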

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
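
/*
 * For example, on a 32-bit u_long, LE_OV(0xfffffff0, 0x20, 0xffffffff)
 * evaluates to 0: the sum wraps around to 0x10, the ">= (x)" term catches
 * the wrap, and the macro fails instead of "passing" the bounds check.
 */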

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
int
extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    struct extent_region *myrp, u_long *result)
{
	struct extent_region *rp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (myrp == NULL)
		panic("%s: NULL region descriptor", __func__);
	if (result == NULL)
		panic("%s: NULL result pointer", __func__);
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: substart 0x%lx, subend 0x%lx\n",
		    __func__, substart, subend);
		panic("%s: bad subregion", __func__);
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (alignment == 0)
		panic("%s: bad alignment", __func__);
	if (boundary && (boundary < size)) {
		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
		    __func__, ex->ex_name, size, boundary);
		panic("%s: bad boundary", __func__);
	}
#endif

 alloc_start:
	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of end of free region.  This is either the end of extent
	 * or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = extent_align(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
		    alignment);
		panic("%s: overflow after alignment", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = extent_align((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and see
		 * if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = extent_align(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = extent_align((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = extent_align(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 *	There is no chunk large enough to hold the request.
	 *
	 *	If EX_FAST was not specified, there is not an
	 *	exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		ex->ex_flags |= EXF_WANTED;
		error = tsleep(ex,
		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
		if (error)
			return (error);
		goto alloc_start;
	}

	extent_free_region_descriptor(ex, myrp);
	return (EAGAIN);

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, last, myrp);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    u_long *result)
{
	struct extent_region *rp;

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	rp = extent_alloc_region_descriptor(ex, flags);
	if (rp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}
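
/*
 * Usage sketch (illustrative only; the extent and the numbers are made up):
 * allocate 0x100 bytes anywhere in 0x100000-0x1fffff, aligned to 0x1000,
 * with no skew and without crossing a 0x10000 boundary:
 *
 *	u_long addr;
 *	int error;
 *
 *	error = extent_alloc_subregion(iomem_ex, 0x100000, 0x1fffff,
 *	    0x100, 0x1000, 0, 0x10000, EX_NOWAIT, &addr);
 *
 * On success, addr holds the start of the allocated region.
 */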

int
extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
    u_long subend, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, struct extent_region *rp, u_long *result)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

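	/*
	 * Mark the caller-supplied descriptor ER_DISCARD so that
	 * extent_free_region_descriptor() never hands it back to the
	 * pool or freelist; its storage is owned by the caller.
	 */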
	rp->er_flags = ER_DISCARD;
	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
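	/*
	 * For instance, case 4 splits one region in two: freeing
	 * 0x20-0x2f out of an allocated region 0x10-0x3f leaves the
	 * regions 0x10-0x1f and 0x30-0x3f, which is why the extra
	 * descriptor "nrp" was allocated above.
	 */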
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}

static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}


#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)

void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}

void
extent_print1(struct extent *ex,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
#endif