subr_extent.c revision 1.53
/*	$OpenBSD: subr_extent.c,v 1.53 2014/09/13 16:06:37 doug Exp $	*/
/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

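/*
 * Illustrative usage sketch (names and numbers are hypothetical).  A
 * caller typically creates an extent covering a resource range,
 * reserves or allocates regions within it, and frees them again:
 *
 *	struct extent *iomem_ex;
 *	u_long addr;
 *	int error;
 *
 *	iomem_ex = extent_create("iomem", 0x0, 0xffffff, M_DEVBUF,
 *	    NULL, 0, EX_NOWAIT);
 *
 *	error = extent_alloc_region(iomem_ex, 0xa0000, 0x20000, EX_NOWAIT);
 *
 *	error = extent_alloc_subregion(iomem_ex, iomem_ex->ex_start,
 *	    iomem_ex->ex_end, 0x1000, 0x1000, 0, 0, EX_NOWAIT, &addr);
 *
 *	error = extent_free(iomem_ex, addr, 0x1000, EX_NOWAIT);
 *	extent_destroy(iomem_ex);
 */
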
#ifdef _KERNEL
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <ddb/db_output.h>
#else
/*
 * User-land definitions, so this file can be built into a testing harness.
 */
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/queue.h>
#include <errno.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define	malloc(s, t, flags)		malloc(s)
#define	free(p, t, s)			free(p)

#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
#define	wakeup(chan)			((void)0)

struct pool {
	size_t pr_size;
};

#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
#define	pool_setipl(pp, ipl)		/* nothing */
#define pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
#define	pool_put(pp, rp)		free((rp), 0, 0)

#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
#endif

#if defined(DIAGNOSTIC) || defined(DDB)
void	extent_print1(struct extent *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
#endif

static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
	    struct extent_region *, struct extent_region *);
static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
static	void extent_free_region_descriptor(struct extent *,
	    struct extent_region *);
int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
	    u_long, u_long, int, struct extent_region *, u_long *);

/*
 * Shortcut to align to an arbitrary power-of-two boundary.
 */
static __inline__ u_long
extent_align(u_long start, u_long align, u_long skew)
{
	return ((((start - skew) + (align - 1)) & (-align)) + skew);
}
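
/*
 * For example (illustrative values): extent_align(0x1003, 0x10, 0)
 * yields 0x1010 and extent_align(0x1003, 0x10, 4) yields 0x1004,
 * i.e. the smallest value >= start for which (result - skew) is a
 * multiple of align.
 */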


#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Register the extent on a doubly linked list.
 * Should work, no?
 */
static LIST_HEAD(listhead, extent) ext_list;
static	void extent_register(struct extent *);

static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	if (!initialized) {
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
#endif	/* DIAGNOSTIC || DDB */

struct pool ex_region_pl;

static void
extent_pool_init(void)
{
	static int inited;

	if (!inited) {
		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
		    "extentpl", NULL);
		pool_setipl(&ex_region_pl, IPL_VM);
		inited = 1;
	}
}

#ifdef DDB
/*
 * Print out all registered extents.  This is used by the
 * "show extents" command in DDB.
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif

/*
 * Allocate and initialize an extent map.
 */
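/*
 * A minimal creation sketch for the "fixed" flavour, where the caller
 * supplies static storage instead of having descriptors allocated
 * dynamically (handy before the allocators are up).  Names and sizes
 * below are hypothetical, and EXTENT_FIXED_STORAGE_SIZE() is assumed
 * to be the storage-size helper from <sys/extent.h>:
 *
 *	static long ioport_storage[EXTENT_FIXED_STORAGE_SIZE(16) /
 *	    sizeof(long)];
 *	struct extent *ioport_ex;
 *
 *	ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF,
 *	    (caddr_t)ioport_storage, sizeof(ioport_storage),
 *	    EX_NOWAIT | EX_NOCOALESCE);
 */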
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, 0);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}

/*
 * Destroy an extent map.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	/* Remove from the list of all extents. */
	LIST_REMOVE(ex, ex_link);
#endif

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		free(ex, ex->ex_mtype, 0);
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
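/*
 * As an illustration (hypothetical numbers): with existing regions
 * [0x00-0x0f] and [0x20-0x2f], inserting start = 0x10, size = 0x10
 * coalesces everything into a single region [0x00-0x2f], freeing both
 * "rp" and the second region's descriptor, provided EXF_NOCOALESCE is
 * not set.
 */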
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}

/*
 * Allocate a specific region in an extent map.
 */
int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *last, *myrp;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (size < 1) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
		    "are mutually exclusive", __func__);
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: start 0x%lx, end 0x%lx\n",
		    __func__, start, end);
		panic("%s: region lies outside extent", __func__);
#else
		return (EINVAL);
#endif
	}

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		    "%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

 alloc_start:
	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				ex->ex_flags |= EXF_WANTED;
				error = tsleep(ex,
				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
				    "extnt", 0);
				if (error)
					return (error);
				goto alloc_start;
			}

			/*
			 * If we tolerate conflicts adjust things such
			 * that all space in the requested region is
			 * allocated.
			 */
			if (flags & EX_CONFLICTOK) {
				/*
				 * There are four possibilities:
				 *
				 * 1. The current region overlaps with
				 *    the start of the requested region.
				 *    Adjust the requested region to
				 *    start at the end of the current
				 *    region and try again.
				 *
				 * 2. The current region falls
				 *    completely within the requested
				 *    region.  Free the current region
				 *    and try again.
				 *
				 * 3. The current region overlaps with
				 *    the end of the requested region.
				 *    Adjust the requested region to
				 *    end at the start of the current
				 *    region and try again.
				 *
				 * 4. The requested region falls
				 *    completely within the current
				 *    region.  We're done.
				 */
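				/*
				 * E.g. (illustrative): with [0x20-0x2f]
				 * already allocated, a request for
				 * [0x10-0x3f] with EX_CONFLICTOK takes
				 * case 2: the existing region's
				 * descriptor is freed and the retry
				 * allocates the whole of [0x10-0x3f].
				 */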
				if (rp->er_start <= start) {
					if (rp->er_end < ex->ex_end) {
						start = rp->er_end + 1;
						size = end - start + 1;
						goto alloc_start;
					}
				} else if (rp->er_end < end) {
					LIST_REMOVE(rp, er_link);
					extent_free_region_descriptor(ex, rp);
					goto alloc_start;
				} else if (rp->er_start < end) {
					if (rp->er_start > ex->ex_start) {
						end = rp->er_start - 1;
						size = end - start + 1;
						goto alloc_start;
					}
				}
				return (0);
			}

			extent_free_region_descriptor(ex, myrp);
			return (EAGAIN);
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, last, myrp);
	return (0);
}

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
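
/*
 * For instance, with a 32-bit u_long, LE_OV(0xfffffff0, 0x20, 0xffffffff)
 * is false: the sum wraps below x, so the first comparison rejects it.
 */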

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
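/*
 * Illustration (hypothetical sizes): for a 0x20-byte request with free
 * gaps of 0x100 and then 0x20 bytes, EX_FAST takes the 0x100 gap because
 * it is found first, while the default policy keeps looking and picks
 * the 0x20 gap, which fits exactly.
 */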
int
extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    struct extent_region *myrp, u_long *result)
{
	struct extent_region *rp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (myrp == NULL)
		panic("%s: NULL region descriptor", __func__);
	if (result == NULL)
		panic("%s: NULL result pointer", __func__);
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: substart 0x%lx, subend 0x%lx\n",
		    __func__, substart, subend);
		panic("%s: bad subregion", __func__);
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (alignment == 0)
		panic("%s: bad alignment", __func__);
	if (boundary && (boundary < size)) {
		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
		    __func__, ex->ex_name, size, boundary);
		panic("%s: bad boundary", __func__);
	}
#endif

 alloc_start:
	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of end of free region.  This is either the end of extent
	 * or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = extent_align(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
		    alignment);
		panic("%s: overflow after alignment", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = extent_align((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and see
		 * if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = extent_align(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = extent_align((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = extent_align(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 *	There is no chunk large enough to hold the request.
	 *
	 *	If EX_FAST was not specified, there is not an
	 *	exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		ex->ex_flags |= EXF_WANTED;
		error = tsleep(ex,
		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
		if (error)
			return (error);
		goto alloc_start;
	}

	extent_free_region_descriptor(ex, myrp);
	return (EAGAIN);

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, last, myrp);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    u_long *result)
{
	struct extent_region *rp;

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	rp = extent_alloc_region_descriptor(ex, flags);
	if (rp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
    u_long subend, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, struct extent_region *rp, u_long *result)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

	rp->er_flags = ER_DISCARD;
	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
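	/*
	 * For example (illustrative): with [0x10-0x3f] allocated, freeing
	 * [0x20-0x2f] is case 4: the region is split into [0x10-0x1f] and
	 * [0x30-0x3f], consuming the descriptor allocated above.
	 */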
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before the chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}

static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}


#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)

void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}

void
extent_print1(struct extent *ex,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
#endif