/*	$OpenBSD: subr_extent.c,v 1.50 2014/01/21 21:45:58 kettenis Exp $	*/
/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <ddb/db_output.h>
#else
/*
 * User-land definitions, so this code can be built in a testing harness.
 */
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/extent.h>
#include <sys/queue.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define	malloc(s, t, flags)		malloc(s)
#define	free(p, t)			free(p)
#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
#define	wakeup(chan)			((void)0)
#define	pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
#define	pool_init(a, b, c, d, e, f, g)	(a)->pr_size = (b)
#define	pool_setipl(pool, ipl)		/* nothing */
#define	pool_put(pool, rp)		free((rp), 0)
#define	panic				printf
#endif

#if defined(DIAGNOSTIC) || defined(DDB)
void	extent_print1(struct extent *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
#endif

static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
	    struct extent_region *, struct extent_region *);
static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
static	void extent_free_region_descriptor(struct extent *,
	    struct extent_region *);
int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
	    u_long, u_long, int, struct extent_region *, u_long *);

/*
 * Shortcut to align to an arbitrary power-of-two boundary.
 */
static __inline__ u_long
extent_align(u_long start, u_long align, u_long skew)
{
	return ((((start - skew) + (align - 1)) & (-align)) + skew);
}
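
/*
 * Worked examples of extent_align() (illustrative values, not from the
 * original source):
 *
 *	extent_align(0x1234, 0x1000, 0)    == 0x2000
 *	extent_align(0x1234, 0x1000, 0x20) == 0x2020
 *
 * A non-zero "skew" aligns relative to the skew offset instead of zero;
 * "align" must be a power of two.
 */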


#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Register the extent on a doubly linked list.
 * Should work, no?
 */
static LIST_HEAD(listhead, extent) ext_list;
static	void extent_register(struct extent *);

static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	if (!initialized) {
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
#endif	/* DIAGNOSTIC || DDB */

struct pool ex_region_pl;

static void
extent_pool_init(void)
{
	static int inited;

	if (!inited) {
		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
		    "extentpl", NULL);
		pool_setipl(&ex_region_pl, IPL_VM);
		inited = 1;
	}
}

#ifdef DDB
/*
 * Print out all registered extents.  This is used by the
 * DDB "show extents" command.
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
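
/*
 * Example usage (a sketch only, not part of the original source; the
 * names "iomem_ex" and "iomem_ex_storage" are made up for illustration):
 *
 *	static long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) /
 *	    sizeof(long)];
 *	struct extent *iomem_ex;
 *
 *	iomem_ex = extent_create("iomem", 0x0, 0xffffffff, M_DEVBUF,
 *	    (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage),
 *	    EX_NOCOALESCE | EX_NOWAIT);
 *
 * Passing a NULL storage pointer and a zero storagesize instead creates
 * a non-fixed extent whose region descriptors are allocated dynamically.
 */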

/*
 * Destroy an extent map.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions);
	    rp != LIST_END(&ex->ex_regions); ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	/* Remove from the list of all extents. */
	LIST_REMOVE(ex, ex_link);
#endif

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		free(ex, ex->ex_mtype);
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
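
/*
 * Illustration of the coalescing above (descriptive only):
 *
 *	before:  [after: 0x00-0x0f]              [next: 0x20-0x2f]
 *	insert:                     0x10-0x1f
 *	result:  [after: 0x00-0x2f]
 *
 * The inserted region exactly fills the gap, so it is appended to
 * "after", the two regions are merged, and both the passed-in "rp"
 * and the "next" descriptor are freed.
 */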

/*
 * Allocate a specific region in an extent map.
 */
int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *last, *myrp;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (size < 1) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
		    "are mutually exclusive", __func__);
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: start 0x%lx, end 0x%lx\n",
		    __func__, start, end);
		panic("%s: region lies outside extent", __func__);
#else
		return (EINVAL);
#endif
	}

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		    "%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

 alloc_start:
	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				ex->ex_flags |= EXF_WANTED;
				error = tsleep(ex,
				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
				    "extnt", 0);
				if (error)
					return (error);
				goto alloc_start;
			}

			/*
			 * If we tolerate conflicts, adjust things such
			 * that all space in the requested region is
			 * allocated.
			 */
			if (flags & EX_CONFLICTOK) {
				/*
				 * There are four possibilities:
				 *
				 * 1. The current region overlaps with
				 *    the start of the requested region.
				 *    Adjust the requested region to
				 *    start at the end of the current
				 *    region and try again.
				 *
				 * 2. The current region falls
				 *    completely within the requested
				 *    region.  Free the current region
				 *    and try again.
				 *
				 * 3. The current region overlaps with
				 *    the end of the requested region.
				 *    Adjust the requested region to
				 *    end at the start of the current
				 *    region and try again.
				 *
				 * 4. The requested region falls
				 *    completely within the current
				 *    region.  We're done.
				 */
				if (rp->er_start <= start) {
					if (rp->er_end < ex->ex_end) {
						start = rp->er_end + 1;
						size = end - start + 1;
						goto alloc_start;
					}
				} else if (rp->er_end < end) {
					LIST_REMOVE(rp, er_link);
					extent_free_region_descriptor(ex, rp);
					goto alloc_start;
				} else if (rp->er_start < end) {
					if (rp->er_start > ex->ex_start) {
						end = rp->er_start - 1;
						size = end - start + 1;
						goto alloc_start;
					}
				}
				return (0);
			}

			extent_free_region_descriptor(ex, myrp);
			return (EAGAIN);
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, last, myrp);
	return (0);
}
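
/*
 * Example usage (a sketch only, not part of the original source;
 * "iomem_ex" is the hypothetical extent from the extent_create()
 * example above):
 *
 *	error = extent_alloc_region(iomem_ex, 0xa0000, 0x10000, EX_NOWAIT);
 *	if (error)
 *		return (error);
 *	...
 *	error = extent_free(iomem_ex, 0xa0000, 0x10000, EX_NOWAIT);
 *
 * A region allocated with extent_alloc_region() is released by calling
 * extent_free() with the same start and size.
 */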

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
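
/*
 * Illustrative only: with a 32-bit u_long, x = 0xfffffff0 and y = 0x20
 * wrap around to 0x10, so ((x) + (y)) >= (x) is false and LE_OV()
 * reports that the request does not fit instead of being fooled by
 * the overflow.
 */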

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
int
extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    struct extent_region *myrp, u_long *result)
{
	struct extent_region *rp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (myrp == NULL)
		panic("%s: NULL region descriptor", __func__);
	if (result == NULL)
		panic("%s: NULL result pointer", __func__);
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: substart 0x%lx, subend 0x%lx\n",
		    __func__, substart, subend);
		panic("%s: bad subregion", __func__);
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (alignment == 0)
		panic("%s: bad alignment", __func__);
	if (boundary && (boundary < size)) {
		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
		    __func__, ex->ex_name, size, boundary);
		panic("%s: bad boundary", __func__);
	}
#endif

 alloc_start:
	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of the end of the free region.  This is either the
	 * end of the extent or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = extent_align(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
		    alignment);
		panic("%s: overflow after alignment", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = extent_align((last->er_end + 1), alignment, skew);

	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and
		 * see if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = extent_align(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * We don't fit exactly, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = extent_align((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = extent_align(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * We don't fit exactly, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 *	There is no chunk large enough to hold the request.
	 *
	 *	If EX_FAST was not specified, there is not an
	 *	exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		ex->ex_flags |= EXF_WANTED;
		error = tsleep(ex,
		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
		if (error)
			return (error);
		goto alloc_start;
	}

	extent_free_region_descriptor(ex, myrp);
	return (EAGAIN);

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, last, myrp);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    u_long *result)
{
	struct extent_region *rp;

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	rp = extent_alloc_region_descriptor(ex, flags);
	if (rp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}
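
/*
 * Example usage (a sketch only, not part of the original source;
 * "iomem_ex" and "dma_addr" are made-up names):
 *
 *	error = extent_alloc_subregion(iomem_ex, 0x100000, 0xffffff,
 *	    0x10000, 0x1000, 0, 0x10000, EX_NOWAIT, &dma_addr);
 *
 * This asks for a 64KB chunk within the 1MB-16MB subregion, aligned to
 * 4KB, with no skew, and not crossing a 64KB boundary; on success the
 * chosen start address is returned in "dma_addr".
 */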

int
extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
    u_long subend, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, struct extent_region *rp, u_long *result)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

	rp->er_flags = ER_DISCARD;
	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before the chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}

static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}


#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)

void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}

void
extent_print1(struct extent *ex,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
#endif