subr_extent.c revision 1.45
1/*	$OpenBSD: subr_extent.c,v 1.45 2011/01/05 13:36:19 fgsch Exp $	*/
2/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Matthias Drochner.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * General purpose extent manager.
35 */
36
37#ifdef _KERNEL
38#include <sys/param.h>
39#include <sys/extent.h>
40#include <sys/malloc.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/proc.h>
44#include <sys/queue.h>
45#include <sys/pool.h>
46#include <ddb/db_output.h>
47#else
48/*
49 * user-land definitions, so it can fit into a testing harness.
50 */
51#include <sys/param.h>
52#include <sys/pool.h>
53#include <sys/extent.h>
54#include <sys/queue.h>
55#include <errno.h>
56#include <stdlib.h>
57#include <stdio.h>
58#include <string.h>
59
60#define	malloc(s, t, flags)		malloc(s)
61#define	free(p, t)			free(p)
62#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
63#define	wakeup(chan)			((void)0)
64#define	pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
65#define	pool_init(a, b, c, d, e, f, g)	(a)->pr_size = (b)
66#define	pool_setipl(pool, ipl)		/* nothing */
67#define	pool_put(pool, rp)		free((rp), 0)
68#define	panic				printf
69#endif
70
71#if defined(DIAGNOSTIC) || defined(DDB)
72void	extent_print1(struct extent *, int (*)(const char *, ...));
73#endif
74
75static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
76	    struct extent_region *, struct extent_region *);
77static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
78static	void extent_free_region_descriptor(struct extent *,
79	    struct extent_region *);
80
/*
 * Round "start" up to the next boundary that is congruent to "skew"
 * modulo "align".  "align" must be a power of two.
 */
static __inline__ u_long
extent_align(u_long start, u_long align, u_long skew)
{
	u_long unskewed = start - skew;
	u_long rounded = (unskewed + (align - 1)) & ~(align - 1);

	return (rounded + skew);
}
89
90
91#if defined(DIAGNOSTIC) || defined(DDB)
92/*
93 * Register the extent on a doubly linked list.
94 * Should work, no?
95 */
96static LIST_HEAD(listhead, extent) ext_list;
97static	void extent_register(struct extent *);
98
/*
 * Add "ex" to the global list of extents so it shows up in the
 * DDB "show extents" command.  Only compiled in under DIAGNOSTIC
 * or DDB.
 */
static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	/* Lazily initialize the global list on the first registration. */
	if (!initialized){
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	/* Catch double registration of the same extent. */
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
122#endif	/* DIAGNOSTIC || DDB */
123
124struct pool ex_region_pl;
125
126static void
127extent_pool_init(void)
128{
129	static int inited;
130
131	if (!inited) {
132		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
133		    "extentpl", NULL);
134		pool_setipl(&ex_region_pl, IPL_VM);
135		inited = 1;
136	}
137}
138
#ifdef DDB
/*
 * Print out all extents registered.  This is used in
 * DDB show extents.  Walks the global ext_list maintained by
 * extent_register() and dumps each extent via db_printf.
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif
154
/*
 * Allocate and initialize an extent map.
 *
 * "name" labels the extent in diagnostics; the string is referenced,
 * not copied.  "start" and "end" are the inclusive bounds of the range
 * to manage.  If "storage" is non-NULL the extent is "fixed": the
 * extent descriptor and all region descriptors are carved out of the
 * caller-supplied buffer of "storagesize" bytes, and no memory is ever
 * allocated on the extent's behalf.  Otherwise the descriptor comes
 * from malloc type "mtype".  Returns NULL if the descriptor (or the
 * EX_FILLED region) cannot be allocated.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	/* Make sure the region-descriptor pool exists. */
	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		bzero(storage, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	/*
	 * EX_FILLED: start out with the whole range allocated, so the
	 * caller can punch out the usable pieces with extent_free().
	 */
	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			/* Fixed extents live in caller storage; don't free. */
			if (!fixed_extent)
				free(ex, mtype);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
249
250/*
251 * Destroy an extent map.
252 */
253void
254extent_destroy(struct extent *ex)
255{
256	struct extent_region *rp, *orp;
257
258#ifdef DIAGNOSTIC
259	/* Check arguments. */
260	if (ex == NULL)
261		panic("%s: NULL extent", __func__);
262#endif
263
264	/* Free all region descriptors in extent. */
265	for (rp = LIST_FIRST(&ex->ex_regions);
266	    rp != LIST_END(&ex->ex_regions); ) {
267		orp = rp;
268		rp = LIST_NEXT(rp, er_link);
269		LIST_REMOVE(orp, er_link);
270		extent_free_region_descriptor(ex, orp);
271	}
272
273#if defined(DIAGNOSTIC) || defined(DDB)
274	/* Remove from the list of all extents. */
275	LIST_REMOVE(ex, ex_link);
276#endif
277
278	/* If we're not a fixed extent, free the extent descriptor itself. */
279	if ((ex->ex_flags & EXF_FIXED) == 0)
280		free(ex, ex->ex_mtype);
281}
282
/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region [start, start + size - 1] being inserted is described
 * by "rp".  We must allocate the region descriptor before calling
 * this function!  If the descriptor turns out to be unneeded
 * (because we coalesced with a neighbor), it is freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.  The three regions
			 * merge into "after".
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
391
392/*
393 * Allocate a specific region in an extent map.
394 */
395int
396extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
397{
398	struct extent_region *rp, *last, *myrp;
399	u_long end = start + (size - 1);
400	int error;
401
402#ifdef DIAGNOSTIC
403	/* Check arguments. */
404	if (ex == NULL)
405		panic("%s: NULL extent", __func__);
406	if (size < 1) {
407		printf("%s: extent `%s', size 0x%lx\n",
408		    __func__, ex->ex_name, size);
409		panic("%s: bad size", __func__);
410	}
411	if (end < start) {
412		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
413		    __func__, ex->ex_name, start, size);
414		panic("%s: overflow", __func__);
415	}
416	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
417		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
418		    "are mutually exclusive", __func__);
419#endif
420
421	/*
422	 * Make sure the requested region lies within the
423	 * extent.
424	 */
425	if ((start < ex->ex_start) || (end > ex->ex_end)) {
426#ifdef DIAGNOSTIC
427		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
428		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
429		printf("%s: start 0x%lx, end 0x%lx\n",
430		    __func__, start, end);
431		panic("%s: region lies outside extent", __func__);
432#else
433		return (EINVAL);
434#endif
435	}
436
437	/*
438	 * Allocate the region descriptor.  It will be freed later
439	 * if we can coalesce with another region.
440	 */
441	myrp = extent_alloc_region_descriptor(ex, flags);
442	if (myrp == NULL) {
443#ifdef DIAGNOSTIC
444		printf(
445		    "%s: can't allocate region descriptor\n", __func__);
446#endif
447		return (ENOMEM);
448	}
449
450 alloc_start:
451	/*
452	 * Attempt to place ourselves in the desired area of the
453	 * extent.  We save ourselves some work by keeping the list sorted.
454	 * In other words, if the start of the current region is greater
455	 * than the end of our region, we don't have to search any further.
456	 */
457
458	/*
459	 * Keep a pointer to the last region we looked at so
460	 * that we don't have to traverse the list again when
461	 * we insert ourselves.  If "last" is NULL when we
462	 * finally insert ourselves, we go at the head of the
463	 * list.  See extent_insert_and_optimize() for details.
464	 */
465	last = NULL;
466
467	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
468		if (rp->er_start > end) {
469			/*
470			 * We lie before this region and don't
471			 * conflict.
472			 */
473			break;
474		}
475
476		/*
477		 * The current region begins before we end.
478		 * Check for a conflict.
479		 */
480		if (rp->er_end >= start) {
481			/*
482			 * We conflict.  If we can (and want to) wait,
483			 * do so.
484			 */
485			if (flags & EX_WAITSPACE) {
486				ex->ex_flags |= EXF_WANTED;
487				error = tsleep(ex,
488				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
489				    "extnt", 0);
490				if (error)
491					return (error);
492				goto alloc_start;
493			}
494
495			/*
496			 * If we tolerate conflicts adjust things such
497			 * that all space in the requested region is
498			 * allocated.
499			 */
500			if (flags & EX_CONFLICTOK) {
501				/*
502				 * There are four possibilities:
503				 *
504				 * 1. The current region overlaps with
505				 *    the start of the requested region.
506				 *    Adjust the requested region to
507				 *    start at the end of the current
508				 *    region and try again.
509				 *
510				 * 2. The current region falls
511				 *    completely within the requested
512				 *    region.  Free the current region
513				 *    and try again.
514				 *
515				 * 3. The current region overlaps with
516				 *    the end of the requested region.
517				 *    Adjust the requested region to
518				 *    end at the start of the current
519				 *    region and try again.
520				 *
521				 * 4. The requested region falls
522				 *    completely within the current
523				 *    region.  We're done.
524				 */
525				if (rp->er_start <= start) {
526					if (rp->er_end < ex->ex_end) {
527						start = rp->er_end + 1;
528						size = end - start + 1;
529						goto alloc_start;
530					}
531				} else if (rp->er_end < end) {
532					LIST_REMOVE(rp, er_link);
533					extent_free_region_descriptor(ex, rp);
534					goto alloc_start;
535				} else if (rp->er_start < end) {
536					if (rp->er_start > ex->ex_start) {
537						end = rp->er_start - 1;
538						size = end - start + 1;
539						goto alloc_start;
540					}
541				}
542				return (0);
543			}
544
545			extent_free_region_descriptor(ex, myrp);
546			return (EAGAIN);
547		}
548		/*
549		 * We don't conflict, but this region lies before
550		 * us.  Keep a pointer to this region, and keep
551		 * trying.
552		 */
553		last = rp;
554	}
555
556	/*
557	 * We don't conflict with any regions.  "last" points
558	 * to the region we fall after, or is NULL if we belong
559	 * at the beginning of the region list.  Insert ourselves.
560	 */
561	extent_insert_and_optimize(ex, start, size, last, myrp);
562	return (0);
563}
564
/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs: when (x + y) wraps around, the sum is
 * smaller than x and the first comparison rejects it.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
570
571/*
572 * Allocate a region in an extent map subregion.
573 *
574 * If EX_FAST is specified, we return the first fit in the map.
575 * Otherwise, we try to minimize fragmentation by finding the
576 * smallest gap that will hold the request.
577 *
578 * The allocated region is aligned to "alignment", which must be
579 * a power of 2.
580 */
581int
582extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
583    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
584    u_long *result)
585{
586	struct extent_region *rp, *myrp, *last, *bestlast;
587	u_long newstart, newend, exend, beststart, bestovh, ovh;
588	u_long dontcross;
589	int error;
590
591#ifdef DIAGNOSTIC
592	/* Check arguments. */
593	if (ex == NULL)
594		panic("%s: NULL extent", __func__);
595	if (result == NULL)
596		panic("%s: NULL result pointer", __func__);
597	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
598	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
599		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
600		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
601		printf("%s: substart 0x%lx, subend 0x%lx\n",
602		    __func__, substart, subend);
603		panic("%s: bad subregion", __func__);
604	}
605	if ((size < 1) || ((size - 1) > (subend - substart))) {
606		printf("%s: extent `%s', size 0x%lx\n",
607		    __func__, ex->ex_name, size);
608		panic("%s: bad size", __func__);
609	}
610	if (alignment == 0)
611		panic("%s: bad alignment", __func__);
612	if (boundary && (boundary < size)) {
613		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
614		    __func__, ex->ex_name, size, boundary);
615		panic("%s: bad boundary", __func__);
616	}
617#endif
618
619	/*
620	 * Allocate the region descriptor.  It will be freed later
621	 * if we can coalesce with another region.
622	 */
623	myrp = extent_alloc_region_descriptor(ex, flags);
624	if (myrp == NULL) {
625#ifdef DIAGNOSTIC
626		printf("%s: can't allocate region descriptor\n", __func__);
627#endif
628		return (ENOMEM);
629	}
630
631 alloc_start:
632	/*
633	 * Keep a pointer to the last region we looked at so
634	 * that we don't have to traverse the list again when
635	 * we insert ourselves.  If "last" is NULL when we
636	 * finally insert ourselves, we go at the head of the
637	 * list.  See extent_insert_and_optimize() for details.
638	 */
639	last = NULL;
640
641	/*
642	 * Keep track of size and location of the smallest
643	 * chunk we fit in.
644	 *
645	 * Since the extent can be as large as the numeric range
646	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
647	 * best overhead value can be the maximum unsigned integer.
648	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
649	 * into the region list immediately on an exact match (which
650	 * is the only case where "bestovh" would be set to 0).
651	 */
652	bestovh = 0;
653	beststart = 0;
654	bestlast = NULL;
655
656	/*
657	 * Keep track of end of free region.  This is either the end of extent
658	 * or the start of a region past the subend.
659	 */
660	exend = ex->ex_end;
661
662	/*
663	 * For N allocated regions, we must make (N + 1)
664	 * checks for unallocated space.  The first chunk we
665	 * check is the area from the beginning of the subregion
666	 * to the first allocated region after that point.
667	 */
668	newstart = extent_align(substart, alignment, skew);
669	if (newstart < ex->ex_start) {
670#ifdef DIAGNOSTIC
671		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
672		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
673		    alignment);
674		panic("%s: overflow after alignment", __func__);
675#else
676		extent_free_region_descriptor(ex, myrp);
677		return (EINVAL);
678#endif
679	}
680
681	/*
682	 * Find the first allocated region that begins on or after
683	 * the subregion start, advancing the "last" pointer along
684	 * the way.
685	 */
686	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
687		if (rp->er_start >= newstart)
688			break;
689		last = rp;
690	}
691
692	/*
693	 * Relocate the start of our candidate region to the end of
694	 * the last allocated region (if there was one overlapping
695	 * our subrange).
696	 */
697	if (last != NULL && last->er_end >= newstart)
698		newstart = extent_align((last->er_end + 1), alignment, skew);
699
700	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
701		/*
702		 * If the region pasts the subend, bail out and see
703		 * if we fit against the subend.
704		 */
705		if (rp->er_start > subend) {
706			exend = rp->er_start;
707			break;
708		}
709
710		/*
711		 * Check the chunk before "rp".  Note that our
712		 * comparison is safe from overflow conditions.
713		 */
714		if (LE_OV(newstart, size, rp->er_start)) {
715			/*
716			 * Do a boundary check, if necessary.  Note
717			 * that a region may *begin* on the boundary,
718			 * but it must end before the boundary.
719			 */
720			if (boundary) {
721				newend = newstart + (size - 1);
722
723				/*
724				 * Calculate the next boundary after the start
725				 * of this region.
726				 */
727				dontcross = extent_align(newstart+1, boundary,
728				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
729				    - 1;
730
731#if 0
732				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
733				    newstart, newend, ex->ex_start, ex->ex_end,
734				    boundary, dontcross);
735#endif
736
737				/* Check for overflow */
738				if (dontcross < ex->ex_start)
739					dontcross = ex->ex_end;
740				else if (newend > dontcross) {
741					/*
742					 * Candidate region crosses boundary.
743					 * Throw away the leading part and see
744					 * if we still fit.
745					 */
746					newstart = dontcross + 1;
747					newend = newstart + (size - 1);
748					dontcross += boundary;
749					if (!LE_OV(newstart, size, rp->er_start))
750						goto skip;
751				}
752
753				/*
754				 * If we run past the end of
755				 * the extent or the boundary
756				 * overflows, then the request
757				 * can't fit.
758				 */
759				if (newstart + size - 1 > ex->ex_end ||
760				    dontcross < newstart)
761					goto fail;
762			}
763
764			/*
765			 * We would fit into this space.  Calculate
766			 * the overhead (wasted space).  If we exactly
767			 * fit, or we're taking the first fit, insert
768			 * ourselves into the region list.
769			 */
770			ovh = rp->er_start - newstart - size;
771			if ((flags & EX_FAST) || (ovh == 0))
772				goto found;
773
774			/*
775			 * Don't exactly fit, but check to see
776			 * if we're better than any current choice.
777			 */
778			if ((bestovh == 0) || (ovh < bestovh)) {
779				bestovh = ovh;
780				beststart = newstart;
781				bestlast = last;
782			}
783		}
784
785skip:
786		/*
787		 * Skip past the current region and check again.
788		 */
789		newstart = extent_align((rp->er_end + 1), alignment, skew);
790		if (newstart < rp->er_end) {
791			/*
792			 * Overflow condition.  Don't error out, since
793			 * we might have a chunk of space that we can
794			 * use.
795			 */
796			goto fail;
797		}
798
799		last = rp;
800	}
801
802	/*
803	 * The final check is from the current starting point to the
804	 * end of the subregion.  If there were no allocated regions,
805	 * "newstart" is set to the beginning of the subregion, or
806	 * just past the end of the last allocated region, adjusted
807	 * for alignment in either case.
808	 */
809	if (LE_OV(newstart, (size - 1), subend)) {
810		/*
811		 * Do a boundary check, if necessary.  Note
812		 * that a region may *begin* on the boundary,
813		 * but it must end before the boundary.
814		 */
815		if (boundary) {
816			newend = newstart + (size - 1);
817
818			/*
819			 * Calculate the next boundary after the start
820			 * of this region.
821			 */
822			dontcross = extent_align(newstart+1, boundary,
823			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
824			    - 1;
825
826#if 0
827			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
828			    newstart, newend, ex->ex_start, ex->ex_end,
829			    boundary, dontcross);
830#endif
831
832			/* Check for overflow */
833			if (dontcross < ex->ex_start)
834				dontcross = ex->ex_end;
835			else if (newend > dontcross) {
836				/*
837				 * Candidate region crosses boundary.
838				 * Throw away the leading part and see
839				 * if we still fit.
840				 */
841				newstart = dontcross + 1;
842				newend = newstart + (size - 1);
843				dontcross += boundary;
844				if (!LE_OV(newstart, (size - 1), subend))
845					goto fail;
846			}
847
848			/*
849			 * If we run past the end of
850			 * the extent or the boundary
851			 * overflows, then the request
852			 * can't fit.
853			 */
854			if (newstart + size - 1 > ex->ex_end ||
855			    dontcross < newstart)
856				goto fail;
857		}
858
859		/*
860		 * We would fit into this space.  Calculate
861		 * the overhead (wasted space).  If we exactly
862		 * fit, or we're taking the first fit, insert
863		 * ourselves into the region list.
864		 */
865		ovh = exend - newstart - (size - 1);
866		if ((flags & EX_FAST) || (ovh == 0))
867			goto found;
868
869		/*
870		 * Don't exactly fit, but check to see
871		 * if we're better than any current choice.
872		 */
873		if ((bestovh == 0) || (ovh < bestovh)) {
874			bestovh = ovh;
875			beststart = newstart;
876			bestlast = last;
877		}
878	}
879
880 fail:
881	/*
882	 * One of the following two conditions have
883	 * occurred:
884	 *
885	 *	There is no chunk large enough to hold the request.
886	 *
887	 *	If EX_FAST was not specified, there is not an
888	 *	exact match for the request.
889	 *
890	 * Note that if we reach this point and EX_FAST is
891	 * set, then we know there is no space in the extent for
892	 * the request.
893	 */
894	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
895		/*
896		 * We have a match that's "good enough".
897		 */
898		newstart = beststart;
899		last = bestlast;
900		goto found;
901	}
902
903	/*
904	 * No space currently available.  Wait for it to free up,
905	 * if possible.
906	 */
907	if (flags & EX_WAITSPACE) {
908		ex->ex_flags |= EXF_WANTED;
909		error = tsleep(ex,
910		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
911		if (error)
912			return (error);
913		goto alloc_start;
914	}
915
916	extent_free_region_descriptor(ex, myrp);
917	return (EAGAIN);
918
919 found:
920	/*
921	 * Insert ourselves into the region list.
922	 */
923	extent_insert_and_optimize(ex, newstart, size, last, myrp);
924	*result = newstart;
925	return (0);
926}
927
/*
 * Free a region of an extent, returning [start, start + size - 1]
 * to the free space.  The range must exactly match an allocated
 * region or, unless EXF_NOCOALESCE is set, a sub-range of one
 * (in which case that region is trimmed or split).  Panics if no
 * containing region is found.  Wakes up any thread waiting for
 * space.  Returns 0 on success, ENOMEM if a descriptor needed for
 * a split cannot be allocated.
 */
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor (for a possible case-4 split). */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	/* Wake anyone sleeping in the allocators waiting for space. */
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}
1067
/*
 * Get a region descriptor for "ex".  For a fixed extent, descriptors
 * normally come from the extent's pre-allocated freelist; if it is
 * empty we either fall back to the pool (EX_MALLOCOK), sleep until
 * one is freed (EX_WAITOK), or fail.  For a non-fixed extent the
 * descriptor always comes from the ex_region_pl pool.  Returns NULL
 * on failure or interrupted sleep.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			/* Sleep until extent_free_region_descriptor() wakes us. */
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}
1109
/*
 * Return a region descriptor to its home.  Descriptors belonging to a
 * fixed extent go back on that extent's freelist; pool-allocated
 * descriptors (ER_ALLOC) go back to the pool — unless a thread is
 * waiting on the fixed extent's freelist, in which case the dynamic
 * descriptor is donated to the freelist instead.  Waiters are woken.
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}
1150
1151
1152#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1153
/*
 * Print the bounds, flags, and allocated regions of "ex" via printf.
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1159
/*
 * Print the bounds, flags, and allocated regions of "ex" using the
 * caller-provided printf-style output function "pr" (e.g. db_printf
 * from DDB, or printf).
 */
void
extent_print1(struct extent *ex, int (*pr)(const char *, ...))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	/* %b decodes the flag bits symbolically in the kernel. */
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
1179#endif
1180