1/*	$OpenBSD: subr_extent.c,v 1.65 2024/01/19 22:12:24 kettenis Exp $	*/
2/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Matthias Drochner.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * General purpose extent manager.
35 */
36
37#ifdef _KERNEL
38#include <sys/param.h>
39#include <sys/extent.h>
40#include <sys/malloc.h>
41#include <sys/systm.h>
42#include <sys/queue.h>
43#include <sys/pool.h>
44#include <ddb/db_output.h>
45#else
46/*
47 * user-land definitions, so it can fit into a testing harness.
48 */
49#include <sys/param.h>
50#include <sys/extent.h>
51#include <sys/queue.h>
52#include <errno.h>
53#include <err.h>
54#include <stdlib.h>
55#include <stdio.h>
56#include <string.h>
57
58#define	malloc(s, t, flags)		malloc(s)
59#define	free(p, t, s)			free(p)
60
61#define	tsleep_nsec(c, p, s, t)		(EWOULDBLOCK)
62#define	wakeup(chan)			((void)0)
63
64struct pool {
65	size_t pr_size;
66};
67
68#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
69#define	pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
70#define	pool_put(pp, rp)		free((rp), 0, 0)
71
72#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
73#endif
74
75#if defined(DIAGNOSTIC) || defined(DDB)
76void	extent_print1(struct extent *, int (*)(const char *, ...)
77	    __attribute__((__format__(__kprintf__,1,2))));
78#endif
79
80static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
81	    struct extent_region *, struct extent_region *);
82static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
83static	void extent_free_region_descriptor(struct extent *,
84	    struct extent_region *);
85int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
86	    u_long, u_long, int, struct extent_region *, u_long *);
87
88/*
89 * Shortcut to align to an arbitrary power-of-two boundary.
90 */
91static __inline__ u_long
92extent_align(u_long start, u_long align, u_long skew)
93{
94	return ((((start - skew) + (align - 1)) & (-align)) + skew);
95}
96
97
98#if defined(DIAGNOSTIC) || defined(DDB)
99/*
100 * Register the extent on a doubly linked list.
101 * Should work, no?
102 */
103static LIST_HEAD(listhead, extent) ext_list;
104static	void extent_register(struct extent *);
105
static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	/* Lazily set up the global extent list on first registration. */
	if (!initialized){
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	/* Guard against registering the same extent twice. */
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
129#endif	/* DIAGNOSTIC || DDB */
130
131struct pool ex_region_pl;
132
133static void
134extent_pool_init(void)
135{
136	static int inited;
137
138	if (!inited) {
139		pool_init(&ex_region_pl, sizeof(struct extent_region), 0,
140		    IPL_VM, 0, "extentpl", NULL);
141		inited = 1;
142	}
143}
144
145#ifdef DDB
146/*
147 * Print out all extents registered.  This is used in
148 * DDB show extents
149 */
150void
151extent_print_all(void)
152{
153	struct extent *ep;
154
155	LIST_FOREACH(ep, &ext_list, ex_link) {
156		extent_print1(ep, db_printf);
157	}
158}
159#endif
160
161/*
162 * Allocate and initialize an extent map.
163 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	/* Caller-supplied storage makes this a "fixed" extent. */
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	/* Make sure the region-descriptor pool exists before first use. */
	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 * Carve as many as fit into the remaining storage.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	/*
	 * EX_FILLED: start out fully allocated by inserting one region
	 * that covers the whole [start, end] range.
	 */
	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, sizeof(struct extent));
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
255
256/*
257 * Destroy an extent map.
258 */
259void
260extent_destroy(struct extent *ex)
261{
262	struct extent_region *rp, *orp;
263
264#ifdef DIAGNOSTIC
265	/* Check arguments. */
266	if (ex == NULL)
267		panic("%s: NULL extent", __func__);
268#endif
269
270	/* Free all region descriptors in extent. */
271	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
272		orp = rp;
273		rp = LIST_NEXT(rp, er_link);
274		LIST_REMOVE(orp, er_link);
275		extent_free_region_descriptor(ex, orp);
276	}
277
278#if defined(DIAGNOSTIC) || defined(DDB)
279	/* Remove from the list of all extents. */
280	LIST_REMOVE(ex, ex_link);
281#endif
282
283	/* If we're not a fixed extent, free the extent descriptor itself. */
284	if ((ex->ex_flags & EXF_FIXED) == 0)
285		free(ex, ex->ex_mtype, sizeof(*ex));
286}
287
288/*
289 * Insert a region descriptor into the sorted region list after the
290 * entry "after" or at the head of the list (if "after" is NULL).
291 * The region descriptor we insert is passed in "rp".  We must
292 * allocate the region descriptor before calling this function!
293 * If we don't need the region descriptor, it will be freed here.
294 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	/* Set once we have merged [start, start+size-1] into "after". */
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
396
397/*
398 * Allocate a specific region in an extent map.
399 */
400int
401extent_do_alloc_region(struct extent *ex, u_long start, u_long size,
402    int flags, struct extent_region *myrp)
403{
404	struct extent_region *rp, *last;
405	u_long end = start + (size - 1);
406	int error;
407
408#ifdef DIAGNOSTIC
409	/* Check arguments. */
410	if (ex == NULL)
411		panic("%s: NULL extent", __func__);
412	if (size < 1) {
413		printf("%s: extent `%s', size 0x%lx\n",
414		    __func__, ex->ex_name, size);
415		panic("%s: bad size", __func__);
416	}
417	if (end < start) {
418		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
419		    __func__, ex->ex_name, start, size);
420		panic("%s: overflow", __func__);
421	}
422	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
423		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
424		    "are mutually exclusive", __func__);
425#endif
426
427	/*
428	 * Make sure the requested region lies within the
429	 * extent.
430	 */
431	if ((start < ex->ex_start) || (end > ex->ex_end)) {
432#ifdef DIAGNOSTIC
433		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
434		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
435		printf("%s: start 0x%lx, end 0x%lx\n",
436		    __func__, start, end);
437		panic("%s: region lies outside extent", __func__);
438#else
439		extent_free_region_descriptor(ex, myrp);
440		return (EINVAL);
441#endif
442	}
443
444 alloc_start:
445	/*
446	 * Attempt to place ourselves in the desired area of the
447	 * extent.  We save ourselves some work by keeping the list sorted.
448	 * In other words, if the start of the current region is greater
449	 * than the end of our region, we don't have to search any further.
450	 */
451
452	/*
453	 * Keep a pointer to the last region we looked at so
454	 * that we don't have to traverse the list again when
455	 * we insert ourselves.  If "last" is NULL when we
456	 * finally insert ourselves, we go at the head of the
457	 * list.  See extent_insert_and_optimize() for details.
458	 */
459	last = NULL;
460
461	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
462		if (rp->er_start > end) {
463			/*
464			 * We lie before this region and don't
465			 * conflict.
466			 */
467			break;
468		}
469
470		/*
471		 * The current region begins before we end.
472		 * Check for a conflict.
473		 */
474		if (rp->er_end >= start) {
475			/*
476			 * We conflict.  If we can (and want to) wait,
477			 * do so.
478			 */
479			if (flags & EX_WAITSPACE) {
480				ex->ex_flags |= EXF_WANTED;
481				error = tsleep_nsec(ex,
482				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
483				    "extnt", INFSLP);
484				if (error)
485					return (error);
486				goto alloc_start;
487			}
488
489			/*
490			 * If we tolerate conflicts adjust things such
491			 * that all space in the requested region is
492			 * allocated.
493			 */
494			if (flags & EX_CONFLICTOK) {
495				/*
496				 * There are four possibilities:
497				 *
498				 * 1. The current region overlaps with
499				 *    the start of the requested region.
500				 *    Adjust the requested region to
501				 *    start at the end of the current
502				 *    region and try again.
503				 *
504				 * 2. The current region falls
505				 *    completely within the requested
506				 *    region.  Free the current region
507				 *    and try again.
508				 *
509				 * 3. The current region overlaps with
510				 *    the end of the requested region.
511				 *    Adjust the requested region to
512				 *    end at the start of the current
513				 *    region and try again.
514				 *
515				 * 4. The requested region falls
516				 *    completely within the current
517				 *    region.  We're done.
518				 */
519				if (rp->er_start <= start) {
520					if (rp->er_end < ex->ex_end) {
521						start = rp->er_end + 1;
522						size = end - start + 1;
523						goto alloc_start;
524					}
525				} else if (rp->er_end < end) {
526					LIST_REMOVE(rp, er_link);
527					extent_free_region_descriptor(ex, rp);
528					goto alloc_start;
529				} else if (rp->er_start < end) {
530					if (rp->er_start > ex->ex_start) {
531						end = rp->er_start - 1;
532						size = end - start + 1;
533						goto alloc_start;
534					}
535				}
536				return (0);
537			}
538
539			extent_free_region_descriptor(ex, myrp);
540			return (EAGAIN);
541		}
542		/*
543		 * We don't conflict, but this region lies before
544		 * us.  Keep a pointer to this region, and keep
545		 * trying.
546		 */
547		last = rp;
548	}
549
550	/*
551	 * We don't conflict with any regions.  "last" points
552	 * to the region we fall after, or is NULL if we belong
553	 * at the beginning of the region list.  Insert ourselves.
554	 */
555	extent_insert_and_optimize(ex, start, size, last, myrp);
556	return (0);
557}
558
559int
560extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
561{
562	struct extent_region *rp;
563
564	/*
565	 * Allocate the region descriptor.  It will be freed later
566	 * if we can coalesce with another region.
567	 */
568	rp = extent_alloc_region_descriptor(ex, flags);
569	if (rp == NULL) {
570#ifdef DIAGNOSTIC
571		printf("%s: can't allocate region descriptor\n", __func__);
572#endif
573		return (ENOMEM);
574	}
575
576	return extent_do_alloc_region(ex, start, size, flags, rp);
577}
578
579int
580extent_alloc_region_with_descr(struct extent *ex, u_long start,
581    u_long size, int flags, struct extent_region *rp)
582{
583#ifdef DIAGNOSTIC
584	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
585		panic("%s: EX_NOCOALESCE not set", __func__);
586#endif
587
588	rp->er_flags = ER_DISCARD;
589	return extent_do_alloc_region(ex, start, size, flags, rp);
590}
591
592/*
593 * Macro to check (x + y) <= z.  This check is designed to fail
594 * if an overflow occurs.
595 */
596#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
597
598/*
599 * Allocate a region in an extent map subregion.
600 *
601 * If EX_FAST is specified, we return the first fit in the map.
602 * Otherwise, we try to minimize fragmentation by finding the
603 * smallest gap that will hold the request.
604 *
605 * The allocated region is aligned to "alignment", which must be
606 * a power of 2.
607 */
int
extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    struct extent_region *myrp, u_long *result)
{
	struct extent_region *rp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

	/*
	 * "myrp" is consumed: it is either inserted into the region
	 * list on success or freed on the failure paths below.
	 */
#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (myrp == NULL)
		panic("%s: NULL region descriptor", __func__);
	if (result == NULL)
		panic("%s: NULL result pointer", __func__);
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: substart 0x%lx, subend 0x%lx\n",
		    __func__, substart, subend);
		panic("%s: bad subregion", __func__);
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (alignment == 0)
		panic("%s: bad alignment", __func__);
	if (boundary && (boundary < size)) {
		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
		    __func__, ex->ex_name, size, boundary);
		panic("%s: bad boundary", __func__);
	}
#endif

 alloc_start:
	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of end of free region.  This is either the end of extent
	 * or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = extent_align(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
		    alignment);
		panic("%s: overflow after alignment", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = extent_align((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region pasts the subend, bail out and see
		 * if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = extent_align(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = extent_align((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = extent_align(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions have
	 * occurred:
	 *
	 *	There is no chunk large enough to hold the request.
	 *
	 *	If EX_FAST was not specified, there is not an
	 *	exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		ex->ex_flags |= EXF_WANTED;
		error = tsleep_nsec(ex,
		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
		    "extnt", INFSLP);
		if (error)
			return (error);
		goto alloc_start;
	}

	extent_free_region_descriptor(ex, myrp);
	return (EAGAIN);

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, last, myrp);
	*result = newstart;
	return (0);
}
945
946int
947extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
948    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
949    u_long *result)
950{
951	struct extent_region *rp;
952
953	/*
954	 * Allocate the region descriptor.  It will be freed later
955	 * if we can coalesce with another region.
956	 */
957	rp = extent_alloc_region_descriptor(ex, flags);
958	if (rp == NULL) {
959#ifdef DIAGNOSTIC
960		printf("%s: can't allocate region descriptor\n", __func__);
961#endif
962		return (ENOMEM);
963	}
964
965	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
966	    boundary, flags, rp, result);
967}
968
969int
970extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
971    u_long subend, u_long size, u_long alignment, u_long skew,
972    u_long boundary, int flags, struct extent_region *rp, u_long *result)
973{
974#ifdef DIAGNOSTIC
975	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
976		panic("%s: EX_NOCOALESCE not set", __func__);
977#endif
978
979	rp->er_flags = ER_DISCARD;
980	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
981	    boundary, flags, rp, result);
982}
983
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	struct extent_region *tmp;
	u_long end = start + (size - 1);
	int exflags;
	int error = 0;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor (needed only for case 4). */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 *
	 * If the EX_CONFLICTOK flag is set, partially overlapping
	 * regions are allowed.  This is handled in cases 1a, 2a and
	 * 3a below.
	 */
	LIST_FOREACH_SAFE(rp, &ex->ex_regions, er_link, tmp) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}

		if ((flags & EX_CONFLICTOK) == 0)
			continue;

		/* Case 1a. */
		if ((start <= rp->er_start && end >= rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			continue;
		}

		/* Case 2a. */
		if ((start <= rp->er_start) && (end >= rp->er_start))
			rp->er_start = (end + 1);

		/* Case 3a. */
		if ((start <= rp->er_end) && (end >= rp->er_end))
			rp->er_end = (start - 1);
	}

	/* With EX_CONFLICTOK, freeing a range nobody owns is not an error. */
	if (flags & EX_CONFLICTOK)
		goto done;

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	/* Return the unused pre-allocated descriptor and wake waiters. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (error);
}
1150
/*
 * Allocate a region descriptor for use by `ex'.
 *
 * For a fixed extent the descriptor normally comes from the extent's
 * private freelist.  If that list is empty, the behavior depends on
 * `flags': with EX_MALLOCOK we fall back to the pool; with EX_WAITOK
 * we sleep until another thread returns a descriptor (interruptibly
 * if EX_CATCH is also set); otherwise we fail.  Non-fixed extents
 * always allocate from the pool.
 *
 * Returns the descriptor, or NULL if one could not be obtained
 * within the constraints imposed by `flags'.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * Loop: the freelist may be empty again by the time we
		 * are woken, so re-check after every sleep.
		 */
		while (LIST_EMPTY(&fex->fex_freelist)) {
			/* Caller allows falling back to the pool. */
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			/*
			 * Mark that a thread is waiting for a descriptor
			 * BEFORE sleeping, so extent_free_region_descriptor()
			 * knows to wake us.  A non-zero return from
			 * tsleep_nsec() means the sleep was interrupted
			 * (EX_CATCH); give up in that case.
			 */
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep_nsec(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", INFSLP))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	/* Dynamic allocation: tag it ER_ALLOC so the free path knows. */
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}
1192
1193static void
1194extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
1195{
1196	if (rp->er_flags & ER_DISCARD)
1197		return;
1198
1199	if (ex->ex_flags & EXF_FIXED) {
1200		struct extent_fixed *fex = (struct extent_fixed *)ex;
1201
1202		/*
1203		 * If someone's waiting for a region descriptor,
1204		 * be nice and give them this one, rather than
1205		 * just free'ing it back to the system.
1206		 */
1207		if (rp->er_flags & ER_ALLOC) {
1208			if (ex->ex_flags & EXF_FLWANTED) {
1209				/* Clear all but ER_ALLOC flag. */
1210				rp->er_flags = ER_ALLOC;
1211				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1212				    er_link);
1213				goto wake_em_up;
1214			} else {
1215				pool_put(&ex_region_pl, rp);
1216			}
1217		} else {
1218			/* Clear all flags. */
1219			rp->er_flags = 0;
1220			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1221		}
1222
1223		if (ex->ex_flags & EXF_FLWANTED) {
1224 wake_em_up:
1225			ex->ex_flags &= ~EXF_FLWANTED;
1226			wakeup(&fex->fex_freelist);
1227		}
1228		return;
1229	}
1230
1231	/*
1232	 * We know it's dynamically allocated if we get here.
1233	 */
1234	pool_put(&ex_region_pl, rp);
1235}
1236
1237
1238#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1239
/*
 * Print the layout of extent `ex' (header plus all allocated regions)
 * to the console via printf.  Convenience wrapper around
 * extent_print1().
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1245
1246void
1247extent_print1(struct extent *ex,
1248    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1249{
1250	struct extent_region *rp;
1251
1252	if (ex == NULL)
1253		panic("%s: NULL extent", __func__);
1254
1255#ifdef _KERNEL
1256	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1257	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1258#else
1259	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1260	    ex->ex_start, ex->ex_end, ex->ex_flags);
1261#endif
1262
1263	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1264		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1265}
1266#endif
1267