/* subr_extent.c revision 1.62 */
1/*	$OpenBSD: subr_extent.c,v 1.62 2019/09/11 12:30:34 kettenis Exp $	*/
2/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Matthias Drochner.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * General purpose extent manager.
35 */
36
37#ifdef _KERNEL
38#include <sys/param.h>
39#include <sys/extent.h>
40#include <sys/malloc.h>
41#include <sys/systm.h>
42#include <sys/queue.h>
43#include <sys/pool.h>
44#include <ddb/db_output.h>
45#else
46/*
47 * user-land definitions, so it can fit into a testing harness.
48 */
49#include <sys/param.h>
50#include <sys/extent.h>
51#include <sys/queue.h>
52#include <errno.h>
53#include <err.h>
54#include <stdlib.h>
55#include <stdio.h>
56#include <string.h>
57
58#define	malloc(s, t, flags)		malloc(s)
59#define	free(p, t, s)			free(p)
60
61#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
62#define	wakeup(chan)			((void)0)
63
64struct pool {
65	size_t pr_size;
66};
67
68#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
69#define	pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
70#define	pool_put(pp, rp)		free((rp), 0, 0)
71
72#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
73#endif
74
75#if defined(DIAGNOSTIC) || defined(DDB)
76void	extent_print1(struct extent *, int (*)(const char *, ...)
77	    __attribute__((__format__(__kprintf__,1,2))));
78#endif
79
80static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
81	    struct extent_region *, struct extent_region *);
82static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
83static	void extent_free_region_descriptor(struct extent *,
84	    struct extent_region *);
85int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
86	    u_long, u_long, int, struct extent_region *, u_long *);
87
88/*
89 * Shortcut to align to an arbitrary power-of-two boundary.
90 */
91static __inline__ u_long
92extent_align(u_long start, u_long align, u_long skew)
93{
94	return ((((start - skew) + (align - 1)) & (-align)) + skew);
95}
96
97
98#if defined(DIAGNOSTIC) || defined(DDB)
99/*
100 * Register the extent on a doubly linked list.
101 * Should work, no?
102 */
103static LIST_HEAD(listhead, extent) ext_list;
104static	void extent_register(struct extent *);
105
106static void
107extent_register(struct extent *ex)
108{
109#ifdef DIAGNOSTIC
110	struct extent *ep;
111#endif
112	static int initialized;
113
114	if (!initialized){
115		LIST_INIT(&ext_list);
116		initialized = 1;
117	}
118
119#ifdef DIAGNOSTIC
120	LIST_FOREACH(ep, &ext_list, ex_link) {
121		if (ep == ex)
122			panic("%s: already registered", __func__);
123	}
124#endif
125
126	/* Insert into list */
127	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
128}
129#endif	/* DIAGNOSTIC || DDB */
130
131struct pool ex_region_pl;
132
133static void
134extent_pool_init(void)
135{
136	static int inited;
137
138	if (!inited) {
139		pool_init(&ex_region_pl, sizeof(struct extent_region), 0,
140		    IPL_VM, 0, "extentpl", NULL);
141		inited = 1;
142	}
143}
144
145#ifdef DDB
146/*
147 * Print out all extents registered.  This is used in
148 * DDB show extents
149 */
150void
151extent_print_all(void)
152{
153	struct extent *ep;
154
155	LIST_FOREACH(ep, &ext_list, ex_link) {
156		extent_print1(ep, db_printf);
157	}
158}
159#endif
160
/*
 * Allocate and initialize an extent map covering [start, end] inclusive.
 *
 * "name" is stored by reference, not copied; the caller must keep it
 * valid for the extent's lifetime.  If "storage" is non-NULL the extent
 * is "fixed": the extent header and all region descriptors are carved
 * out of the caller-supplied buffer of "storagesize" bytes and no
 * allocator is used at run time.  Returns NULL on allocation failure.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 * The extent_fixed header sits at the front of the
		 * caller's buffer; "cp"/"sz" track the remainder.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 * Carve as many as fit in the remaining storage.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	/*
	 * EX_FILLED: start life with the whole range allocated, so the
	 * caller can punch out the usable pieces with extent_free().
	 */
	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, sizeof(struct extent));
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
255
256/*
257 * Destroy an extent map.
258 */
259void
260extent_destroy(struct extent *ex)
261{
262	struct extent_region *rp, *orp;
263
264#ifdef DIAGNOSTIC
265	/* Check arguments. */
266	if (ex == NULL)
267		panic("%s: NULL extent", __func__);
268#endif
269
270	/* Free all region descriptors in extent. */
271	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
272		orp = rp;
273		rp = LIST_NEXT(rp, er_link);
274		LIST_REMOVE(orp, er_link);
275		extent_free_region_descriptor(ex, orp);
276	}
277
278#if defined(DIAGNOSTIC) || defined(DDB)
279	/* Remove from the list of all extents. */
280	LIST_REMOVE(ex, ex_link);
281#endif
282
283	/* If we're not a fixed extent, free the extent descriptor itself. */
284	if ((ex->ex_flags & EXF_FIXED) == 0)
285		free(ex, ex->ex_mtype, sizeof(*ex));
286}
287
/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 *
 * "start"/"size" describe the region being inserted; unless
 * EXF_NOCOALESCE is set, we try to merge it with its neighbors so
 * adjacent allocations share one descriptor.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 * (Our start abuts the end of "after".)
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 * (Our end abuts the start of the next region.)
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
396
397/*
398 * Allocate a specific region in an extent map.
399 */
400int
401extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
402{
403	struct extent_region *rp, *last, *myrp;
404	u_long end = start + (size - 1);
405	int error;
406
407#ifdef DIAGNOSTIC
408	/* Check arguments. */
409	if (ex == NULL)
410		panic("%s: NULL extent", __func__);
411	if (size < 1) {
412		printf("%s: extent `%s', size 0x%lx\n",
413		    __func__, ex->ex_name, size);
414		panic("%s: bad size", __func__);
415	}
416	if (end < start) {
417		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
418		    __func__, ex->ex_name, start, size);
419		panic("%s: overflow", __func__);
420	}
421	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
422		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
423		    "are mutually exclusive", __func__);
424#endif
425
426	/*
427	 * Make sure the requested region lies within the
428	 * extent.
429	 */
430	if ((start < ex->ex_start) || (end > ex->ex_end)) {
431#ifdef DIAGNOSTIC
432		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
433		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
434		printf("%s: start 0x%lx, end 0x%lx\n",
435		    __func__, start, end);
436		panic("%s: region lies outside extent", __func__);
437#else
438		return (EINVAL);
439#endif
440	}
441
442	/*
443	 * Allocate the region descriptor.  It will be freed later
444	 * if we can coalesce with another region.
445	 */
446	myrp = extent_alloc_region_descriptor(ex, flags);
447	if (myrp == NULL) {
448#ifdef DIAGNOSTIC
449		printf(
450		    "%s: can't allocate region descriptor\n", __func__);
451#endif
452		return (ENOMEM);
453	}
454
455 alloc_start:
456	/*
457	 * Attempt to place ourselves in the desired area of the
458	 * extent.  We save ourselves some work by keeping the list sorted.
459	 * In other words, if the start of the current region is greater
460	 * than the end of our region, we don't have to search any further.
461	 */
462
463	/*
464	 * Keep a pointer to the last region we looked at so
465	 * that we don't have to traverse the list again when
466	 * we insert ourselves.  If "last" is NULL when we
467	 * finally insert ourselves, we go at the head of the
468	 * list.  See extent_insert_and_optimize() for details.
469	 */
470	last = NULL;
471
472	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
473		if (rp->er_start > end) {
474			/*
475			 * We lie before this region and don't
476			 * conflict.
477			 */
478			break;
479		}
480
481		/*
482		 * The current region begins before we end.
483		 * Check for a conflict.
484		 */
485		if (rp->er_end >= start) {
486			/*
487			 * We conflict.  If we can (and want to) wait,
488			 * do so.
489			 */
490			if (flags & EX_WAITSPACE) {
491				ex->ex_flags |= EXF_WANTED;
492				error = tsleep(ex,
493				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
494				    "extnt", 0);
495				if (error)
496					return (error);
497				goto alloc_start;
498			}
499
500			/*
501			 * If we tolerate conflicts adjust things such
502			 * that all space in the requested region is
503			 * allocated.
504			 */
505			if (flags & EX_CONFLICTOK) {
506				/*
507				 * There are four possibilities:
508				 *
509				 * 1. The current region overlaps with
510				 *    the start of the requested region.
511				 *    Adjust the requested region to
512				 *    start at the end of the current
513				 *    region and try again.
514				 *
515				 * 2. The current region falls
516				 *    completely within the requested
517				 *    region.  Free the current region
518				 *    and try again.
519				 *
520				 * 3. The current region overlaps with
521				 *    the end of the requested region.
522				 *    Adjust the requested region to
523				 *    end at the start of the current
524				 *    region and try again.
525				 *
526				 * 4. The requested region falls
527				 *    completely within the current
528				 *    region.  We're done.
529				 */
530				if (rp->er_start <= start) {
531					if (rp->er_end < ex->ex_end) {
532						start = rp->er_end + 1;
533						size = end - start + 1;
534						goto alloc_start;
535					}
536				} else if (rp->er_end < end) {
537					LIST_REMOVE(rp, er_link);
538					extent_free_region_descriptor(ex, rp);
539					goto alloc_start;
540				} else if (rp->er_start < end) {
541					if (rp->er_start > ex->ex_start) {
542						end = rp->er_start - 1;
543						size = end - start + 1;
544						goto alloc_start;
545					}
546				}
547				return (0);
548			}
549
550			extent_free_region_descriptor(ex, myrp);
551			return (EAGAIN);
552		}
553		/*
554		 * We don't conflict, but this region lies before
555		 * us.  Keep a pointer to this region, and keep
556		 * trying.
557		 */
558		last = rp;
559	}
560
561	/*
562	 * We don't conflict with any regions.  "last" points
563	 * to the region we fall after, or is NULL if we belong
564	 * at the beginning of the region list.  Insert ourselves.
565	 */
566	extent_insert_and_optimize(ex, start, size, last, myrp);
567	return (0);
568}
569
570/*
571 * Macro to check (x + y) <= z.  This check is designed to fail
572 * if an overflow occurs.
573 */
574#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
575
576/*
577 * Allocate a region in an extent map subregion.
578 *
579 * If EX_FAST is specified, we return the first fit in the map.
580 * Otherwise, we try to minimize fragmentation by finding the
581 * smallest gap that will hold the request.
582 *
583 * The allocated region is aligned to "alignment", which must be
584 * a power of 2.
585 */
586int
587extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
588    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
589    struct extent_region *myrp, u_long *result)
590{
591	struct extent_region *rp, *last, *bestlast;
592	u_long newstart, newend, exend, beststart, bestovh, ovh;
593	u_long dontcross;
594	int error;
595
596#ifdef DIAGNOSTIC
597	/* Check arguments. */
598	if (ex == NULL)
599		panic("%s: NULL extent", __func__);
600	if (myrp == NULL)
601		panic("%s: NULL region descriptor", __func__);
602	if (result == NULL)
603		panic("%s: NULL result pointer", __func__);
604	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
605	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
606		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
607		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
608		printf("%s: substart 0x%lx, subend 0x%lx\n",
609		    __func__, substart, subend);
610		panic("%s: bad subregion", __func__);
611	}
612	if ((size < 1) || ((size - 1) > (subend - substart))) {
613		printf("%s: extent `%s', size 0x%lx\n",
614		    __func__, ex->ex_name, size);
615		panic("%s: bad size", __func__);
616	}
617	if (alignment == 0)
618		panic("%s: bad alignment", __func__);
619	if (boundary && (boundary < size)) {
620		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
621		    __func__, ex->ex_name, size, boundary);
622		panic("%s: bad boundary", __func__);
623	}
624#endif
625
626 alloc_start:
627	/*
628	 * Keep a pointer to the last region we looked at so
629	 * that we don't have to traverse the list again when
630	 * we insert ourselves.  If "last" is NULL when we
631	 * finally insert ourselves, we go at the head of the
632	 * list.  See extent_insert_and_optimize() for details.
633	 */
634	last = NULL;
635
636	/*
637	 * Keep track of size and location of the smallest
638	 * chunk we fit in.
639	 *
640	 * Since the extent can be as large as the numeric range
641	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
642	 * best overhead value can be the maximum unsigned integer.
643	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
644	 * into the region list immediately on an exact match (which
645	 * is the only case where "bestovh" would be set to 0).
646	 */
647	bestovh = 0;
648	beststart = 0;
649	bestlast = NULL;
650
651	/*
652	 * Keep track of end of free region.  This is either the end of extent
653	 * or the start of a region past the subend.
654	 */
655	exend = ex->ex_end;
656
657	/*
658	 * For N allocated regions, we must make (N + 1)
659	 * checks for unallocated space.  The first chunk we
660	 * check is the area from the beginning of the subregion
661	 * to the first allocated region after that point.
662	 */
663	newstart = extent_align(substart, alignment, skew);
664	if (newstart < ex->ex_start) {
665#ifdef DIAGNOSTIC
666		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
667		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
668		    alignment);
669		panic("%s: overflow after alignment", __func__);
670#else
671		extent_free_region_descriptor(ex, myrp);
672		return (EINVAL);
673#endif
674	}
675
676	/*
677	 * Find the first allocated region that begins on or after
678	 * the subregion start, advancing the "last" pointer along
679	 * the way.
680	 */
681	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
682		if (rp->er_start >= newstart)
683			break;
684		last = rp;
685	}
686
687	/*
688	 * Relocate the start of our candidate region to the end of
689	 * the last allocated region (if there was one overlapping
690	 * our subrange).
691	 */
692	if (last != NULL && last->er_end >= newstart)
693		newstart = extent_align((last->er_end + 1), alignment, skew);
694
695	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
696		/*
697		 * If the region pasts the subend, bail out and see
698		 * if we fit against the subend.
699		 */
700		if (rp->er_start > subend) {
701			exend = rp->er_start;
702			break;
703		}
704
705		/*
706		 * Check the chunk before "rp".  Note that our
707		 * comparison is safe from overflow conditions.
708		 */
709		if (LE_OV(newstart, size, rp->er_start)) {
710			/*
711			 * Do a boundary check, if necessary.  Note
712			 * that a region may *begin* on the boundary,
713			 * but it must end before the boundary.
714			 */
715			if (boundary) {
716				newend = newstart + (size - 1);
717
718				/*
719				 * Calculate the next boundary after the start
720				 * of this region.
721				 */
722				dontcross = extent_align(newstart+1, boundary,
723				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
724				    - 1;
725
726#if 0
727				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
728				    newstart, newend, ex->ex_start, ex->ex_end,
729				    boundary, dontcross);
730#endif
731
732				/* Check for overflow */
733				if (dontcross < ex->ex_start)
734					dontcross = ex->ex_end;
735				else if (newend > dontcross) {
736					/*
737					 * Candidate region crosses boundary.
738					 * Throw away the leading part and see
739					 * if we still fit.
740					 */
741					newstart = dontcross + 1;
742					newend = newstart + (size - 1);
743					dontcross += boundary;
744					if (!LE_OV(newstart, size, rp->er_start))
745						goto skip;
746				}
747
748				/*
749				 * If we run past the end of
750				 * the extent or the boundary
751				 * overflows, then the request
752				 * can't fit.
753				 */
754				if (newstart + size - 1 > ex->ex_end ||
755				    dontcross < newstart)
756					goto fail;
757			}
758
759			/*
760			 * We would fit into this space.  Calculate
761			 * the overhead (wasted space).  If we exactly
762			 * fit, or we're taking the first fit, insert
763			 * ourselves into the region list.
764			 */
765			ovh = rp->er_start - newstart - size;
766			if ((flags & EX_FAST) || (ovh == 0))
767				goto found;
768
769			/*
770			 * Don't exactly fit, but check to see
771			 * if we're better than any current choice.
772			 */
773			if ((bestovh == 0) || (ovh < bestovh)) {
774				bestovh = ovh;
775				beststart = newstart;
776				bestlast = last;
777			}
778		}
779
780skip:
781		/*
782		 * Skip past the current region and check again.
783		 */
784		newstart = extent_align((rp->er_end + 1), alignment, skew);
785		if (newstart < rp->er_end) {
786			/*
787			 * Overflow condition.  Don't error out, since
788			 * we might have a chunk of space that we can
789			 * use.
790			 */
791			goto fail;
792		}
793
794		last = rp;
795	}
796
797	/*
798	 * The final check is from the current starting point to the
799	 * end of the subregion.  If there were no allocated regions,
800	 * "newstart" is set to the beginning of the subregion, or
801	 * just past the end of the last allocated region, adjusted
802	 * for alignment in either case.
803	 */
804	if (LE_OV(newstart, (size - 1), subend)) {
805		/*
806		 * Do a boundary check, if necessary.  Note
807		 * that a region may *begin* on the boundary,
808		 * but it must end before the boundary.
809		 */
810		if (boundary) {
811			newend = newstart + (size - 1);
812
813			/*
814			 * Calculate the next boundary after the start
815			 * of this region.
816			 */
817			dontcross = extent_align(newstart+1, boundary,
818			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
819			    - 1;
820
821#if 0
822			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
823			    newstart, newend, ex->ex_start, ex->ex_end,
824			    boundary, dontcross);
825#endif
826
827			/* Check for overflow */
828			if (dontcross < ex->ex_start)
829				dontcross = ex->ex_end;
830			else if (newend > dontcross) {
831				/*
832				 * Candidate region crosses boundary.
833				 * Throw away the leading part and see
834				 * if we still fit.
835				 */
836				newstart = dontcross + 1;
837				newend = newstart + (size - 1);
838				dontcross += boundary;
839				if (!LE_OV(newstart, (size - 1), subend))
840					goto fail;
841			}
842
843			/*
844			 * If we run past the end of
845			 * the extent or the boundary
846			 * overflows, then the request
847			 * can't fit.
848			 */
849			if (newstart + size - 1 > ex->ex_end ||
850			    dontcross < newstart)
851				goto fail;
852		}
853
854		/*
855		 * We would fit into this space.  Calculate
856		 * the overhead (wasted space).  If we exactly
857		 * fit, or we're taking the first fit, insert
858		 * ourselves into the region list.
859		 */
860		ovh = exend - newstart - (size - 1);
861		if ((flags & EX_FAST) || (ovh == 0))
862			goto found;
863
864		/*
865		 * Don't exactly fit, but check to see
866		 * if we're better than any current choice.
867		 */
868		if ((bestovh == 0) || (ovh < bestovh)) {
869			bestovh = ovh;
870			beststart = newstart;
871			bestlast = last;
872		}
873	}
874
875 fail:
876	/*
877	 * One of the following two conditions have
878	 * occurred:
879	 *
880	 *	There is no chunk large enough to hold the request.
881	 *
882	 *	If EX_FAST was not specified, there is not an
883	 *	exact match for the request.
884	 *
885	 * Note that if we reach this point and EX_FAST is
886	 * set, then we know there is no space in the extent for
887	 * the request.
888	 */
889	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
890		/*
891		 * We have a match that's "good enough".
892		 */
893		newstart = beststart;
894		last = bestlast;
895		goto found;
896	}
897
898	/*
899	 * No space currently available.  Wait for it to free up,
900	 * if possible.
901	 */
902	if (flags & EX_WAITSPACE) {
903		ex->ex_flags |= EXF_WANTED;
904		error = tsleep(ex,
905		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
906		if (error)
907			return (error);
908		goto alloc_start;
909	}
910
911	extent_free_region_descriptor(ex, myrp);
912	return (EAGAIN);
913
914 found:
915	/*
916	 * Insert ourselves into the region list.
917	 */
918	extent_insert_and_optimize(ex, newstart, size, last, myrp);
919	*result = newstart;
920	return (0);
921}
922
923int
924extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
925    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
926    u_long *result)
927{
928	struct extent_region *rp;
929
930	/*
931	 * Allocate the region descriptor.  It will be freed later
932	 * if we can coalesce with another region.
933	 */
934	rp = extent_alloc_region_descriptor(ex, flags);
935	if (rp == NULL) {
936#ifdef DIAGNOSTIC
937		printf("%s: can't allocate region descriptor\n", __func__);
938#endif
939		return (ENOMEM);
940	}
941
942	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
943	    boundary, flags, rp, result);
944}
945
946int
947extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
948    u_long subend, u_long size, u_long alignment, u_long skew,
949    u_long boundary, int flags, struct extent_region *rp, u_long *result)
950{
951#ifdef DIAGNOSTIC
952	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
953		panic("%s: EX_NOCOALESCE not set", __func__);
954#endif
955
956	rp->er_flags = ER_DISCARD;
957	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
958	    boundary, flags, rp, result);
959}
960
/*
 * Free the region [start, start + size - 1] of the extent.  May split
 * an existing region in two (which needs a fresh descriptor, hence the
 * up-front allocation below).  With EX_CONFLICTOK, partial overlaps
 * with allocated regions are trimmed rather than treated as errors.
 * Wakes up anyone sleeping in the EX_WAITSPACE paths.
 */
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	struct extent_region *tmp;
	u_long end = start + (size - 1);
	int exflags;
	int error = 0;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 *
	 * If the EX_CONFLICTOK flag is set, partially overlapping
	 * regions are allowed.  This is handled in cases 1a, 2a and
	 * 3a below.
	 */
	LIST_FOREACH_SAFE(rp, &ex->ex_regions, er_link, tmp) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1: exact match, drop the whole region. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2: trim the front of the region. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3: trim the tail of the region. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4: punch a hole, splitting the region in two. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}

		if ((flags & EX_CONFLICTOK) == 0)
			continue;

		/* Case 1a: region entirely inside the freed range. */
		if ((start <= rp->er_start && end >= rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			continue;
		}

		/* Case 2a: freed range overlaps the region's front. */
		if ((start <= rp->er_start) && (end >= rp->er_start))
			rp->er_start = (end + 1);

		/* Case 3a: freed range overlaps the region's tail. */
		if ((start <= rp->er_end) && (end >= rp->er_end))
			rp->er_end = (start - 1);
	}

	/* With EX_CONFLICTOK, freeing nothing at all is acceptable. */
	if (flags & EX_CONFLICTOK)
		goto done;

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	/* Return the spare descriptor if the split case didn't use it. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (error);
}
1127
1128static struct extent_region *
1129extent_alloc_region_descriptor(struct extent *ex, int flags)
1130{
1131	struct extent_region *rp;
1132
1133	if (ex->ex_flags & EXF_FIXED) {
1134		struct extent_fixed *fex = (struct extent_fixed *)ex;
1135
1136		while (LIST_EMPTY(&fex->fex_freelist)) {
1137			if (flags & EX_MALLOCOK)
1138				goto alloc;
1139
1140			if ((flags & EX_WAITOK) == 0)
1141				return (NULL);
1142			ex->ex_flags |= EXF_FLWANTED;
1143			if (tsleep(&fex->fex_freelist,
1144			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1145			    "extnt", 0))
1146				return (NULL);
1147		}
1148		rp = LIST_FIRST(&fex->fex_freelist);
1149		LIST_REMOVE(rp, er_link);
1150
1151		/*
1152		 * Don't muck with flags after pulling it off the
1153		 * freelist; it may be a dynamically allocated
1154		 * region pointer that was kindly given to us,
1155		 * and we need to preserve that information.
1156		 */
1157
1158		return (rp);
1159	}
1160
1161 alloc:
1162	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
1163	    PR_NOWAIT);
1164	if (rp != NULL)
1165		rp->er_flags = ER_ALLOC;
1166
1167	return (rp);
1168}
1169
1170static void
1171extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
1172{
1173	if (rp->er_flags & ER_DISCARD)
1174		return;
1175
1176	if (ex->ex_flags & EXF_FIXED) {
1177		struct extent_fixed *fex = (struct extent_fixed *)ex;
1178
1179		/*
1180		 * If someone's waiting for a region descriptor,
1181		 * be nice and give them this one, rather than
1182		 * just free'ing it back to the system.
1183		 */
1184		if (rp->er_flags & ER_ALLOC) {
1185			if (ex->ex_flags & EXF_FLWANTED) {
1186				/* Clear all but ER_ALLOC flag. */
1187				rp->er_flags = ER_ALLOC;
1188				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1189				    er_link);
1190				goto wake_em_up;
1191			} else {
1192				pool_put(&ex_region_pl, rp);
1193			}
1194		} else {
1195			/* Clear all flags. */
1196			rp->er_flags = 0;
1197			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1198		}
1199
1200		if (ex->ex_flags & EXF_FLWANTED) {
1201 wake_em_up:
1202			ex->ex_flags &= ~EXF_FLWANTED;
1203			wakeup(&fex->fex_freelist);
1204		}
1205		return;
1206	}
1207
1208	/*
1209	 * We know it's dynamically allocated if we get here.
1210	 */
1211	pool_put(&ex_region_pl, rp);
1212}
1213
1214
1215#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1216
/*
 * Print the extent's name, bounds, flags and region list to the
 * console via printf.  Thin convenience wrapper over extent_print1().
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1222
1223void
1224extent_print1(struct extent *ex,
1225    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1226{
1227	struct extent_region *rp;
1228
1229	if (ex == NULL)
1230		panic("%s: NULL extent", __func__);
1231
1232#ifdef _KERNEL
1233	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1234	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1235#else
1236	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1237	    ex->ex_start, ex->ex_end, ex->ex_flags);
1238#endif
1239
1240	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1241		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1242}
1243#endif
1244