/*	$OpenBSD: subr_extent.c,v 1.37 2009/04/19 15:26:52 kettenis Exp $	*/
/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

37#ifdef _KERNEL
38#include <sys/param.h>
39#include <sys/extent.h>
40#include <sys/malloc.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/proc.h>
44#include <sys/queue.h>
45#include <sys/pool.h>
46#include <ddb/db_output.h>
47#else
48/*
49 * user-land definitions, so it can fit into a testing harness.
50 */
51#include <sys/param.h>
52#include <sys/pool.h>
53#include <sys/extent.h>
54#include <sys/queue.h>
55#include <errno.h>
56#include <stdlib.h>
57#include <stdio.h>
58
59#define	malloc(s, t, flags)		malloc(s)
60#define	free(p, t)			free(p)
61#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
62#define	wakeup(chan)			((void)0)
63#define	pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
64#define	pool_init(a, b, c, d, e, f, g)	(a)->pr_size = (b)
65#define	pool_put(pool, rp)		free((rp), 0)
66#define	panic				printf
67#define	splvm()				(1)
68#define	splx(s)				((void)(s))
69#endif
70
71#if defined(DIAGNOSTIC) || defined(DDB)
72void	extent_print1(struct extent *, int (*)(const char *, ...));
73#endif
74
75static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
76	    struct extent_region *, struct extent_region *);
77static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
78static	void extent_free_region_descriptor(struct extent *,
79	    struct extent_region *);
80
81/*
82 * Shortcut to align to an arbitrary power-of-two boundary.
83 */
84static __inline__ u_long
85extent_align(u_long start, u_long align, u_long skew)
86{
87	return ((((start - skew) + (align - 1)) & (-align)) + skew);
88}
89
90
#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Register the extent on a doubly linked list.
 * Should work, no?
 */
static LIST_HEAD(listhead, extent) ext_list;
static	void extent_register(struct extent *);

/*
 * Add "ex" to the global list of all extents, used by the DDB
 * "show extents" command and the DIAGNOSTIC double-registration check.
 */
static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	/* Lazily initialize the list head on first use. */
	if (!initialized) {
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	/* Guard against registering the same extent twice. */
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("extent_register: already registered");
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
#endif	/* DIAGNOSTIC || DDB */
123
124struct pool ex_region_pl;
125
126static void
127extent_pool_init(void)
128{
129	static int inited;
130
131	if (!inited) {
132		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
133		    "extentpl", NULL);
134		inited = 1;
135	}
136}
137
#ifdef DDB
/*
 * Print out all extents registered.  This is used in
 * DDB show extents
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif
153
154/*
155 * Allocate and initialize an extent map.
156 */
157struct extent *
158extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
159    size_t storagesize, int flags)
160{
161	struct extent *ex;
162	caddr_t cp = storage;
163	size_t sz = storagesize;
164	struct extent_region *rp;
165	int fixed_extent = (storage != NULL);
166
167#ifdef DIAGNOSTIC
168	/* Check arguments. */
169	if (name == NULL)
170		panic("extent_create: name == NULL");
171	if (end < start) {
172		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
173		    name, start, end);
174		panic("extent_create: end < start");
175	}
176	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
177		panic("extent_create: fixed extent, bad storagesize 0x%lx",
178		    (u_long)storagesize);
179	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
180		panic("extent_create: storage provided for non-fixed");
181#endif
182
183	extent_pool_init();
184
185	/* Allocate extent descriptor. */
186	if (fixed_extent) {
187		struct extent_fixed *fex;
188
189		bzero(storage, storagesize);
190
191		/*
192		 * Align all descriptors on "long" boundaries.
193		 */
194		fex = (struct extent_fixed *)cp;
195		ex = (struct extent *)fex;
196		cp += ALIGN(sizeof(struct extent_fixed));
197		sz -= ALIGN(sizeof(struct extent_fixed));
198		fex->fex_storage = storage;
199		fex->fex_storagesize = storagesize;
200
201		/*
202		 * In a fixed extent, we have to pre-allocate region
203		 * descriptors and place them in the extent's freelist.
204		 */
205		LIST_INIT(&fex->fex_freelist);
206		while (sz >= ALIGN(sizeof(struct extent_region))) {
207			rp = (struct extent_region *)cp;
208			cp += ALIGN(sizeof(struct extent_region));
209			sz -= ALIGN(sizeof(struct extent_region));
210			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
211		}
212	} else {
213		ex = (struct extent *)malloc(sizeof(struct extent),
214		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
215		if (ex == NULL)
216			return (NULL);
217	}
218
219	/* Fill in the extent descriptor and return it to the caller. */
220	LIST_INIT(&ex->ex_regions);
221	ex->ex_name = name;
222	ex->ex_start = start;
223	ex->ex_end = end;
224	ex->ex_mtype = mtype;
225	ex->ex_flags = 0;
226	if (fixed_extent)
227		ex->ex_flags |= EXF_FIXED;
228	if (flags & EX_NOCOALESCE)
229		ex->ex_flags |= EXF_NOCOALESCE;
230
231	if (flags & EX_FILLED) {
232		rp = extent_alloc_region_descriptor(ex, flags);
233		if (rp == NULL) {
234			if (!fixed_extent)
235				free(ex, mtype);
236			return (NULL);
237		}
238		rp->er_start = start;
239		rp->er_end = end;
240		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
241	}
242
243#if defined(DIAGNOSTIC) || defined(DDB)
244	extent_register(ex);
245#endif
246	return (ex);
247}
248
249/*
250 * Destroy an extent map.
251 */
252void
253extent_destroy(struct extent *ex)
254{
255	struct extent_region *rp, *orp;
256
257#ifdef DIAGNOSTIC
258	/* Check arguments. */
259	if (ex == NULL)
260		panic("extent_destroy: NULL extent");
261#endif
262
263	/* Free all region descriptors in extent. */
264	for (rp = LIST_FIRST(&ex->ex_regions);
265	    rp != LIST_END(&ex->ex_regions); ) {
266		orp = rp;
267		rp = LIST_NEXT(rp, er_link);
268		LIST_REMOVE(orp, er_link);
269		extent_free_region_descriptor(ex, orp);
270	}
271
272#if defined(DIAGNOSTIC) || defined(DDB)
273	/* Remove from the list of all extents. */
274	LIST_REMOVE(ex, ex_link);
275#endif
276
277	/* If we're not a fixed extent, free the extent descriptor itself. */
278	if ((ex->ex_flags & EXF_FIXED) == 0)
279		free(ex, ex->ex_mtype);
280}
281
282/*
283 * Insert a region descriptor into the sorted region list after the
284 * entry "after" or at the head of the list (if "after" is NULL).
285 * The region descriptor we insert is passed in "rp".  We must
286 * allocate the region descriptor before calling this function!
287 * If we don't need the region descriptor, it will be freed here.
288 */
289static void
290extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
291    struct extent_region *after, struct extent_region *rp)
292{
293	struct extent_region *nextr;
294	int appended = 0;
295
296	if (after == NULL) {
297		/*
298		 * We're the first in the region list.  If there's
299		 * a region after us, attempt to coalesce to save
300		 * descriptor overhead.
301		 */
302		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
303		    !LIST_EMPTY(&ex->ex_regions) &&
304		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
305			/*
306			 * We can coalesce.  Prepend us to the first region.
307			 */
308			LIST_FIRST(&ex->ex_regions)->er_start = start;
309			extent_free_region_descriptor(ex, rp);
310			return;
311		}
312
313		/*
314		 * Can't coalesce.  Fill in the region descriptor
315		 * in, and insert us at the head of the region list.
316		 */
317		rp->er_start = start;
318		rp->er_end = start + (size - 1);
319		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
320		return;
321	}
322
323	/*
324	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
325	 */
326	if (ex->ex_flags & EXF_NOCOALESCE)
327		goto cant_coalesce;
328
329	/*
330	 * Attempt to coalesce with the region before us.
331	 */
332	if ((after->er_end + 1) == start) {
333		/*
334		 * We can coalesce.  Append ourselves and make
335		 * note of it.
336		 */
337		after->er_end = start + (size - 1);
338		appended = 1;
339	}
340
341	/*
342	 * Attempt to coalesce with the region after us.
343	 */
344	if (LIST_NEXT(after, er_link) != NULL &&
345	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
346		/*
347		 * We can coalesce.  Note that if we appended ourselves
348		 * to the previous region, we exactly fit the gap, and
349		 * can free the "next" region descriptor.
350		 */
351		if (appended) {
352			/*
353			 * Yup, we can free it up.
354			 */
355			after->er_end = LIST_NEXT(after, er_link)->er_end;
356			nextr = LIST_NEXT(after, er_link);
357			LIST_REMOVE(nextr, er_link);
358			extent_free_region_descriptor(ex, nextr);
359		} else {
360			/*
361			 * Nope, just prepend us to the next region.
362			 */
363			LIST_NEXT(after, er_link)->er_start = start;
364		}
365
366		extent_free_region_descriptor(ex, rp);
367		return;
368	}
369
370	/*
371	 * We weren't able to coalesce with the next region, but
372	 * we don't need to allocate a region descriptor if we
373	 * appended ourselves to the previous region.
374	 */
375	if (appended) {
376		extent_free_region_descriptor(ex, rp);
377		return;
378	}
379
380 cant_coalesce:
381
382	/*
383	 * Fill in the region descriptor and insert ourselves
384	 * into the region list.
385	 */
386	rp->er_start = start;
387	rp->er_end = start + (size - 1);
388	LIST_INSERT_AFTER(after, rp, er_link);
389}
390
391/*
392 * Allocate a specific region in an extent map.
393 */
394int
395extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
396{
397	struct extent_region *rp, *last, *myrp;
398	u_long end = start + (size - 1);
399	int error;
400
401#ifdef DIAGNOSTIC
402	/* Check arguments. */
403	if (ex == NULL)
404		panic("extent_alloc_region: NULL extent");
405	if (size < 1) {
406		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
407		    ex->ex_name, size);
408		panic("extent_alloc_region: bad size");
409	}
410	if (end < start) {
411		printf(
412		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
413		 ex->ex_name, start, size);
414		panic("extent_alloc_region: overflow");
415	}
416	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
417		panic("extent_alloc_region: EX_CONFLICTOK and EX_WAITSPACE "
418		    "are mutually exclusive");
419#endif
420
421	/*
422	 * Make sure the requested region lies within the
423	 * extent.
424	 */
425	if ((start < ex->ex_start) || (end > ex->ex_end)) {
426#ifdef DIAGNOSTIC
427		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
428		    ex->ex_name, ex->ex_start, ex->ex_end);
429		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
430		    start, end);
431		panic("extent_alloc_region: region lies outside extent");
432#else
433		return (EINVAL);
434#endif
435	}
436
437	/*
438	 * Allocate the region descriptor.  It will be freed later
439	 * if we can coalesce with another region.
440	 */
441	myrp = extent_alloc_region_descriptor(ex, flags);
442	if (myrp == NULL) {
443#ifdef DIAGNOSTIC
444		printf(
445		    "extent_alloc_region: can't allocate region descriptor\n");
446#endif
447		return (ENOMEM);
448	}
449
450 alloc_start:
451	/*
452	 * Attempt to place ourselves in the desired area of the
453	 * extent.  We save ourselves some work by keeping the list sorted.
454	 * In other words, if the start of the current region is greater
455	 * than the end of our region, we don't have to search any further.
456	 */
457
458	/*
459	 * Keep a pointer to the last region we looked at so
460	 * that we don't have to traverse the list again when
461	 * we insert ourselves.  If "last" is NULL when we
462	 * finally insert ourselves, we go at the head of the
463	 * list.  See extent_insert_and_optimize() for details.
464	 */
465	last = NULL;
466
467	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
468		if (rp->er_start > end) {
469			/*
470			 * We lie before this region and don't
471			 * conflict.
472			 */
473			break;
474		}
475
476		/*
477		 * The current region begins before we end.
478		 * Check for a conflict.
479		 */
480		if (rp->er_end >= start) {
481			/*
482			 * We conflict.  If we can (and want to) wait,
483			 * do so.
484			 */
485			if (flags & EX_WAITSPACE) {
486				ex->ex_flags |= EXF_WANTED;
487				error = tsleep(ex,
488				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
489				    "extnt", 0);
490				if (error)
491					return (error);
492				goto alloc_start;
493			}
494
495			/*
496			 * If we tolerate conflicts adjust things such
497			 * that all space in the requested region is
498			 * allocated.
499			 */
500			if (flags & EX_CONFLICTOK) {
501				/*
502				 * There are two possibilities:
503				 *
504				 * 1. The current region overlaps.
505				 *    Adjust the requested region to
506				 *    start at the end of the current
507				 *    region, and try again.
508				 *
509				 * 2. The current region falls
510                                 *    completely within the requested
511                                 *    region.  Free the current region
512                                 *    and try again.
513				 */
514				if (rp->er_start <= start) {
515					start = rp->er_end + 1;
516					size = end - start + 1;
517				} else {
518					LIST_REMOVE(rp, er_link);
519					extent_free_region_descriptor(ex, rp);
520				}
521				goto alloc_start;
522			}
523
524			extent_free_region_descriptor(ex, myrp);
525			return (EAGAIN);
526		}
527		/*
528		 * We don't conflict, but this region lies before
529		 * us.  Keep a pointer to this region, and keep
530		 * trying.
531		 */
532		last = rp;
533	}
534
535	/*
536	 * We don't conflict with any regions.  "last" points
537	 * to the region we fall after, or is NULL if we belong
538	 * at the beginning of the region list.  Insert ourselves.
539	 */
540	extent_insert_and_optimize(ex, start, size, last, myrp);
541	return (0);
542}
543
/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs: when (x + y) wraps around, the result is
 * smaller than x and the first clause rejects it.  Arguments are
 * evaluated multiple times; pass only side-effect-free expressions.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
549
550/*
551 * Allocate a region in an extent map subregion.
552 *
553 * If EX_FAST is specified, we return the first fit in the map.
554 * Otherwise, we try to minimize fragmentation by finding the
555 * smallest gap that will hold the request.
556 *
557 * The allocated region is aligned to "alignment", which must be
558 * a power of 2.
559 */
560int
561extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
562    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
563    u_long *result)
564{
565	struct extent_region *rp, *myrp, *last, *bestlast;
566	u_long newstart, newend, exend, beststart, bestovh, ovh;
567	u_long dontcross;
568	int error;
569
570#ifdef DIAGNOSTIC
571	/* Check arguments. */
572	if (ex == NULL)
573		panic("extent_alloc_subregion: NULL extent");
574	if (result == NULL)
575		panic("extent_alloc_subregion: NULL result pointer");
576	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
577	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
578  printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
579		    ex->ex_name, ex->ex_start, ex->ex_end);
580		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
581		    substart, subend);
582		panic("extent_alloc_subregion: bad subregion");
583	}
584	if ((size < 1) || ((size - 1) > (subend - substart))) {
585		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
586		    ex->ex_name, size);
587		panic("extent_alloc_subregion: bad size");
588	}
589	if (alignment == 0)
590		panic("extent_alloc_subregion: bad alignment");
591	if (boundary && (boundary < size)) {
592		printf(
593		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
594		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
595		panic("extent_alloc_subregion: bad boundary");
596	}
597#endif
598
599	/*
600	 * Allocate the region descriptor.  It will be freed later
601	 * if we can coalesce with another region.
602	 */
603	myrp = extent_alloc_region_descriptor(ex, flags);
604	if (myrp == NULL) {
605#ifdef DIAGNOSTIC
606		printf(
607		 "extent_alloc_subregion: can't allocate region descriptor\n");
608#endif
609		return (ENOMEM);
610	}
611
612 alloc_start:
613	/*
614	 * Keep a pointer to the last region we looked at so
615	 * that we don't have to traverse the list again when
616	 * we insert ourselves.  If "last" is NULL when we
617	 * finally insert ourselves, we go at the head of the
618	 * list.  See extent_insert_and_optimize() for deatails.
619	 */
620	last = NULL;
621
622	/*
623	 * Keep track of size and location of the smallest
624	 * chunk we fit in.
625	 *
626	 * Since the extent can be as large as the numeric range
627	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
628	 * best overhead value can be the maximum unsigned integer.
629	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
630	 * into the region list immediately on an exact match (which
631	 * is the only case where "bestovh" would be set to 0).
632	 */
633	bestovh = 0;
634	beststart = 0;
635	bestlast = NULL;
636
637	/*
638	 * Keep track of end of free region.  This is either the end of extent
639	 * or the start of a region past the subend.
640	 */
641	exend = ex->ex_end;
642
643	/*
644	 * For N allocated regions, we must make (N + 1)
645	 * checks for unallocated space.  The first chunk we
646	 * check is the area from the beginning of the subregion
647	 * to the first allocated region after that point.
648	 */
649	newstart = extent_align(substart, alignment, skew);
650	if (newstart < ex->ex_start) {
651#ifdef DIAGNOSTIC
652		printf(
653      "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
654		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
655		panic("extent_alloc_subregion: overflow after alignment");
656#else
657		extent_free_region_descriptor(ex, myrp);
658		return (EINVAL);
659#endif
660	}
661
662	/*
663	 * Find the first allocated region that begins on or after
664	 * the subregion start, advancing the "last" pointer along
665	 * the way.
666	 */
667	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
668		if (rp->er_start >= newstart)
669			break;
670		last = rp;
671	}
672
673	/*
674	 * Relocate the start of our candidate region to the end of
675	 * the last allocated region (if there was one overlapping
676	 * our subrange).
677	 */
678	if (last != NULL && last->er_end >= newstart)
679		newstart = extent_align((last->er_end + 1), alignment, skew);
680
681	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
682		/*
683		 * If the region pasts the subend, bail out and see
684		 * if we fit against the subend.
685		 */
686		if (rp->er_start > subend) {
687			exend = rp->er_start;
688			break;
689		}
690
691		/*
692		 * Check the chunk before "rp".  Note that our
693		 * comparison is safe from overflow conditions.
694		 */
695		if (LE_OV(newstart, size, rp->er_start)) {
696			/*
697			 * Do a boundary check, if necessary.  Note
698			 * that a region may *begin* on the boundary,
699			 * but it must end before the boundary.
700			 */
701			if (boundary) {
702				newend = newstart + (size - 1);
703
704				/*
705				 * Calculate the next boundary after the start
706				 * of this region.
707				 */
708				dontcross = extent_align(newstart+1, boundary,
709				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
710				    - 1;
711
712#if 0
713				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
714				    newstart, newend, ex->ex_start, ex->ex_end,
715				    boundary, dontcross);
716#endif
717
718				/* Check for overflow */
719				if (dontcross < ex->ex_start)
720					dontcross = ex->ex_end;
721				else if (newend > dontcross) {
722					/*
723					 * Candidate region crosses boundary.
724					 * Throw away the leading part and see
725					 * if we still fit.
726					 */
727					newstart = dontcross + 1;
728					newend = newstart + (size - 1);
729					dontcross += boundary;
730					if (!LE_OV(newstart, size, rp->er_start))
731						goto skip;
732				}
733
734				/*
735				 * If we run past the end of
736				 * the extent or the boundary
737				 * overflows, then the request
738				 * can't fit.
739				 */
740				if (newstart + size - 1 > ex->ex_end ||
741				    dontcross < newstart)
742					goto fail;
743			}
744
745			/*
746			 * We would fit into this space.  Calculate
747			 * the overhead (wasted space).  If we exactly
748			 * fit, or we're taking the first fit, insert
749			 * ourselves into the region list.
750			 */
751			ovh = rp->er_start - newstart - size;
752			if ((flags & EX_FAST) || (ovh == 0))
753				goto found;
754
755			/*
756			 * Don't exactly fit, but check to see
757			 * if we're better than any current choice.
758			 */
759			if ((bestovh == 0) || (ovh < bestovh)) {
760				bestovh = ovh;
761				beststart = newstart;
762				bestlast = last;
763			}
764		}
765
766skip:
767		/*
768		 * Skip past the current region and check again.
769		 */
770		newstart = extent_align((rp->er_end + 1), alignment, skew);
771		if (newstart < rp->er_end) {
772			/*
773			 * Overflow condition.  Don't error out, since
774			 * we might have a chunk of space that we can
775			 * use.
776			 */
777			goto fail;
778		}
779
780		last = rp;
781	}
782
783	/*
784	 * The final check is from the current starting point to the
785	 * end of the subregion.  If there were no allocated regions,
786	 * "newstart" is set to the beginning of the subregion, or
787	 * just past the end of the last allocated region, adjusted
788	 * for alignment in either case.
789	 */
790	if (LE_OV(newstart, (size - 1), subend)) {
791		/*
792		 * Do a boundary check, if necessary.  Note
793		 * that a region may *begin* on the boundary,
794		 * but it must end before the boundary.
795		 */
796		if (boundary) {
797			newend = newstart + (size - 1);
798
799			/*
800			 * Calculate the next boundary after the start
801			 * of this region.
802			 */
803			dontcross = extent_align(newstart+1, boundary,
804			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
805			    - 1;
806
807#if 0
808			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
809			    newstart, newend, ex->ex_start, ex->ex_end,
810			    boundary, dontcross);
811#endif
812
813			/* Check for overflow */
814			if (dontcross < ex->ex_start)
815				dontcross = ex->ex_end;
816			else if (newend > dontcross) {
817				/*
818				 * Candidate region crosses boundary.
819				 * Throw away the leading part and see
820				 * if we still fit.
821				 */
822				newstart = dontcross + 1;
823				newend = newstart + (size - 1);
824				dontcross += boundary;
825				if (!LE_OV(newstart, (size - 1), subend))
826					goto fail;
827			}
828
829			/*
830			 * If we run past the end of
831			 * the extent or the boundary
832			 * overflows, then the request
833			 * can't fit.
834			 */
835			if (newstart + size - 1 > ex->ex_end ||
836			    dontcross < newstart)
837				goto fail;
838		}
839
840		/*
841		 * We would fit into this space.  Calculate
842		 * the overhead (wasted space).  If we exactly
843		 * fit, or we're taking the first fit, insert
844		 * ourselves into the region list.
845		 */
846		ovh = exend - newstart - (size - 1);
847		if ((flags & EX_FAST) || (ovh == 0))
848			goto found;
849
850		/*
851		 * Don't exactly fit, but check to see
852		 * if we're better than any current choice.
853		 */
854		if ((bestovh == 0) || (ovh < bestovh)) {
855			bestovh = ovh;
856			beststart = newstart;
857			bestlast = last;
858		}
859	}
860
861 fail:
862	/*
863	 * One of the following two conditions have
864	 * occurred:
865	 *
866	 *	There is no chunk large enough to hold the request.
867	 *
868	 *	If EX_FAST was not specified, there is not an
869	 *	exact match for the request.
870	 *
871	 * Note that if we reach this point and EX_FAST is
872	 * set, then we know there is no space in the extent for
873	 * the request.
874	 */
875	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
876		/*
877		 * We have a match that's "good enough".
878		 */
879		newstart = beststart;
880		last = bestlast;
881		goto found;
882	}
883
884	/*
885	 * No space currently available.  Wait for it to free up,
886	 * if possible.
887	 */
888	if (flags & EX_WAITSPACE) {
889		ex->ex_flags |= EXF_WANTED;
890		error = tsleep(ex,
891		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
892		if (error)
893			return (error);
894		goto alloc_start;
895	}
896
897	extent_free_region_descriptor(ex, myrp);
898	return (EAGAIN);
899
900 found:
901	/*
902	 * Insert ourselves into the region list.
903	 */
904	extent_insert_and_optimize(ex, newstart, size, last, myrp);
905	*result = newstart;
906	return (0);
907}
908
909int
910extent_free(struct extent *ex, u_long start, u_long size, int flags)
911{
912	struct extent_region *rp, *nrp = NULL;
913	u_long end = start + (size - 1);
914	int exflags;
915
916#ifdef DIAGNOSTIC
917	/* Check arguments. */
918	if (ex == NULL)
919		panic("extent_free: NULL extent");
920	if ((start < ex->ex_start) || (end > ex->ex_end)) {
921		extent_print(ex);
922		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
923		    ex->ex_name, start, size);
924		panic("extent_free: extent `%s', region not within extent",
925		    ex->ex_name);
926	}
927	/* Check for an overflow. */
928	if (end < start) {
929		extent_print(ex);
930		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
931		    ex->ex_name, start, size);
932		panic("extent_free: overflow");
933	}
934#endif
935
936	/*
937	 * If we're allowing coalescing, we must allocate a region
938	 * descriptor now, since it might block.
939	 *
940	 * XXX Make a static, create-time flags word, so we don't
941	 * XXX have to lock to read it!
942	 */
943	exflags = ex->ex_flags;
944
945	if ((exflags & EXF_NOCOALESCE) == 0) {
946		/* Allocate a region descriptor. */
947		nrp = extent_alloc_region_descriptor(ex, flags);
948		if (nrp == NULL)
949			return (ENOMEM);
950	}
951
952	/*
953	 * Find region and deallocate.  Several possibilities:
954	 *
955	 *	1. (start == er_start) && (end == er_end):
956	 *	   Free descriptor.
957	 *
958	 *	2. (start == er_start) && (end < er_end):
959	 *	   Adjust er_start.
960	 *
961	 *	3. (start > er_start) && (end == er_end):
962	 *	   Adjust er_end.
963	 *
964	 *	4. (start > er_start) && (end < er_end):
965	 *	   Fragment region.  Requires descriptor alloc.
966	 *
967	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
968	 * is not set.
969	 */
970	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
971		/*
972		 * Save ourselves some comparisons; does the current
973		 * region end before chunk to be freed begins?  If so,
974		 * then we haven't found the appropriate region descriptor.
975		 */
976		if (rp->er_end < start)
977			continue;
978
979		/*
980		 * Save ourselves some traversal; does the current
981		 * region begin after the chunk to be freed ends?  If so,
982		 * then we've already passed any possible region descriptors
983		 * that might have contained the chunk to be freed.
984		 */
985		if (rp->er_start > end)
986			break;
987
988		/* Case 1. */
989		if ((start == rp->er_start) && (end == rp->er_end)) {
990			LIST_REMOVE(rp, er_link);
991			extent_free_region_descriptor(ex, rp);
992			goto done;
993		}
994
995		/*
996		 * The following cases all require that EXF_NOCOALESCE
997		 * is not set.
998		 */
999		if (ex->ex_flags & EXF_NOCOALESCE)
1000			continue;
1001
1002		/* Case 2. */
1003		if ((start == rp->er_start) && (end < rp->er_end)) {
1004			rp->er_start = (end + 1);
1005			goto done;
1006		}
1007
1008		/* Case 3. */
1009		if ((start > rp->er_start) && (end == rp->er_end)) {
1010			rp->er_end = (start - 1);
1011			goto done;
1012		}
1013
1014		/* Case 4. */
1015		if ((start > rp->er_start) && (end < rp->er_end)) {
1016			/* Fill in new descriptor. */
1017			nrp->er_start = end + 1;
1018			nrp->er_end = rp->er_end;
1019
1020			/* Adjust current descriptor. */
1021			rp->er_end = start - 1;
1022
1023			/* Insert new descriptor after current. */
1024			LIST_INSERT_AFTER(rp, nrp, er_link);
1025
1026			/* We used the new descriptor, so don't free it below */
1027			nrp = NULL;
1028			goto done;
1029		}
1030	}
1031
1032	/* Region not found, or request otherwise invalid. */
1033#if defined(DIAGNOSTIC) || defined(DDB)
1034	extent_print(ex);
1035#endif
1036	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
1037	panic("extent_free: region not found");
1038
1039 done:
1040	if (nrp != NULL)
1041		extent_free_region_descriptor(ex, nrp);
1042	if (ex->ex_flags & EXF_WANTED) {
1043		ex->ex_flags &= ~EXF_WANTED;
1044		wakeup(ex);
1045	}
1046	return (0);
1047}
1048
1049static struct extent_region *
1050extent_alloc_region_descriptor(struct extent *ex, int flags)
1051{
1052	struct extent_region *rp;
1053	int s;
1054
1055	if (ex->ex_flags & EXF_FIXED) {
1056		struct extent_fixed *fex = (struct extent_fixed *)ex;
1057
1058		while (LIST_EMPTY(&fex->fex_freelist)) {
1059			if (flags & EX_MALLOCOK)
1060				goto alloc;
1061
1062			if ((flags & EX_WAITOK) == 0)
1063				return (NULL);
1064			ex->ex_flags |= EXF_FLWANTED;
1065			if (tsleep(&fex->fex_freelist,
1066			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1067			    "extnt", 0))
1068				return (NULL);
1069		}
1070		rp = LIST_FIRST(&fex->fex_freelist);
1071		LIST_REMOVE(rp, er_link);
1072
1073		/*
1074		 * Don't muck with flags after pulling it off the
1075		 * freelist; it may be a dynamiclly allocated
1076		 * region pointer that was kindly given to us,
1077		 * and we need to preserve that information.
1078		 */
1079
1080		return (rp);
1081	}
1082
1083 alloc:
1084	s = splvm();
1085	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK : 0);
1086	splx(s);
1087	if (rp != NULL)
1088		rp->er_flags = ER_ALLOC;
1089
1090	return (rp);
1091}
1092
1093static void
1094extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
1095{
1096	int s;
1097
1098	if (ex->ex_flags & EXF_FIXED) {
1099		struct extent_fixed *fex = (struct extent_fixed *)ex;
1100
1101		/*
1102		 * If someone's waiting for a region descriptor,
1103		 * be nice and give them this one, rather than
1104		 * just free'ing it back to the system.
1105		 */
1106		if (rp->er_flags & ER_ALLOC) {
1107			if (ex->ex_flags & EXF_FLWANTED) {
1108				/* Clear all but ER_ALLOC flag. */
1109				rp->er_flags = ER_ALLOC;
1110				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1111				    er_link);
1112				goto wake_em_up;
1113			} else {
1114				s = splvm();
1115				pool_put(&ex_region_pl, rp);
1116				splx(s);
1117			}
1118		} else {
1119			/* Clear all flags. */
1120			rp->er_flags = 0;
1121			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1122		}
1123
1124		if (ex->ex_flags & EXF_FLWANTED) {
1125 wake_em_up:
1126			ex->ex_flags &= ~EXF_FLWANTED;
1127			wakeup(&fex->fex_freelist);
1128		}
1129		return;
1130	}
1131
1132	/*
1133	 * We know it's dynamically allocated if we get here.
1134	 */
1135	s = splvm();
1136	pool_put(&ex_region_pl, rp);
1137	splx(s);
1138}
1139
1140
1141#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1142
1143void
1144extent_print(struct extent *ex)
1145{
1146	extent_print1(ex, printf);
1147}
1148
1149void
1150extent_print1(struct extent *ex, int (*pr)(const char *, ...))
1151{
1152	struct extent_region *rp;
1153
1154	if (ex == NULL)
1155		panic("extent_print: NULL extent");
1156
1157#ifdef _KERNEL
1158	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1159	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1160#else
1161	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1162	    ex->ex_start, ex->ex_end, ex->ex_flags);
1163#endif
1164
1165	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1166		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1167}
1168#endif
1169