1/*
2Copyright (c) 2001 Wolfram Gloger
3Copyright (c) 2006 Cavium networks
4
5Permission to use, copy, modify, distribute, and sell this software
6and its documentation for any purpose is hereby granted without fee,
7provided that (i) the above copyright notices and this permission
8notice appear in all copies of the software and related documentation,
9and (ii) the name of Wolfram Gloger may not be used in any advertising
10or publicity relating to the software.
11
12THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
13EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
14WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
15
16IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL,
17INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
18DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
19WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY
20OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
21PERFORMANCE OF THIS SOFTWARE.
22*/
23
24/*
25  This is a version (aka ptmalloc2) of malloc/free/realloc written by
26  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger.
27
28* Version ptmalloc2-20011215
29  $Id: malloc.c 30481 2007-12-05 21:46:59Z rfranz $
30  based on:
31  VERSION 2.7.1pre1 Sat May 12 07:41:21 2001  Doug Lea  (dl at gee)
32
33   Note: There may be an updated version of this malloc obtainable at
34           http://www.malloc.de/malloc/ptmalloc2.tar.gz
35         Check before installing!
36
37* Quickstart
38
39  In order to compile this implementation, a Makefile is provided with
40  the ptmalloc2 distribution, which has pre-defined targets for some
41  popular systems (e.g. "make posix" for Posix threads).  All that is
42  typically required with regard to compiler flags is the selection of
43  the thread package via defining one out of USE_PTHREADS, USE_THR or
44  USE_SPROC.  Check the thread-m.h file for what effects this has.
45  Many/most systems will additionally require USE_TSD_DATA_HACK to be
46  defined, so this is the default for "make posix".
47
48* Why use this malloc?
49
50  This is not the fastest, most space-conserving, most portable, or
51  most tunable malloc ever written. However it is among the fastest
52  while also being among the most space-conserving, portable and tunable.
53  Consistent balance across these factors results in a good general-purpose
54  allocator for malloc-intensive programs.
55
56  The main properties of the algorithms are:
57  * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
58    with ties normally decided via FIFO (i.e. least recently used).
59  * For small (<= 64 bytes by default) requests, it is a caching
60    allocator, that maintains pools of quickly recycled chunks.
61  * In between, and for combinations of large and small requests, it does
62    the best it can trying to meet both goals at once.
63  * For very large requests (>= 128KB by default), it relies on system
64    memory mapping facilities, if supported.
65
66  For a longer but slightly out of date high-level description, see
67     http://gee.cs.oswego.edu/dl/html/malloc.html
68
69  You may already by default be using a C library containing a malloc
70  that is  based on some version of this malloc (for example in
71  linux). You might still want to use the one in this file in order to
72  customize settings or to avoid overheads associated with library
73  versions.
74
75* Contents, described in more detail in "description of public routines" below.
76
77  Standard (ANSI/SVID/...)  functions:
78    malloc(size_t n);
79    calloc(size_t n_elements, size_t element_size);
80    free(Void_t* p);
81    realloc(Void_t* p, size_t n);
82    memalign(size_t alignment, size_t n);
83    valloc(size_t n);
84    mallinfo()
85    mallopt(int parameter_number, int parameter_value)
86
87  Additional functions:
88    independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]);
89    independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
90    pvalloc(size_t n);
91    cfree(Void_t* p);
92    malloc_trim(size_t pad);
93    malloc_usable_size(Void_t* p);
94    malloc_stats();
95
96* Vital statistics:
97
98  Supported pointer representation:       4 or 8 bytes
99  Supported size_t  representation:       4 or 8 bytes
100       Note that size_t is allowed to be 4 bytes even if pointers are 8.
101       You can adjust this by defining INTERNAL_SIZE_T
102
103  Alignment:                              2 * sizeof(size_t) (default)
104       (i.e., 8 byte alignment with 4byte size_t). This suffices for
105       nearly all current machines and C compilers. However, you can
106       define MALLOC_ALIGNMENT to be wider than this if necessary.
107
108  Minimum overhead per allocated chunk:   4 or 8 bytes
109       Each malloced chunk has a hidden word of overhead holding size
110       and status information.
111
112  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead)
113                          8-byte ptrs:  24/32 bytes (including, 4/8 overhead)
114
115       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
116       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
117       needed; 4 (8) for a trailing size field and 8 (16) bytes for
118       free list pointers. Thus, the minimum allocatable size is
119       16/24/32 bytes.
120
121       Even a request for zero bytes (i.e., malloc(0)) returns a
122       pointer to something of the minimum allocatable size.
123
124       The maximum overhead wastage (i.e., number of extra bytes
125       allocated than were requested in malloc) is less than or equal
126       to the minimum size, except for requests >= mmap_threshold that
127       are serviced via mmap(), where the worst case wastage is 2 *
128       sizeof(size_t) bytes plus the remainder from a system page (the
129       minimal mmap unit); typically 4096 or 8192 bytes.
130
131  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages
132                           8-byte size_t: 2^64 minus about two pages
133
134       It is assumed that (possibly signed) size_t values suffice to
135       represent chunk sizes. `Possibly signed' is due to the fact
136       that `size_t' may be defined on a system as either a signed or
137       an unsigned type. The ISO C standard says that it must be
138       unsigned, but a few systems are known not to adhere to this.
139       Additionally, even when size_t is unsigned, sbrk (which is by
140       default used to obtain memory from system) accepts signed
141       arguments, and may not be able to handle size_t-wide arguments
142       with negative sign bit.  Generally, values that would
143       appear as negative after accounting for overhead and alignment
144       are supported only via mmap(), which does not have this
145       limitation.
146
147       Requests for sizes outside the allowed range will perform an optional
148       failure action and then return null. (Requests may also
149       also fail because a system is out of memory.)
150
151  Thread-safety: thread-safe unless NO_THREADS is defined
152
153  Compliance: I believe it is compliant with the 1997 Single Unix Specification
154       (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
155       others as well.
156
157* Synopsis of compile-time options:
158
159    People have reported using previous versions of this malloc on all
160    versions of Unix, sometimes by tweaking some of the defines
161    below. It has been tested most extensively on Solaris and
162    Linux. It is also reported to work on WIN32 platforms.
163    People also report using it in stand-alone embedded systems.
164
165    The implementation is in straight, hand-tuned ANSI C.  It is not
166    at all modular. (Sorry!)  It uses a lot of macros.  To be at all
167    usable, this code should be compiled using an optimizing compiler
168    (for example gcc -O3) that can simplify expressions and control
169    paths. (FAQ: some macros import variables as arguments rather than
170    declare locals because people reported that some debuggers
171    otherwise get confused.)
172
173    OPTION                     DEFAULT VALUE
174
175    Compilation Environment options:
176
177    __STD_C                    derived from C compiler defines
178    WIN32                      NOT defined
179    HAVE_MEMCPY                defined
180    USE_MEMCPY                 1 if HAVE_MEMCPY is defined
181    HAVE_MMAP                  defined as 1
182    MMAP_CLEARS                1
183    HAVE_MREMAP                0 unless linux defined
184    USE_ARENAS                 the same as HAVE_MMAP
185    malloc_getpagesize         derived from system #includes, or 4096 if not
186    HAVE_USR_INCLUDE_MALLOC_H  NOT defined
187    LACKS_UNISTD_H             NOT defined unless WIN32
188    LACKS_SYS_PARAM_H          NOT defined unless WIN32
189    LACKS_SYS_MMAN_H           NOT defined unless WIN32
190
191    Changing default word sizes:
192
193    INTERNAL_SIZE_T            size_t
194    MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T)
195
196    Configuration and functionality options:
197
198    USE_DL_PREFIX              NOT defined
199    USE_PUBLIC_MALLOC_WRAPPERS NOT defined
200    USE_MALLOC_LOCK            NOT defined
201    MALLOC_DEBUG               NOT defined
202    REALLOC_ZERO_BYTES_FREES   1
203    MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op
204    TRIM_FASTBINS              0
205    FIRST_SORTED_BIN_SIZE      512
206
207    Options for customizing MORECORE:
208
209    MORECORE                   sbrk
210    MORECORE_FAILURE           -1
211    MORECORE_CONTIGUOUS        1
212    MORECORE_CANNOT_TRIM       NOT defined
213    MORECORE_CLEARS            1
214    MMAP_AS_MORECORE_SIZE      (1024 * 1024)
215
216    Tuning options that are also dynamically changeable via mallopt:
217
218    DEFAULT_MXFAST             64
219    DEFAULT_TRIM_THRESHOLD     128 * 1024
220    DEFAULT_TOP_PAD            0
221    DEFAULT_MMAP_THRESHOLD     128 * 1024
222    DEFAULT_MMAP_MAX           65536
223
224    There are several other #defined constants and macros that you
225    probably don't want to touch unless you are extending or adapting malloc.  */
226
227/*
228  __STD_C should be nonzero if using ANSI-standard C compiler, a C++
229  compiler, or a C compiler sufficiently close to ANSI to get away
230  with it.
231*/
232
233#include "cvmx-config.h"
234#include "cvmx.h"
235#include "cvmx-spinlock.h"
236#include "cvmx-malloc.h"
237
238
239#ifndef __STD_C
240#if defined(__STDC__) || defined(__cplusplus)
241#define __STD_C     1
242#else
243#define __STD_C     0
244#endif
245#endif /*__STD_C*/
246
247
248/*
249  Void_t* is the pointer type that malloc should say it returns
250*/
251
252#ifndef Void_t
253#if 1
254#define Void_t      void
255#else
256#define Void_t      char
257#endif
258#endif /*Void_t*/
259
260
261#ifdef __cplusplus
262extern "C" {
263#endif
264
265/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
266
267/* #define  LACKS_UNISTD_H */
268
269#ifndef LACKS_UNISTD_H
270#include <unistd.h>
271#endif
272
273/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
274
275/* #define  LACKS_SYS_PARAM_H */
276
277
278#include <stdio.h>    /* needed for malloc_stats */
279#include <errno.h>    /* needed for optional MALLOC_FAILURE_ACTION */
280
281
282/*
283  Debugging:
284
285  Because freed chunks may be overwritten with bookkeeping fields, this
286  malloc will often die when freed memory is overwritten by user
287  programs.  This can be very effective (albeit in an annoying way)
288  in helping track down dangling pointers.
289
290  If you compile with -DMALLOC_DEBUG, a number of assertion checks are
291  enabled that will catch more memory errors. You probably won't be
292  able to make much sense of the actual assertion errors, but they
293  should help you locate incorrectly overwritten memory.  The checking
294  is fairly extensive, and will slow down execution
295  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set
296  will attempt to check every non-mmapped allocated and free chunk in
297  the course of computing the summmaries. (By nature, mmapped regions
298  cannot be checked very much automatically.)
299
300  Setting MALLOC_DEBUG may also be helpful if you are trying to modify
301  this code. The assertions in the check routines spell out in more
302  detail the assumptions and invariants underlying the algorithms.
303
304  Setting MALLOC_DEBUG does NOT provide an automated mechanism for
305  checking that all accesses to malloced memory stay within their
306  bounds. However, there are several add-ons and adaptations of this
307  or other mallocs available that do this.
308*/
309
310#define MALLOC_DEBUG 1
311#if MALLOC_DEBUG
312#include <assert.h>
313#else
314#define assert(x) ((void)0)
315#endif
316
317
318/*
319  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
320  of chunk sizes.
321
322  The default version is the same as size_t.
323
324  While not strictly necessary, it is best to define this as an
325  unsigned type, even if size_t is a signed type. This may avoid some
326  artificial size limitations on some systems.
327
328  On a 64-bit machine, you may be able to reduce malloc overhead by
329  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
330  expense of not being able to handle more than 2^32 of malloced
331  space. If this limitation is acceptable, you are encouraged to set
332  this unless you are on a platform requiring 16byte alignments. In
333  this case the alignment requirements turn out to negate any
334  potential advantages of decreasing size_t word size.
335
336  Implementors: Beware of the possible combinations of:
337     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
338       and might be the same width as int or as long
339     - size_t might have different width and signedness as INTERNAL_SIZE_T
340     - int and long might be 32 or 64 bits, and might be the same width
341  To deal with this, most comparisons and difference computations
342  among INTERNAL_SIZE_Ts should cast them to unsigned long, being
343  aware of the fact that casting an unsigned int to a wider long does
344  not sign-extend. (This also makes checking for negative numbers
345  awkward.) Some of these casts result in harmless compiler warnings
346  on some systems.
347*/
348
349#ifndef INTERNAL_SIZE_T
350#define INTERNAL_SIZE_T size_t
351#endif
352
353/* The corresponding word size */
354#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
355
356
357/*
358  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
359  It must be a power of two at least 2 * SIZE_SZ, even on machines
360  for which smaller alignments would suffice. It may be defined as
361  larger than this though. Note however that code and data structures
362  are optimized for the case of 8-byte alignment.
363*/
364
365
366#ifndef MALLOC_ALIGNMENT
367#define MALLOC_ALIGNMENT       (2 * SIZE_SZ)
368#endif
369
370/* The corresponding bit mask value */
371#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
372
373
374
375/*
376  REALLOC_ZERO_BYTES_FREES should be set if a call to
377  realloc with zero bytes should be the same as a call to free.
378  This is required by the C standard. Otherwise, since this malloc
379  returns a unique pointer for malloc(0), so does realloc(p, 0).
380*/
381
382#ifndef REALLOC_ZERO_BYTES_FREES
383#define REALLOC_ZERO_BYTES_FREES 1
384#endif
385
386/*
387  TRIM_FASTBINS controls whether free() of a very small chunk can
388  immediately lead to trimming. Setting to true (1) can reduce memory
389  footprint, but will almost always slow down programs that use a lot
390  of small chunks.
391
392  Define this only if you are willing to give up some speed to more
393  aggressively reduce system-level memory footprint when releasing
394  memory in programs that use many small chunks.  You can get
395  essentially the same effect by setting MXFAST to 0, but this can
396  lead to even greater slowdowns in programs using many small chunks.
397  TRIM_FASTBINS is an in-between compile-time option, that disables
398  only those chunks bordering topmost memory from being placed in
399  fastbins.
400*/
401
402#ifndef TRIM_FASTBINS
403#define TRIM_FASTBINS  0
404#endif
405
406
407/*
408  USE_DL_PREFIX will prefix all public routines with the string 'dl'.
409  This is necessary when you only want to use this malloc in one part
410  of a program, using your regular system malloc elsewhere.
411*/
412
413#define USE_DL_PREFIX
414
415
416/*
417   Two-phase name translation.
418   All of the actual routines are given mangled names.
419   When wrappers are used, they become the public callable versions.
420   When DL_PREFIX is used, the callable names are prefixed.
421*/
422
423#ifdef USE_DL_PREFIX
424#define public_cALLOc    cvmx_calloc
425#define public_fREe      cvmx_free
426#define public_cFREe     dlcfree
427#define public_mALLOc    cvmx_malloc
428#define public_mEMALIGn  cvmx_memalign
429#define public_rEALLOc   cvmx_realloc
430#define public_vALLOc    dlvalloc
431#define public_pVALLOc   dlpvalloc
432#define public_mALLINFo  dlmallinfo
433#define public_mALLOPt   dlmallopt
434#define public_mTRIm     dlmalloc_trim
435#define public_mSTATs    dlmalloc_stats
436#define public_mUSABLe   dlmalloc_usable_size
437#define public_iCALLOc   dlindependent_calloc
438#define public_iCOMALLOc dlindependent_comalloc
439#define public_gET_STATe dlget_state
440#define public_sET_STATe dlset_state
441#else /* USE_DL_PREFIX */
442#ifdef _LIBC
443#error _LIBC defined and should not be
444/* Special defines for the GNU C library.  */
445#define public_cALLOc    __libc_calloc
446#define public_fREe      __libc_free
447#define public_cFREe     __libc_cfree
448#define public_mALLOc    __libc_malloc
449#define public_mEMALIGn  __libc_memalign
450#define public_rEALLOc   __libc_realloc
451#define public_vALLOc    __libc_valloc
452#define public_pVALLOc   __libc_pvalloc
453#define public_mALLINFo  __libc_mallinfo
454#define public_mALLOPt   __libc_mallopt
455#define public_mTRIm     __malloc_trim
456#define public_mSTATs    __malloc_stats
457#define public_mUSABLe   __malloc_usable_size
458#define public_iCALLOc   __libc_independent_calloc
459#define public_iCOMALLOc __libc_independent_comalloc
460#define public_gET_STATe __malloc_get_state
461#define public_sET_STATe __malloc_set_state
462#define malloc_getpagesize __getpagesize()
463#define open             __open
464#define mmap             __mmap
465#define munmap           __munmap
466#define mremap           __mremap
467#define mprotect         __mprotect
468#define MORECORE         (*__morecore)
469#define MORECORE_FAILURE 0
470
471Void_t * __default_morecore (ptrdiff_t);
472Void_t *(*__morecore)(ptrdiff_t) = __default_morecore;
473
474#else /* !_LIBC */
475#define public_cALLOc    calloc
476#define public_fREe      free
477#define public_cFREe     cfree
478#define public_mALLOc    malloc
479#define public_mEMALIGn  memalign
480#define public_rEALLOc   realloc
481#define public_vALLOc    valloc
482#define public_pVALLOc   pvalloc
483#define public_mALLINFo  mallinfo
484#define public_mALLOPt   mallopt
485#define public_mTRIm     malloc_trim
486#define public_mSTATs    malloc_stats
487#define public_mUSABLe   malloc_usable_size
488#define public_iCALLOc   independent_calloc
489#define public_iCOMALLOc independent_comalloc
490#define public_gET_STATe malloc_get_state
491#define public_sET_STATe malloc_set_state
492#endif /* _LIBC */
493#endif /* USE_DL_PREFIX */
494
495
496/*
497  HAVE_MEMCPY should be defined if you are not otherwise using
498  ANSI STD C, but still have memcpy and memset in your C library
499  and want to use them in calloc and realloc. Otherwise simple
500  macro versions are defined below.
501
502  USE_MEMCPY should be defined as 1 if you actually want to
503  have memset and memcpy called. People report that the macro
504  versions are faster than libc versions on some systems.
505
506  Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks
507  (of <= 36 bytes) are manually unrolled in realloc and calloc.
508*/
509
510#define HAVE_MEMCPY
511
512#ifndef USE_MEMCPY
513#ifdef HAVE_MEMCPY
514#define USE_MEMCPY 1
515#else
516#define USE_MEMCPY 0
517#endif
518#endif
519
520
521#if (__STD_C || defined(HAVE_MEMCPY))
522
523#ifdef WIN32
524/* On Win32 memset and memcpy are already declared in windows.h */
525#else
526#if __STD_C
527void* memset(void*, int, size_t);
528void* memcpy(void*, const void*, size_t);
529#else
530Void_t* memset();
531Void_t* memcpy();
532#endif
533#endif
534#endif
535
536/*
537  MALLOC_FAILURE_ACTION is the action to take before "return 0" when
538  malloc fails to be able to return memory, either because memory is
539  exhausted or because of illegal arguments.
540
541  By default, sets errno if running on STD_C platform, else does nothing.
542*/
543
544#ifndef MALLOC_FAILURE_ACTION
545#if __STD_C
546#define MALLOC_FAILURE_ACTION \
547   errno = ENOMEM;
548
549#else
550#define MALLOC_FAILURE_ACTION
551#endif
552#endif
553
554/*
555  MORECORE-related declarations. By default, rely on sbrk
556*/
557
558
559#ifdef LACKS_UNISTD_H
560#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
561#if __STD_C
562extern Void_t*     sbrk(ptrdiff_t);
563#else
564extern Void_t*     sbrk();
565#endif
566#endif
567#endif
568
569/*
570  MORECORE is the name of the routine to call to obtain more memory
571  from the system.  See below for general guidance on writing
572  alternative MORECORE functions, as well as a version for WIN32 and a
573  sample version for pre-OSX macos.
574*/
575#undef MORECORE  // not supported
576#ifndef MORECORE
577#define MORECORE notsupported
578#endif
579
580/*
581  MORECORE_FAILURE is the value returned upon failure of MORECORE
582  as well as mmap. Since it cannot be an otherwise valid memory address,
583  and must reflect values of standard sys calls, you probably ought not
584  try to redefine it.
585*/
586
587#ifndef MORECORE_FAILURE
588#define MORECORE_FAILURE (-1)
589#endif
590
591/*
592  If MORECORE_CONTIGUOUS is true, take advantage of fact that
593  consecutive calls to MORECORE with positive arguments always return
594  contiguous increasing addresses.  This is true of unix sbrk.  Even
595  if not defined, when regions happen to be contiguous, malloc will
596  permit allocations spanning regions obtained from different
597  calls. But defining this when applicable enables some stronger
598  consistency checks and space efficiencies.
599*/
600
601#ifndef MORECORE_CONTIGUOUS
602#define MORECORE_CONTIGUOUS 0
603#endif
604
605/*
606  Define MORECORE_CANNOT_TRIM if your version of MORECORE
607  cannot release space back to the system when given negative
608  arguments. This is generally necessary only if you are using
609  a hand-crafted MORECORE function that cannot handle negative arguments.
610*/
611
612#define MORECORE_CANNOT_TRIM 1
613
614/*  MORECORE_CLEARS           (default 1)
615     The degree to which the routine mapped to MORECORE zeroes out
616     memory: never (0), only for newly allocated space (1) or always
617     (2).  The distinction between (1) and (2) is necessary because on
618     some systems, if the application first decrements and then
619     increments the break value, the contents of the reallocated space
620     are unspecified.
621*/
622
623#ifndef MORECORE_CLEARS
624#define MORECORE_CLEARS 0
625#endif
626
627
628/*
629  Define HAVE_MMAP as true to optionally make malloc() use mmap() to
630  allocate very large blocks.  These will be returned to the
631  operating system immediately after a free(). Also, if mmap
632  is available, it is used as a backup strategy in cases where
633  MORECORE fails to provide space from system.
634
635  This malloc is best tuned to work with mmap for large requests.
636  If you do not have mmap, operations involving very large chunks (1MB
637  or so) may be slower than you'd like.
638*/
639
640#undef HAVE_MMAP
641#ifndef HAVE_MMAP
642#define HAVE_MMAP 0
643
644/*
645   Standard unix mmap using /dev/zero clears memory so calloc doesn't
646   need to.
647*/
648
649#ifndef MMAP_CLEARS
650#define MMAP_CLEARS 0
651#endif
652
653#else /* no mmap */
654#ifndef MMAP_CLEARS
655#define MMAP_CLEARS 0
656#endif
657#endif
658
659
660/*
661   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
662   sbrk fails, and mmap is used as a backup (which is done only if
663   HAVE_MMAP).  The value must be a multiple of page size.  This
664   backup strategy generally applies only when systems have "holes" in
665   address space, so sbrk cannot perform contiguous expansion, but
666   there is still space available on system.  On systems for which
667   this is known to be useful (i.e. most linux kernels), this occurs
668   only when programs allocate huge amounts of memory.  Between this,
669   and the fact that mmap regions tend to be limited, the size should
670   be large, to avoid too many mmap calls and thus avoid running out
671   of kernel resources.
672*/
673
674#ifndef MMAP_AS_MORECORE_SIZE
675#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
676#endif
677
678/*
679  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
680  large blocks.  This is currently only possible on Linux with
681  kernel versions newer than 1.3.77.
682*/
683#undef linux
684#ifndef HAVE_MREMAP
685#ifdef linux
686#define HAVE_MREMAP 1
687#else
688#define HAVE_MREMAP 0
689#endif
690
691#endif /* HAVE_MMAP */
692
693/* Define USE_ARENAS to enable support for multiple `arenas'.  These
694   are allocated using mmap(), are necessary for threads and
695   occasionally useful to overcome address space limitations affecting
696   sbrk(). */
697
698#ifndef USE_ARENAS
699#define USE_ARENAS 1  // we 'manually' mmap the arenas.....
700#endif
701
702
703/*
704  The system page size. To the extent possible, this malloc manages
705  memory from the system in page-size units.  Note that this value is
706  cached during initialization into a field of malloc_state. So even
707  if malloc_getpagesize is a function, it is only called once.
708
709  The following mechanics for getpagesize were adapted from bsd/gnu
710  getpagesize.h. If none of the system-probes here apply, a value of
711  4096 is used, which should be OK: If they don't apply, then using
712  the actual value probably doesn't impact performance.
713*/
714
715
716#define malloc_getpagesize (4096)
717#ifndef malloc_getpagesize
718
719#ifndef LACKS_UNISTD_H
720#  include <unistd.h>
721#endif
722
723#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
724#    ifndef _SC_PAGE_SIZE
725#      define _SC_PAGE_SIZE _SC_PAGESIZE
726#    endif
727#  endif
728
729#  ifdef _SC_PAGE_SIZE
730#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
731#  else
732#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
733       extern size_t getpagesize();
734#      define malloc_getpagesize getpagesize()
735#    else
736#      ifdef WIN32 /* use supplied emulation of getpagesize */
737#        define malloc_getpagesize getpagesize()
738#      else
739#        ifndef LACKS_SYS_PARAM_H
740#          include <sys/param.h>
741#        endif
742#        ifdef EXEC_PAGESIZE
743#          define malloc_getpagesize EXEC_PAGESIZE
744#        else
745#          ifdef NBPG
746#            ifndef CLSIZE
747#              define malloc_getpagesize NBPG
748#            else
749#              define malloc_getpagesize (NBPG * CLSIZE)
750#            endif
751#          else
752#            ifdef NBPC
753#              define malloc_getpagesize NBPC
754#            else
755#              ifdef PAGESIZE
756#                define malloc_getpagesize PAGESIZE
757#              else /* just guess */
758#                define malloc_getpagesize (4096)
759#              endif
760#            endif
761#          endif
762#        endif
763#      endif
764#    endif
765#  endif
766#endif
767
768/*
769  This version of malloc supports the standard SVID/XPG mallinfo
770  routine that returns a struct containing usage properties and
771  statistics. It should work on any SVID/XPG compliant system that has
772  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
773  install such a thing yourself, cut out the preliminary declarations
774  as described above and below and save them in a malloc.h file. But
775  there's no compelling reason to bother to do this.)
776
777  The main declaration needed is the mallinfo struct that is returned
778  (by-copy) by mallinfo().  The SVID/XPG malloinfo struct contains a
779  bunch of fields that are not even meaningful in this version of
780  malloc.  These fields are are instead filled by mallinfo() with
781  other numbers that might be of interest.
782
783  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
784  /usr/include/malloc.h file that includes a declaration of struct
785  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant
786  version is declared below.  These must be precisely the same for
787  mallinfo() to work.  The original SVID version of this struct,
788  defined on most systems with mallinfo, declares all fields as
789  ints. But some others define as unsigned long. If your system
790  defines the fields using a type of different width than listed here,
791  you must #include your system version and #define
792  HAVE_USR_INCLUDE_MALLOC_H.
793*/
794
795/* #define HAVE_USR_INCLUDE_MALLOC_H */
796
797#ifdef HAVE_USR_INCLUDE_MALLOC_H
798#include "/usr/include/malloc.h"
799#endif
800
801
802/* ---------- description of public routines ------------ */
803
804/*
805  malloc(size_t n)
806  Returns a pointer to a newly allocated chunk of at least n bytes, or null
807  if no space is available. Additionally, on failure, errno is
808  set to ENOMEM on ANSI C systems.
809
810  If n is zero, malloc returns a minumum-sized chunk. (The minimum
811  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
812  systems.)  On most systems, size_t is an unsigned type, so calls
813  with negative arguments are interpreted as requests for huge amounts
814  of space, which will often fail. The maximum supported value of n
815  differs across systems, but is in all cases less than the maximum
816  representable value of a size_t.
817*/
818#if __STD_C
819Void_t*  public_mALLOc(cvmx_arena_list_t arena_list, size_t);
820#else
821Void_t*  public_mALLOc();
822#endif
823
824/*
825  free(Void_t* p)
826  Releases the chunk of memory pointed to by p, that had been previously
827  allocated using malloc or a related routine such as realloc.
828  It has no effect if p is null. It can have arbitrary (i.e., bad!)
829  effects if p has already been freed.
830
831  Unless disabled (using mallopt), freeing very large spaces will
832  when possible, automatically trigger operations that give
833  back unused memory to the system, thus reducing program footprint.
834*/
835#if __STD_C
836void     public_fREe(Void_t*);
837#else
838void     public_fREe();
839#endif
840
841/*
842  calloc(size_t n_elements, size_t element_size);
843  Returns a pointer to n_elements * element_size bytes, with all locations
844  set to zero.
845*/
846#if __STD_C
847Void_t*  public_cALLOc(cvmx_arena_list_t arena_list, size_t, size_t);
848#else
849Void_t*  public_cALLOc();
850#endif
851
852/*
853  realloc(Void_t* p, size_t n)
854  Returns a pointer to a chunk of size n that contains the same data
855  as does chunk p up to the minimum of (n, p's size) bytes, or null
856  if no space is available.
857
858  The returned pointer may or may not be the same as p. The algorithm
859  prefers extending p when possible, otherwise it employs the
860  equivalent of a malloc-copy-free sequence.
861
862  If p is null, realloc is equivalent to malloc.
863
864  If space is not available, realloc returns null, errno is set (if on
865  ANSI) and p is NOT freed.
866
867  if n is for fewer bytes than already held by p, the newly unused
868  space is lopped off and freed if possible.  Unless the #define
869  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
870  zero (re)allocates a minimum-sized chunk.
871
872  Large chunks that were internally obtained via mmap will always
873  be reallocated using malloc-copy-free sequences unless
874  the system supports MREMAP (currently only linux).
875
876  The old unix realloc convention of allowing the last-free'd chunk
877  to be used as an argument to realloc is not supported.
878*/
879#if __STD_C
880Void_t*  public_rEALLOc(cvmx_arena_list_t arena_list, Void_t*, size_t);
881#else
882Void_t*  public_rEALLOc();
883#endif
884
885/*
886  memalign(size_t alignment, size_t n);
887  Returns a pointer to a newly allocated chunk of n bytes, aligned
888  in accord with the alignment argument.
889
890  The alignment argument should be a power of two. If the argument is
891  not a power of two, the nearest greater power is used.
892  8-byte alignment is guaranteed by normal malloc calls, so don't
893  bother calling memalign with an argument of 8 or less.
894
895  Overreliance on memalign is a sure way to fragment space.
896*/
897#if __STD_C
898Void_t*  public_mEMALIGn(cvmx_arena_list_t arena_list, size_t, size_t);
899#else
900Void_t*  public_mEMALIGn();
901#endif
902
903/*
904  valloc(size_t n);
905  Equivalent to memalign(pagesize, n), where pagesize is the page
906  size of the system. If the pagesize is unknown, 4096 is used.
907*/
908#if __STD_C
909Void_t*  public_vALLOc(size_t);
910#else
911Void_t*  public_vALLOc();
912#endif
913
914
915
916/*
917  mallopt(int parameter_number, int parameter_value)
918  Sets tunable parameters The format is to provide a
919  (parameter-number, parameter-value) pair.  mallopt then sets the
920  corresponding parameter to the argument value if it can (i.e., so
921  long as the value is meaningful), and returns 1 if successful else
922  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
923  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
924  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
925  so setting them has no effect. But this malloc also supports four
926  other options in mallopt. See below for details.  Briefly, supported
927  parameters are as follows (listed defaults are for "typical"
928  configurations).
929
930  Symbol            param #   default    allowed param values
931  M_MXFAST          1         64         0-80  (0 disables fastbins)
932  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming)
933  M_TOP_PAD        -2         0          any
934  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support)
935  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
936*/
937#if __STD_C
938int      public_mALLOPt(int, int);
939#else
940int      public_mALLOPt();
941#endif
942
943
944/*
945  mallinfo()
946  Returns (by copy) a struct containing various summary statistics:
947
948  arena:     current total non-mmapped bytes allocated from system
949  ordblks:   the number of free chunks
950  smblks:    the number of fastbin blocks (i.e., small chunks that
951               have been freed but not use resused or consolidated)
952  hblks:     current number of mmapped regions
953  hblkhd:    total bytes held in mmapped regions
954  usmblks:   the maximum total allocated space. This will be greater
955                than current total if trimming has occurred.
956  fsmblks:   total bytes held in fastbin blocks
957  uordblks:  current total allocated space (normal or mmapped)
958  fordblks:  total free space
959  keepcost:  the maximum number of bytes that could ideally be released
960               back to system via malloc_trim. ("ideally" means that
961               it ignores page restrictions etc.)
962
963  Because these fields are ints, but internal bookkeeping may
964  be kept as longs, the reported values may wrap around zero and
965  thus be inaccurate.
966*/
967#if __STD_C
968struct mallinfo public_mALLINFo(void);
969#else
970struct mallinfo public_mALLINFo();
971#endif
972
973/*
974  independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
975
976  independent_calloc is similar to calloc, but instead of returning a
977  single cleared space, it returns an array of pointers to n_elements
978  independent elements that can hold contents of size elem_size, each
979  of which starts out cleared, and can be independently freed,
980  realloc'ed etc. The elements are guaranteed to be adjacently
981  allocated (this is not guaranteed to occur with multiple callocs or
982  mallocs), which may also improve cache locality in some
983  applications.
984
985  The "chunks" argument is optional (i.e., may be null, which is
986  probably the most typical usage). If it is null, the returned array
987  is itself dynamically allocated and should also be freed when it is
988  no longer needed. Otherwise, the chunks array must be of at least
989  n_elements in length. It is filled in with the pointers to the
990  chunks.
991
992  In either case, independent_calloc returns this pointer array, or
993  null if the allocation failed.  If n_elements is zero and "chunks"
994  is null, it returns a chunk representing an array with zero elements
995  (which should be freed if not wanted).
996
997  Each element must be individually freed when it is no longer
998  needed. If you'd like to instead be able to free all at once, you
999  should instead use regular calloc and assign pointers into this
1000  space to represent elements.  (In this case though, you cannot
1001  independently free elements.)
1002
1003  independent_calloc simplifies and speeds up implementations of many
1004  kinds of pools.  It may also be useful when constructing large data
1005  structures that initially have a fixed number of fixed-sized nodes,
1006  but the number is not known at compile time, and some of the nodes
1007  may later need to be freed. For example:
1008
1009  struct Node { int item; struct Node* next; };
1010
1011  struct Node* build_list() {
1012    struct Node** pool;
1013    int n = read_number_of_nodes_needed();
1014    if (n <= 0) return 0;
1015    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0);
1016    if (pool == 0) die();
1017    // organize into a linked list...
1018    struct Node* first = pool[0];
1019    for (i = 0; i < n-1; ++i)
1020      pool[i]->next = pool[i+1];
1021    free(pool);     // Can now free the array (or not, if it is needed later)
1022    return first;
1023  }
1024*/
1025#if __STD_C
1026Void_t** public_iCALLOc(size_t, size_t, Void_t**);
1027#else
1028Void_t** public_iCALLOc();
1029#endif
1030
1031/*
1032  independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
1033
1034  independent_comalloc allocates, all at once, a set of n_elements
1035  chunks with sizes indicated in the "sizes" array.    It returns
1036  an array of pointers to these elements, each of which can be
1037  independently freed, realloc'ed etc. The elements are guaranteed to
1038  be adjacently allocated (this is not guaranteed to occur with
1039  multiple callocs or mallocs), which may also improve cache locality
1040  in some applications.
1041
1042  The "chunks" argument is optional (i.e., may be null). If it is null
1043  the returned array is itself dynamically allocated and should also
1044  be freed when it is no longer needed. Otherwise, the chunks array
1045  must be of at least n_elements in length. It is filled in with the
1046  pointers to the chunks.
1047
1048  In either case, independent_comalloc returns this pointer array, or
1049  null if the allocation failed.  If n_elements is zero and chunks is
1050  null, it returns a chunk representing an array with zero elements
1051  (which should be freed if not wanted).
1052
1053  Each element must be individually freed when it is no longer
1054  needed. If you'd like to instead be able to free all at once, you
1055  should instead use a single regular malloc, and assign pointers at
1056  particular offsets in the aggregate space. (In this case though, you
1057  cannot independently free elements.)
1058
1059  independent_comallac differs from independent_calloc in that each
1060  element may have a different size, and also that it does not
1061  automatically clear elements.
1062
1063  independent_comalloc can be used to speed up allocation in cases
1064  where several structs or objects must always be allocated at the
1065  same time.  For example:
1066
1067  struct Head { ... }
1068  struct Foot { ... }
1069
1070  void send_message(char* msg) {
1071    int msglen = strlen(msg);
1072    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
1073    void* chunks[3];
1074    if (independent_comalloc(3, sizes, chunks) == 0)
1075      die();
1076    struct Head* head = (struct Head*)(chunks[0]);
1077    char*        body = (char*)(chunks[1]);
1078    struct Foot* foot = (struct Foot*)(chunks[2]);
1079    // ...
1080  }
1081
1082  In general though, independent_comalloc is worth using only for
1083  larger values of n_elements. For small values, you probably won't
1084  detect enough difference from series of malloc calls to bother.
1085
1086  Overuse of independent_comalloc can increase overall memory usage,
1087  since it cannot reuse existing noncontiguous small chunks that
1088  might be available for some of the elements.
1089*/
1090#if __STD_C
1091Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**);
1092#else
1093Void_t** public_iCOMALLOc();
1094#endif
1095
1096
1097/*
1098  pvalloc(size_t n);
1099  Equivalent to valloc(minimum-page-that-holds(n)), that is,
1100  round up n to nearest pagesize.
1101 */
1102#if __STD_C
1103Void_t*  public_pVALLOc(size_t);
1104#else
1105Void_t*  public_pVALLOc();
1106#endif
1107
1108/*
1109  cfree(Void_t* p);
1110  Equivalent to free(p).
1111
1112  cfree is needed/defined on some systems that pair it with calloc,
1113  for odd historical reasons (such as: cfree is used in example
1114  code in the first edition of K&R).
1115*/
1116#if __STD_C
1117void     public_cFREe(Void_t*);
1118#else
1119void     public_cFREe();
1120#endif
1121
1122/*
1123  malloc_trim(size_t pad);
1124
1125  If possible, gives memory back to the system (via negative
1126  arguments to sbrk) if there is unused memory at the `high' end of
1127  the malloc pool. You can call this after freeing large blocks of
1128  memory to potentially reduce the system-level memory requirements
1129  of a program. However, it cannot guarantee to reduce memory. Under
1130  some allocation patterns, some large free blocks of memory will be
1131  locked between two used chunks, so they cannot be given back to
1132  the system.
1133
1134  The `pad' argument to malloc_trim represents the amount of free
1135  trailing space to leave untrimmed. If this argument is zero,
1136  only the minimum amount of memory to maintain internal data
1137  structures will be left (one page or less). Non-zero arguments
1138  can be supplied to maintain enough trailing space to service
1139  future expected allocations without having to re-obtain memory
1140  from the system.
1141
1142  Malloc_trim returns 1 if it actually released any memory, else 0.
1143  On systems that do not support "negative sbrks", it will always
1144  rreturn 0.
1145*/
1146#if __STD_C
1147int      public_mTRIm(size_t);
1148#else
1149int      public_mTRIm();
1150#endif
1151
1152/*
1153  malloc_usable_size(Void_t* p);
1154
1155  Returns the number of bytes you can actually use in
1156  an allocated chunk, which may be more than you requested (although
1157  often not) due to alignment and minimum size constraints.
1158  You can use this many bytes without worrying about
1159  overwriting other allocated objects. This is not a particularly great
1160  programming practice. malloc_usable_size can be more useful in
1161  debugging and assertions, for example:
1162
1163  p = malloc(n);
1164  assert(malloc_usable_size(p) >= 256);
1165
1166*/
1167#if __STD_C
1168size_t   public_mUSABLe(Void_t*);
1169#else
1170size_t   public_mUSABLe();
1171#endif
1172
1173/*
1174  malloc_stats();
1175  Prints on stderr the amount of space obtained from the system (both
1176  via sbrk and mmap), the maximum amount (which may be more than
1177  current if malloc_trim and/or munmap got called), and the current
1178  number of bytes allocated via malloc (or realloc, etc) but not yet
1179  freed. Note that this is the number of bytes allocated, not the
1180  number requested. It will be larger than the number requested
1181  because of alignment and bookkeeping overhead. Because it includes
1182  alignment wastage as being in use, this figure may be greater than
1183  zero even when no user-level chunks are allocated.
1184
1185  The reported current and maximum system memory can be inaccurate if
1186  a program makes other calls to system memory allocation functions
1187  (normally sbrk) outside of malloc.
1188
1189  malloc_stats prints only the most commonly interesting statistics.
1190  More information can be obtained by calling mallinfo.
1191
1192*/
1193#if __STD_C
1194void     public_mSTATs(void);
1195#else
1196void     public_mSTATs();
1197#endif
1198
1199/*
1200  malloc_get_state(void);
1201
1202  Returns the state of all malloc variables in an opaque data
1203  structure.
1204*/
1205#if __STD_C
1206Void_t*  public_gET_STATe(void);
1207#else
1208Void_t*  public_gET_STATe();
1209#endif
1210
1211/*
1212  malloc_set_state(Void_t* state);
1213
1214  Restore the state of all malloc variables from data obtained with
1215  malloc_get_state().
1216*/
1217#if __STD_C
1218int      public_sET_STATe(Void_t*);
1219#else
1220int      public_sET_STATe();
1221#endif
1222
1223#ifdef _LIBC
1224/*
1225  posix_memalign(void **memptr, size_t alignment, size_t size);
1226
1227  POSIX wrapper like memalign(), checking for validity of size.
1228*/
1229int      __posix_memalign(void **, size_t, size_t);
1230#endif
1231
1232/* mallopt tuning options */
1233
1234/*
1235  M_MXFAST is the maximum request size used for "fastbins", special bins
1236  that hold returned chunks without consolidating their spaces. This
1237  enables future requests for chunks of the same size to be handled
1238  very quickly, but can increase fragmentation, and thus increase the
1239  overall memory footprint of a program.
1240
1241  This malloc manages fastbins very conservatively yet still
1242  efficiently, so fragmentation is rarely a problem for values less
1243  than or equal to the default.  The maximum supported value of MXFAST
1244  is 80. You wouldn't want it any higher than this anyway.  Fastbins
1245  are designed especially for use with many small structs, objects or
1246  strings -- the default handles structs/objects/arrays with sizes up
1247  to 8 4byte fields, or small strings representing words, tokens,
1248  etc. Using fastbins for larger objects normally worsens
1249  fragmentation without improving speed.
1250
1251  M_MXFAST is set in REQUEST size units. It is internally used in
1252  chunksize units, which adds padding and alignment.  You can reduce
1253  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
1254  algorithm to be a closer approximation of fifo-best-fit in all cases,
1255  not just for larger requests, but will generally cause it to be
1256  slower.
1257*/
1258
1259
1260/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
1261#ifndef M_MXFAST
1262#define M_MXFAST            1
1263#endif
1264
1265#ifndef DEFAULT_MXFAST
1266#define DEFAULT_MXFAST     64
1267#endif
1268
1269
1270/*
1271  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
1272  to keep before releasing via malloc_trim in free().
1273
1274  Automatic trimming is mainly useful in long-lived programs.
1275  Because trimming via sbrk can be slow on some systems, and can
1276  sometimes be wasteful (in cases where programs immediately
1277  afterward allocate more large chunks) the value should be high
1278  enough so that your overall system performance would improve by
1279  releasing this much memory.
1280
1281  The trim threshold and the mmap control parameters (see below)
1282  can be traded off with one another. Trimming and mmapping are
1283  two different ways of releasing unused memory back to the
1284  system. Between these two, it is often possible to keep
1285  system-level demands of a long-lived program down to a bare
1286  minimum. For example, in one test suite of sessions measuring
1287  the XF86 X server on Linux, using a trim threshold of 128K and a
1288  mmap threshold of 192K led to near-minimal long term resource
1289  consumption.
1290
1291  If you are using this malloc in a long-lived program, it should
1292  pay to experiment with these values.  As a rough guide, you
1293  might set to a value close to the average size of a process
1294  (program) running on your system.  Releasing this much memory
1295  would allow such a process to run in memory.  Generally, it's
1296  worth it to tune for trimming rather tham memory mapping when a
1297  program undergoes phases where several large chunks are
1298  allocated and released in ways that can reuse each other's
1299  storage, perhaps mixed with phases where there are no such
1300  chunks at all.  And in well-behaved long-lived programs,
1301  controlling release of large blocks via trimming versus mapping
1302  is usually faster.
1303
1304  However, in most programs, these parameters serve mainly as
1305  protection against the system-level effects of carrying around
1306  massive amounts of unneeded memory. Since frequent calls to
1307  sbrk, mmap, and munmap otherwise degrade performance, the default
1308  parameters are set to relatively high values that serve only as
1309  safeguards.
1310
1311  The trim value It must be greater than page size to have any useful
1312  effect.  To disable trimming completely, you can set to
1313  (unsigned long)(-1)
1314
1315  Trim settings interact with fastbin (MXFAST) settings: Unless
1316  TRIM_FASTBINS is defined, automatic trimming never takes place upon
1317  freeing a chunk with size less than or equal to MXFAST. Trimming is
1318  instead delayed until subsequent freeing of larger chunks. However,
1319  you can still force an attempted trim by calling malloc_trim.
1320
1321  Also, trimming is not generally possible in cases where
1322  the main arena is obtained via mmap.
1323
1324  Note that the trick some people use of mallocing a huge space and
1325  then freeing it at program startup, in an attempt to reserve system
1326  memory, doesn't have the intended effect under automatic trimming,
1327  since that memory will immediately be returned to the system.
1328*/
1329
1330#define M_TRIM_THRESHOLD       -1
1331
1332#ifndef DEFAULT_TRIM_THRESHOLD
1333#define DEFAULT_TRIM_THRESHOLD (128 * 1024)
1334#endif
1335
1336/*
1337  M_TOP_PAD is the amount of extra `padding' space to allocate or
1338  retain whenever sbrk is called. It is used in two ways internally:
1339
1340  * When sbrk is called to extend the top of the arena to satisfy
1341  a new malloc request, this much padding is added to the sbrk
1342  request.
1343
1344  * When malloc_trim is called automatically from free(),
1345  it is used as the `pad' argument.
1346
1347  In both cases, the actual amount of padding is rounded
1348  so that the end of the arena is always a system page boundary.
1349
1350  The main reason for using padding is to avoid calling sbrk so
1351  often. Having even a small pad greatly reduces the likelihood
1352  that nearly every malloc request during program start-up (or
1353  after trimming) will invoke sbrk, which needlessly wastes
1354  time.
1355
1356  Automatic rounding-up to page-size units is normally sufficient
1357  to avoid measurable overhead, so the default is 0.  However, in
1358  systems where sbrk is relatively slow, it can pay to increase
1359  this value, at the expense of carrying around more memory than
1360  the program needs.
1361*/
1362
1363#define M_TOP_PAD              -2
1364
1365#ifndef DEFAULT_TOP_PAD
1366#define DEFAULT_TOP_PAD        (0)
1367#endif
1368
1369/*
1370  M_MMAP_THRESHOLD is the request size threshold for using mmap()
1371  to service a request. Requests of at least this size that cannot
1372  be allocated using already-existing space will be serviced via mmap.
1373  (If enough normal freed space already exists it is used instead.)
1374
1375  Using mmap segregates relatively large chunks of memory so that
1376  they can be individually obtained and released from the host
1377  system. A request serviced through mmap is never reused by any
1378  other request (at least not directly; the system may just so
1379  happen to remap successive requests to the same locations).
1380
1381  Segregating space in this way has the benefits that:
1382
1383   1. Mmapped space can ALWAYS be individually released back
1384      to the system, which helps keep the system level memory
1385      demands of a long-lived program low.
1386   2. Mapped memory can never become `locked' between
1387      other chunks, as can happen with normally allocated chunks, which
1388      means that even trimming via malloc_trim would not release them.
1389   3. On some systems with "holes" in address spaces, mmap can obtain
1390      memory that sbrk cannot.
1391
1392  However, it has the disadvantages that:
1393
1394   1. The space cannot be reclaimed, consolidated, and then
1395      used to service later requests, as happens with normal chunks.
1396   2. It can lead to more wastage because of mmap page alignment
1397      requirements
1398   3. It causes malloc performance to be more dependent on host
1399      system memory management support routines which may vary in
1400      implementation quality and may impose arbitrary
1401      limitations. Generally, servicing a request via normal
1402      malloc steps is faster than going through a system's mmap.
1403
1404  The advantages of mmap nearly always outweigh disadvantages for
1405  "large" chunks, but the value of "large" varies across systems.  The
1406  default is an empirically derived value that works well in most
1407  systems.
1408*/
1409
1410#define M_MMAP_THRESHOLD      -3
1411
1412#ifndef DEFAULT_MMAP_THRESHOLD
1413#define DEFAULT_MMAP_THRESHOLD (128 * 1024)
1414#endif
1415
1416/*
1417  M_MMAP_MAX is the maximum number of requests to simultaneously
1418  service using mmap. This parameter exists because
1419  some systems have a limited number of internal tables for
1420  use by mmap, and using more than a few of them may degrade
1421  performance.
1422
1423  The default is set to a value that serves only as a safeguard.
1424  Setting to 0 disables use of mmap for servicing large requests.  If
1425  HAVE_MMAP is not set, the default value is 0, and attempts to set it
1426  to non-zero values in mallopt will fail.
1427*/
1428
1429#define M_MMAP_MAX             -4
1430
1431#ifndef DEFAULT_MMAP_MAX
1432#if HAVE_MMAP
1433#define DEFAULT_MMAP_MAX       (65536)
1434#else
1435#define DEFAULT_MMAP_MAX       (0)
1436#endif
1437#endif
1438
1439#ifdef __cplusplus
1440};  /* end of extern "C" */
1441#endif
1442
1443#include <cvmx-spinlock.h>
1444#include "malloc.h"
1445#include "thread-m.h"
1446
1447#ifdef DEBUG_PRINTS
1448#define debug_printf    printf
1449#else
1450#define debug_printf(format, args...)
1451#endif
1452
1453#ifndef BOUNDED_N
1454#define BOUNDED_N(ptr, sz) (ptr)
1455#endif
1456#ifndef RETURN_ADDRESS
1457#define RETURN_ADDRESS(X_) (NULL)
1458#endif
1459
1460/* On some platforms we can compile internal, not exported functions better.
1461   Let the environment provide a macro and define it to be empty if it
1462   is not available.  */
1463#ifndef internal_function
1464# define internal_function
1465#endif
1466
1467/* Forward declarations.  */
1468struct malloc_chunk;
1469typedef struct malloc_chunk* mchunkptr;
1470
1471/* Internal routines.  */
1472
1473#if __STD_C
1474
1475static Void_t*         _int_malloc(mstate, size_t);
1476static void            _int_free(mstate, Void_t*);
1477static Void_t*         _int_realloc(mstate, Void_t*, size_t);
1478static Void_t*         _int_memalign(mstate, size_t, size_t);
1479static Void_t*         _int_valloc(mstate, size_t);
1480static Void_t*  _int_pvalloc(mstate, size_t);
1481static Void_t*  cALLOc(cvmx_arena_list_t arena_list, size_t, size_t);
1482static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**);
1483static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**);
1484static int      mTRIm(size_t);
1485static size_t   mUSABLe(Void_t*);
1486static void     mSTATs(void);
1487static int      mALLOPt(int, int);
1488static struct mallinfo mALLINFo(mstate);
1489
1490static Void_t* internal_function mem2mem_check(Void_t *p, size_t sz);
1491static int internal_function top_check(void);
1492static void internal_function munmap_chunk(mchunkptr p);
1493#if HAVE_MREMAP
1494static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size);
1495#endif
1496
1497static Void_t*   malloc_check(size_t sz, const Void_t *caller);
1498static void      free_check(Void_t* mem, const Void_t *caller);
1499static Void_t*   realloc_check(Void_t* oldmem, size_t bytes,
1500			       const Void_t *caller);
1501static Void_t*   memalign_check(size_t alignment, size_t bytes,
1502				const Void_t *caller);
1503#ifndef NO_THREADS
1504static Void_t*   malloc_starter(size_t sz, const Void_t *caller);
1505static void      free_starter(Void_t* mem, const Void_t *caller);
1506static Void_t*   malloc_atfork(size_t sz, const Void_t *caller);
1507static void      free_atfork(Void_t* mem, const Void_t *caller);
1508#endif
1509
1510#else
1511
1512Void_t*         _int_malloc();
1513void            _int_free();
1514Void_t*         _int_realloc();
1515Void_t*         _int_memalign();
1516Void_t*         _int_valloc();
1517Void_t*         _int_pvalloc();
1518/*static Void_t*  cALLOc();*/
1519static Void_t** _int_icalloc();
1520static Void_t** _int_icomalloc();
1521static int      mTRIm();
1522static size_t   mUSABLe();
1523static void     mSTATs();
1524static int      mALLOPt();
1525static struct mallinfo mALLINFo();
1526
1527#endif
1528
1529
1530
1531
1532/* ------------- Optional versions of memcopy ---------------- */
1533
1534
1535#if USE_MEMCPY
1536
1537/*
1538  Note: memcpy is ONLY invoked with non-overlapping regions,
1539  so the (usually slower) memmove is not needed.
1540*/
1541
1542#define MALLOC_COPY(dest, src, nbytes)  memcpy(dest, src, nbytes)
1543#define MALLOC_ZERO(dest, nbytes)       memset(dest, 0,   nbytes)
1544
1545#else /* !USE_MEMCPY */
1546
1547/* Use Duff's device for good zeroing/copying performance. */
1548
1549#define MALLOC_ZERO(charp, nbytes)                                            \
1550do {                                                                          \
1551  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \
1552  unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
1553  long mcn;                                                                   \
1554  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
1555  switch (mctmp) {                                                            \
1556    case 0: for(;;) { *mzp++ = 0;                                             \
1557    case 7:           *mzp++ = 0;                                             \
1558    case 6:           *mzp++ = 0;                                             \
1559    case 5:           *mzp++ = 0;                                             \
1560    case 4:           *mzp++ = 0;                                             \
1561    case 3:           *mzp++ = 0;                                             \
1562    case 2:           *mzp++ = 0;                                             \
1563    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; }                \
1564  }                                                                           \
1565} while(0)
1566
1567#define MALLOC_COPY(dest,src,nbytes)                                          \
1568do {                                                                          \
1569  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \
1570  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \
1571  unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
1572  long mcn;                                                                   \
1573  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
1574  switch (mctmp) {                                                            \
1575    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \
1576    case 7:           *mcdst++ = *mcsrc++;                                    \
1577    case 6:           *mcdst++ = *mcsrc++;                                    \
1578    case 5:           *mcdst++ = *mcsrc++;                                    \
1579    case 4:           *mcdst++ = *mcsrc++;                                    \
1580    case 3:           *mcdst++ = *mcsrc++;                                    \
1581    case 2:           *mcdst++ = *mcsrc++;                                    \
1582    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \
1583  }                                                                           \
1584} while(0)
1585
1586#endif
1587
1588/* ------------------ MMAP support ------------------  */
1589
1590
1591#if HAVE_MMAP
1592
1593#include <fcntl.h>
1594#ifndef LACKS_SYS_MMAN_H
1595#include <sys/mman.h>
1596#endif
1597
1598#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
1599# define MAP_ANONYMOUS MAP_ANON
1600#endif
1601#if !defined(MAP_FAILED)
1602# define MAP_FAILED ((char*)-1)
1603#endif
1604
1605#ifndef MAP_NORESERVE
1606# ifdef MAP_AUTORESRV
1607#  define MAP_NORESERVE MAP_AUTORESRV
1608# else
1609#  define MAP_NORESERVE 0
1610# endif
1611#endif
1612
1613/*
1614   Nearly all versions of mmap support MAP_ANONYMOUS,
1615   so the following is unlikely to be needed, but is
1616   supplied just in case.
1617*/
1618
1619#ifndef MAP_ANONYMOUS
1620
1621static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
1622
1623#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \
1624 (dev_zero_fd = open("/dev/zero", O_RDWR), \
1625  mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \
1626   mmap((addr), (size), (prot), (flags), dev_zero_fd, 0))
1627
1628#else
1629
1630#define MMAP(addr, size, prot, flags) \
1631 (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
1632
1633#endif
1634
1635
1636#endif /* HAVE_MMAP */
1637
1638
1639/*
1640  -----------------------  Chunk representations -----------------------
1641*/
1642
1643
1644/*
1645  This struct declaration is misleading (but accurate and necessary).
1646  It declares a "view" into memory allowing access to necessary
1647  fields at known offsets from a given base. See explanation below.
1648*/
1649struct malloc_chunk {
1650
1651  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
1652  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
1653  mstate               arena_ptr;  /* ptr to arena chunk belongs to */
1654
1655  struct malloc_chunk* fd;         /* double links -- used only if free. */
1656  struct malloc_chunk* bk;
1657};
1658
1659
1660/*
1661   malloc_chunk details:
1662
1663    (The following includes lightly edited explanations by Colin Plumb.)
1664
1665    Chunks of memory are maintained using a `boundary tag' method as
1666    described in e.g., Knuth or Standish.  (See the paper by Paul
1667    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
1668    survey of such techniques.)  Sizes of free chunks are stored both
1669    in the front of each chunk and at the end.  This makes
1670    consolidating fragmented chunks into bigger chunks very fast.  The
1671    size fields also hold bits representing whether chunks are free or
1672    in use.
1673
1674    An allocated chunk looks like this:
1675
1676
1677    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1678            |             Size of previous chunk, if allocated            | |
1679            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1680            |             Size of chunk, in bytes                         |P|
1681      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1682            |             User data starts here...                          .
1683            .                                                               .
1684            .             (malloc_usable_space() bytes)                     .
1685            .                                                               |
1686nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1687            |             Size of chunk                                     |
1688            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1689
1690
1691    Where "chunk" is the front of the chunk for the purpose of most of
1692    the malloc code, but "mem" is the pointer that is returned to the
1693    user.  "Nextchunk" is the beginning of the next contiguous chunk.
1694
1695    Chunks always begin on even word boundries, so the mem portion
1696    (which is returned to the user) is also on an even word boundary, and
1697    thus at least double-word aligned.
1698
1699    Free chunks are stored in circular doubly-linked lists, and look like this:
1700
1701    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1702            |             Size of previous chunk                            |
1703            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1704    `head:' |             Size of chunk, in bytes                         |P|
1705      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1706            |             Forward pointer to next chunk in list             |
1707            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1708            |             Back pointer to previous chunk in list            |
1709            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1710            |             Unused space (may be 0 bytes long)                .
1711            .                                                               .
1712            .                                                               |
1713nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1714    `foot:' |             Size of chunk, in bytes                           |
1715            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
1716
1717    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
1718    chunk size (which is always a multiple of two words), is an in-use
1719    bit for the *previous* chunk.  If that bit is *clear*, then the
1720    word before the current chunk size contains the previous chunk
1721    size, and can be used to find the front of the previous chunk.
1722    The very first chunk allocated always has this bit set,
1723    preventing access to non-existent (or non-owned) memory. If
1724    prev_inuse is set for any given chunk, then you CANNOT determine
1725    the size of the previous chunk, and might even get a memory
1726    addressing fault when trying to do so.
1727
1728    Note that the `foot' of the current chunk is actually represented
1729    as the prev_size of the NEXT chunk. This makes it easier to
1730    deal with alignments etc but can be very confusing when trying
1731    to extend or adapt this code.
1732
1733    The two exceptions to all this are
1734
1735     1. The special chunk `top' doesn't bother using the
1736        trailing size field since there is no next contiguous chunk
1737        that would have to index off it. After initialization, `top'
1738        is forced to always exist.  If it would become less than
1739        MINSIZE bytes long, it is replenished.
1740
1741     2. Chunks allocated via mmap, which have the second-lowest-order
1742        bit (IS_MMAPPED) set in their size fields.  Because they are
1743        allocated one-by-one, each must contain its own trailing size field.
1744
1745*/
1746
1747/*
1748  ---------- Size and alignment checks and conversions ----------
1749*/
1750
1751/* conversion from malloc headers to user pointers, and back */
1752/* Added size for pointer to make room for arena_ptr */
1753#define chunk2mem(p)   ((Void_t*)((char*)(p) + 2*SIZE_SZ + sizeof(void *)))
1754#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ - sizeof(void *)))
1755
1756/* The smallest possible chunk */
1757#define MIN_CHUNK_SIZE        (sizeof(struct malloc_chunk))
1758
1759/* The smallest size we can malloc is an aligned minimal chunk */
1760
1761#define MINSIZE  \
1762  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
1763
1764/* Check if m has acceptable alignment */
1765
1766#define aligned_OK(m)  (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0)
1767
1768
1769/*
1770   Check if a request is so large that it would wrap around zero when
1771   padded and aligned. To simplify some other code, the bound is made
1772   low enough so that adding MINSIZE will also not wrap around zero.
1773*/
1774
1775#define REQUEST_OUT_OF_RANGE(req)                                 \
1776  ((unsigned long)(req) >=                                        \
1777   (unsigned long)(INTERNAL_SIZE_T)(-2 * MINSIZE))
1778
1779/* pad request bytes into a usable size -- internal version */
1780
1781
1782/* prev_size field of next chunk is overwritten with data
1783** when in use.  NOTE - last SIZE_SZ of arena must be left
1784** unused for last chunk to use
1785*/
1786/* Added sizeof(void *) to make room for arena_ptr */
1787#define request2size(req)                                         \
1788  (((req) + sizeof(void *) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
1789   MINSIZE :                                                      \
1790   ((req) + sizeof(void *) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
1791
1792/*  Same, except also perform argument check */
1793
1794#define checked_request2size(req, sz)                             \
1795  if (REQUEST_OUT_OF_RANGE(req)) {                                \
1796    MALLOC_FAILURE_ACTION;                                        \
1797    return 0;                                                     \
1798  }                                                               \
1799  (sz) = request2size(req);
1800
1801/*
1802  --------------- Physical chunk operations ---------------
1803*/
1804
1805
1806/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
1807#define PREV_INUSE 0x1
1808
1809/* extract inuse bit of previous chunk */
1810#define prev_inuse(p)       ((p)->size & PREV_INUSE)
1811
1812
1813/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
1814#define IS_MMAPPED 0x2
1815
1816/* check for mmap()'ed chunk */
1817#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
1818
1819
1820
1821/*
1822  Bits to mask off when extracting size
1823
1824  Note: IS_MMAPPED is intentionally not masked off from size field in
1825  macros for which mmapped chunks should never be seen. This should
1826  cause helpful core dumps to occur if it is tried by accident by
1827  people extending or adapting this malloc.
1828*/
1829#define SIZE_BITS (PREV_INUSE|IS_MMAPPED)
1830
1831/* Get size, ignoring use bits */
1832#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
1833
1834
1835/* Ptr to next physical malloc_chunk. */
1836#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) ))
1837
1838/* Ptr to previous physical malloc_chunk */
1839#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
1840
1841/* Treat space at ptr + offset as a chunk */
1842#define chunk_at_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
1843
1844/* extract p's inuse bit */
1845#define inuse(p)\
1846((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
1847
1848/* set/clear chunk as being inuse without otherwise disturbing */
1849#define set_inuse(p)\
1850((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
1851
1852#define clear_inuse(p)\
1853((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
1854
1855
1856/* check/set/clear inuse bits in known places */
1857#define inuse_bit_at_offset(p, s)\
1858 (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
1859
1860#define set_inuse_bit_at_offset(p, s)\
1861 (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
1862
1863#define clear_inuse_bit_at_offset(p, s)\
1864 (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
1865
1866
1867/* Set size at head, without disturbing its use bit */
1868#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s)))
1869
1870/* Set size/use field */
1871#define set_head(p, s)       ((p)->size = (s))
1872
1873/* Set size at footer (only when chunk is not in use) */
1874#define set_foot(p, s)       (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
1875
1876
1877/*
1878  -------------------- Internal data structures --------------------
1879
1880   All internal state is held in an instance of malloc_state defined
1881   below. There are no other static variables, except in two optional
1882   cases:
1883   * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
1884   * If HAVE_MMAP is true, but mmap doesn't support
1885     MAP_ANONYMOUS, a dummy file descriptor for mmap.
1886
1887   Beware of lots of tricks that minimize the total bookkeeping space
1888   requirements. The result is a little over 1K bytes (for 4byte
1889   pointers and size_t.)
1890*/
1891
1892/*
1893  Bins
1894
1895    An array of bin headers for free chunks. Each bin is doubly
1896    linked.  The bins are approximately proportionally (log) spaced.
1897    There are a lot of these bins (128). This may look excessive, but
1898    works very well in practice.  Most bins hold sizes that are
1899    unusual as malloc request sizes, but are more usual for fragments
1900    and consolidated sets of chunks, which is what these bins hold, so
1901    they can be found quickly.  All procedures maintain the invariant
1902    that no consolidated chunk physically borders another one, so each
1903    chunk in a list is known to be preceeded and followed by either
1904    inuse chunks or the ends of memory.
1905
1906    Chunks in bins are kept in size order, with ties going to the
1907    approximately least recently used chunk. Ordering isn't needed
1908    for the small bins, which all contain the same-sized chunks, but
1909    facilitates best-fit allocation for larger chunks. These lists
1910    are just sequential. Keeping them in order almost never requires
1911    enough traversal to warrant using fancier ordered data
1912    structures.
1913
1914    Chunks of the same size are linked with the most
1915    recently freed at the front, and allocations are taken from the
1916    back.  This results in LRU (FIFO) allocation order, which tends
1917    to give each chunk an equal opportunity to be consolidated with
1918    adjacent freed chunks, resulting in larger free chunks and less
1919    fragmentation.
1920
1921    To simplify use in double-linked lists, each bin header acts
1922    as a malloc_chunk. This avoids special-casing for headers.
1923    But to conserve space and improve locality, we allocate
1924    only the fd/bk pointers of bins, and then use repositioning tricks
1925    to treat these as the fields of a malloc_chunk*.
1926*/
1927
1928typedef struct malloc_chunk* mbinptr;
1929
1930/* addressing -- note that bin_at(0) does not exist */
1931#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1)))
1932
1933/* analog of ++bin */
1934#define next_bin(b)  ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))
1935
1936/* Reminders about list directionality within bins */
1937#define first(b)     ((b)->fd)
1938#define last(b)      ((b)->bk)
1939
1940/* Take a chunk off a bin list */
1941#define unlink(P, BK, FD) {                                            \
1942  FD = P->fd;                                                          \
1943  BK = P->bk;                                                          \
1944  FD->bk = BK;                                                         \
1945  BK->fd = FD;                                                         \
1946}
1947
1948/*
1949  Indexing
1950
1951    Bins for sizes < 512 bytes contain chunks of all the same size, spaced
1952    8 bytes apart. Larger bins are approximately logarithmically spaced:
1953
1954    64 bins of size       8
1955    32 bins of size      64
1956    16 bins of size     512
1957     8 bins of size    4096
1958     4 bins of size   32768
1959     2 bins of size  262144
1960     1 bin  of size what's left
1961
1962    There is actually a little bit of slop in the numbers in bin_index
1963    for the sake of speed. This makes no difference elsewhere.
1964
1965    The bins top out around 1MB because we expect to service large
1966    requests via mmap.
1967*/
1968
1969#define NBINS             128
1970#define NSMALLBINS         64
1971#define SMALLBIN_WIDTH      8
1972#define MIN_LARGE_SIZE    512
1973
1974#define in_smallbin_range(sz)  \
1975  ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
1976
1977#define smallbin_index(sz)     (((unsigned)(sz)) >> 3)
1978
1979#define largebin_index(sz)                                                   \
1980(((((unsigned long)(sz)) >>  6) <= 32)?  56 + (((unsigned long)(sz)) >>  6): \
1981 ((((unsigned long)(sz)) >>  9) <= 20)?  91 + (((unsigned long)(sz)) >>  9): \
1982 ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
1983 ((((unsigned long)(sz)) >> 15) <=  4)? 119 + (((unsigned long)(sz)) >> 15): \
1984 ((((unsigned long)(sz)) >> 18) <=  2)? 124 + (((unsigned long)(sz)) >> 18): \
1985                                        126)
1986
1987#define bin_index(sz) \
1988 ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
1989
1990/*
1991  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
1992  first bin that is maintained in sorted order. This must
1993  be the smallest size corresponding to a given bin.
1994
1995  Normally, this should be MIN_LARGE_SIZE. But you can weaken
1996  best fit guarantees to sometimes speed up malloc by increasing value.
1997  Doing this means that malloc may choose a chunk that is
1998  non-best-fitting by up to the width of the bin.
1999
2000  Some useful cutoff values:
2001      512 - all bins sorted
2002     2560 - leaves bins <=     64 bytes wide unsorted
2003    12288 - leaves bins <=    512 bytes wide unsorted
2004    65536 - leaves bins <=   4096 bytes wide unsorted
2005   262144 - leaves bins <=  32768 bytes wide unsorted
2006       -1 - no bins sorted (not recommended!)
2007*/
2008
2009#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
2010/* #define FIRST_SORTED_BIN_SIZE 65536 */
2011
2012/*
2013  Unsorted chunks
2014
2015    All remainders from chunk splits, as well as all returned chunks,
2016    are first placed in the "unsorted" bin. They are then placed
2017    in regular bins after malloc gives them ONE chance to be used before
2018    binning. So, basically, the unsorted_chunks list acts as a queue,
2019    with chunks being placed on it in free (and malloc_consolidate),
2020    and taken off (to be either used or placed in bins) in malloc.
2021
2022    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it
2023    does not have to be taken into account in size comparisons.
2024*/
2025
2026/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
2027#define unsorted_chunks(M)          (bin_at(M, 1))
2028
2029/*
2030  Top
2031
2032    The top-most available chunk (i.e., the one bordering the end of
2033    available memory) is treated specially. It is never included in
2034    any bin, is used only if no other chunk is available, and is
2035    released back to the system if it is very large (see
2036    M_TRIM_THRESHOLD).  Because top initially
2037    points to its own bin with initial zero size, thus forcing
2038    extension on the first malloc request, we avoid having any special
2039    code in malloc to check whether it even exists yet. But we still
2040    need to do so when getting memory from system, so we make
2041    initial_top treat the bin as a legal but unusable chunk during the
2042    interval between initialization and the first call to
2043    sYSMALLOc. (This is somewhat delicate, since it relies on
2044    the 2 preceding words to be zero during this interval as well.)
2045*/
2046
2047/* Conveniently, the unsorted bin can be used as dummy top on first call */
2048#define initial_top(M)              (unsorted_chunks(M))
2049
2050/*
2051  Binmap
2052
2053    To help compensate for the large number of bins, a one-level index
2054    structure is used for bin-by-bin searching.  `binmap' is a
2055    bitvector recording whether bins are definitely empty so they can
2056    be skipped over during during traversals.  The bits are NOT always
2057    cleared as soon as bins are empty, but instead only
2058    when they are noticed to be empty during traversal in malloc.
2059*/
2060
2061/* Conservatively use 32 bits per map word, even if on 64bit system */
2062#define BINMAPSHIFT      5
2063#define BITSPERMAP       (1U << BINMAPSHIFT)
2064#define BINMAPSIZE       (NBINS / BITSPERMAP)
2065
2066#define idx2block(i)     ((i) >> BINMAPSHIFT)
2067#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
2068
2069#define mark_bin(m,i)    ((m)->binmap[idx2block(i)] |=  idx2bit(i))
2070#define unmark_bin(m,i)  ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
2071#define get_binmap(m,i)  ((m)->binmap[idx2block(i)] &   idx2bit(i))
2072
2073/*
2074  Fastbins
2075
2076    An array of lists holding recently freed small chunks.  Fastbins
2077    are not doubly linked.  It is faster to single-link them, and
2078    since chunks are never removed from the middles of these lists,
2079    double linking is not necessary. Also, unlike regular bins, they
2080    are not even processed in FIFO order (they use faster LIFO) since
2081    ordering doesn't much matter in the transient contexts in which
2082    fastbins are normally used.
2083
2084    Chunks in fastbins keep their inuse bit set, so they cannot
2085    be consolidated with other free chunks. malloc_consolidate
2086    releases all chunks in fastbins and consolidates them with
2087    other free chunks.
2088*/
2089
2090typedef struct malloc_chunk* mfastbinptr;
2091
2092/* offset 2 to use otherwise unindexable first 2 bins */
2093#define fastbin_index(sz)        ((int)((((unsigned int)(sz)) >> 3) - 2))
2094
2095/* The maximum fastbin request size we support */
2096#define MAX_FAST_SIZE     80
2097
2098#define NFASTBINS  (fastbin_index(request2size(MAX_FAST_SIZE))+1)
2099
2100/*
2101  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
2102  that triggers automatic consolidation of possibly-surrounding
2103  fastbin chunks. This is a heuristic, so the exact value should not
2104  matter too much. It is defined at half the default trim threshold as a
2105  compromise heuristic to only attempt consolidation if it is likely
2106  to lead to trimming. However, it is not dynamically tunable, since
2107  consolidation reduces fragmentation surrounding large chunks even
2108  if trimming is not used.
2109*/
2110
2111#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL)
2112
2113/*
2114  Since the lowest 2 bits in max_fast don't matter in size comparisons,
2115  they are used as flags.
2116*/
2117
2118/*
2119  FASTCHUNKS_BIT held in max_fast indicates that there are probably
2120  some fastbin chunks. It is set true on entering a chunk into any
2121  fastbin, and cleared only in malloc_consolidate.
2122
2123  The truth value is inverted so that have_fastchunks will be true
2124  upon startup (since statics are zero-filled), simplifying
2125  initialization checks.
2126*/
2127
2128#define FASTCHUNKS_BIT        (1U)
2129
2130#define have_fastchunks(M)     (((M)->max_fast &  FASTCHUNKS_BIT) == 0)
2131#define clear_fastchunks(M)    ((M)->max_fast |=  FASTCHUNKS_BIT)
2132#define set_fastchunks(M)      ((M)->max_fast &= ~FASTCHUNKS_BIT)
2133
2134/*
2135  NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous
2136  regions.  Otherwise, contiguity is exploited in merging together,
2137  when possible, results from consecutive MORECORE calls.
2138
2139  The initial value comes from MORECORE_CONTIGUOUS, but is
2140  changed dynamically if mmap is ever used as an sbrk substitute.
2141*/
2142
2143#define NONCONTIGUOUS_BIT     (2U)
2144
2145#define contiguous(M)          (((M)->max_fast &  NONCONTIGUOUS_BIT) == 0)
2146#define noncontiguous(M)       (((M)->max_fast &  NONCONTIGUOUS_BIT) != 0)
2147#define set_noncontiguous(M)   ((M)->max_fast |=  NONCONTIGUOUS_BIT)
2148#define set_contiguous(M)      ((M)->max_fast &= ~NONCONTIGUOUS_BIT)
2149
2150/*
2151   Set value of max_fast.
2152   Use impossibly small value if 0.
2153   Precondition: there are no existing fastbin chunks.
2154   Setting the value clears fastchunk bit but preserves noncontiguous bit.
2155*/
2156
2157#define set_max_fast(M, s) \
2158  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
2159  FASTCHUNKS_BIT | \
2160  ((M)->max_fast &  NONCONTIGUOUS_BIT)
2161
2162
2163/*
2164   ----------- Internal state representation and initialization -----------
2165*/
2166
2167struct malloc_state {
2168  /* Serialize access.  */
2169  mutex_t mutex;
2170
2171  /* Statistics for locking.  Only used if THREAD_STATS is defined.  */
2172  long stat_lock_direct, stat_lock_loop, stat_lock_wait;
2173  long pad0_[1]; /* try to give the mutex its own cacheline */
2174
2175  /* The maximum chunk size to be eligible for fastbin */
2176  INTERNAL_SIZE_T  max_fast;   /* low 2 bits used as flags */
2177
2178  /* Fastbins */
2179  mfastbinptr      fastbins[NFASTBINS];
2180
2181  /* Base of the topmost chunk -- not otherwise kept in a bin */
2182  mchunkptr        top;
2183
2184  /* The remainder from the most recent split of a small request */
2185  mchunkptr        last_remainder;
2186
2187  /* Normal bins packed as described above */
2188  mchunkptr        bins[NBINS * 2];
2189
2190  /* Bitmap of bins */
2191  unsigned int     binmap[BINMAPSIZE];
2192
2193  /* Linked list */
2194  struct malloc_state *next;
2195
2196  /* Memory allocated from the system in this arena.  */
2197  INTERNAL_SIZE_T system_mem;
2198  INTERNAL_SIZE_T max_system_mem;
2199};
2200
2201struct malloc_par {
2202  /* Tunable parameters */
2203  unsigned long    trim_threshold;
2204  INTERNAL_SIZE_T  top_pad;
2205  INTERNAL_SIZE_T  mmap_threshold;
2206
2207  /* Memory map support */
2208  int              n_mmaps;
2209  int              n_mmaps_max;
2210  int              max_n_mmaps;
2211
2212  /* Cache malloc_getpagesize */
2213  unsigned int     pagesize;
2214
2215  /* Statistics */
2216  INTERNAL_SIZE_T  mmapped_mem;
2217  /*INTERNAL_SIZE_T  sbrked_mem;*/
2218  /*INTERNAL_SIZE_T  max_sbrked_mem;*/
2219  INTERNAL_SIZE_T  max_mmapped_mem;
2220  INTERNAL_SIZE_T  max_total_mem; /* only kept for NO_THREADS */
2221
2222  /* First address handed out by MORECORE/sbrk.  */
2223  char*            sbrk_base;
2224};
2225
2226/* There are several instances of this struct ("arenas") in this
2227   malloc.  If you are adapting this malloc in a way that does NOT use
2228   a static or mmapped malloc_state, you MUST explicitly zero-fill it
2229   before using. This malloc relies on the property that malloc_state
2230   is initialized to all zeroes (as is true of C statics).  */
2231
2232
2233
2234/*
2235  Initialize a malloc_state struct.
2236
2237  This is called only from within malloc_consolidate, which needs
2238  be called in the same contexts anyway.  It is never called directly
2239  outside of malloc_consolidate because some optimizing compilers try
2240  to inline it at all call points, which turns out not to be an
2241  optimization at all. (Inlining it in malloc_consolidate is fine though.)
2242*/
2243
2244#if __STD_C
2245static void malloc_init_state(mstate av)
2246#else
2247static void malloc_init_state(av) mstate av;
2248#endif
2249{
2250  int     i;
2251  mbinptr bin;
2252
2253  /* Establish circular links for normal bins */
2254  for (i = 1; i < NBINS; ++i) {
2255    bin = bin_at(av,i);
2256    bin->fd = bin->bk = bin;
2257  }
2258
2259  set_noncontiguous(av);
2260
2261  set_max_fast(av, DEFAULT_MXFAST);
2262
2263  av->top            = initial_top(av);
2264}
2265
2266/*
2267   Other internal utilities operating on mstates
2268*/
2269
2270#if __STD_C
2271static Void_t*  sYSMALLOc(INTERNAL_SIZE_T, mstate);
2272static void     malloc_consolidate(mstate);
2273//static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**);
2274#else
2275static Void_t*  sYSMALLOc();
2276static void     malloc_consolidate();
2277static Void_t** iALLOc();
2278#endif
2279
2280/* ------------------- Support for multiple arenas -------------------- */
2281#include "arena.c"
2282
2283/*
2284  Debugging support
2285
2286  These routines make a number of assertions about the states
2287  of data structures that should be true at all times. If any
2288  are not true, it's very likely that a user program has somehow
2289  trashed memory. (It's also possible that there is a coding error
2290  in malloc. In which case, please report it!)
2291*/
2292
2293#if ! MALLOC_DEBUG
2294
2295#define check_chunk(A,P)
2296#define check_free_chunk(A,P)
2297#define check_inuse_chunk(A,P)
2298#define check_remalloced_chunk(A,P,N)
2299#define check_malloced_chunk(A,P,N)
2300#define check_malloc_state(A)
2301
2302#else
2303
2304#define check_chunk(A,P)              do_check_chunk(A,P)
2305#define check_free_chunk(A,P)         do_check_free_chunk(A,P)
2306#define check_inuse_chunk(A,P)        do_check_inuse_chunk(A,P)
2307#define check_remalloced_chunk(A,P,N) do_check_remalloced_chunk(A,P,N)
2308#define check_malloced_chunk(A,P,N)   do_check_malloced_chunk(A,P,N)
2309#define check_malloc_state(A)         do_check_malloc_state(A)
2310
2311/*
2312  Properties of all chunks
2313*/
2314
2315#if __STD_C
2316static void do_check_chunk(mstate av, mchunkptr p)
2317#else
2318static void do_check_chunk(av, p) mstate av; mchunkptr p;
2319#endif
2320{
2321  unsigned long sz = chunksize(p);
2322  /* min and max possible addresses assuming contiguous allocation */
2323  char* max_address = (char*)(av->top) + chunksize(av->top);
2324  char* min_address = max_address - av->system_mem;
2325
2326  if (!chunk_is_mmapped(p)) {
2327
2328    /* Has legal address ... */
2329    if (p != av->top) {
2330      if (contiguous(av)) {
2331        assert(((char*)p) >= min_address);
2332        assert(((char*)p + sz) <= ((char*)(av->top)));
2333      }
2334    }
2335    else {
2336      /* top size is always at least MINSIZE */
2337      assert((unsigned long)(sz) >= MINSIZE);
2338      /* top predecessor always marked inuse */
2339      assert(prev_inuse(p));
2340    }
2341
2342  }
2343  else {
2344#if HAVE_MMAP
2345    /* address is outside main heap  */
2346    if (contiguous(av) && av->top != initial_top(av)) {
2347      assert(((char*)p) < min_address || ((char*)p) > max_address);
2348    }
2349    /* chunk is page-aligned */
2350    assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0);
2351    /* mem is aligned */
2352    assert(aligned_OK(chunk2mem(p)));
2353#else
2354    /* force an appropriate assert violation if debug set */
2355    assert(!chunk_is_mmapped(p));
2356#endif
2357  }
2358}
2359
2360/*
2361  Properties of free chunks
2362*/
2363
2364#if __STD_C
2365static void do_check_free_chunk(mstate av, mchunkptr p)
2366#else
2367static void do_check_free_chunk(av, p) mstate av; mchunkptr p;
2368#endif
2369{
2370  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE);
2371  mchunkptr next = chunk_at_offset(p, sz);
2372
2373  do_check_chunk(av, p);
2374
2375  /* Chunk must claim to be free ... */
2376  assert(!inuse(p));
2377  assert (!chunk_is_mmapped(p));
2378
2379  /* Unless a special marker, must have OK fields */
2380  if ((unsigned long)(sz) >= MINSIZE)
2381  {
2382    assert((sz & MALLOC_ALIGN_MASK) == 0);
2383    assert(aligned_OK(chunk2mem(p)));
2384    /* ... matching footer field */
2385    assert(next->prev_size == sz);
2386    /* ... and is fully consolidated */
2387    assert(prev_inuse(p));
2388    assert (next == av->top || inuse(next));
2389
2390    /* ... and has minimally sane links */
2391    assert(p->fd->bk == p);
2392    assert(p->bk->fd == p);
2393  }
2394  else /* markers are always of size SIZE_SZ */
2395    assert(sz == SIZE_SZ);
2396}
2397
2398/*
2399  Properties of inuse chunks
2400*/
2401
2402#if __STD_C
2403static void do_check_inuse_chunk(mstate av, mchunkptr p)
2404#else
2405static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p;
2406#endif
2407{
2408  mchunkptr next;
2409
2410  do_check_chunk(av, p);
2411
2412  assert(av == arena_for_chunk(p));
2413  if (chunk_is_mmapped(p))
2414    return; /* mmapped chunks have no next/prev */
2415
2416  /* Check whether it claims to be in use ... */
2417  assert(inuse(p));
2418
2419  next = next_chunk(p);
2420
2421  /* ... and is surrounded by OK chunks.
2422    Since more things can be checked with free chunks than inuse ones,
2423    if an inuse chunk borders them and debug is on, it's worth doing them.
2424  */
2425  if (!prev_inuse(p))  {
2426    /* Note that we cannot even look at prev unless it is not inuse */
2427    mchunkptr prv = prev_chunk(p);
2428    assert(next_chunk(prv) == p);
2429    do_check_free_chunk(av, prv);
2430  }
2431
2432  if (next == av->top) {
2433    assert(prev_inuse(next));
2434    assert(chunksize(next) >= MINSIZE);
2435  }
2436  else if (!inuse(next))
2437    do_check_free_chunk(av, next);
2438}
2439
2440/*
2441  Properties of chunks recycled from fastbins
2442*/
2443
2444#if __STD_C
2445static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
2446#else
2447static void do_check_remalloced_chunk(av, p, s)
2448mstate av; mchunkptr p; INTERNAL_SIZE_T s;
2449#endif
2450{
2451  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE);
2452
2453  if (!chunk_is_mmapped(p)) {
2454    assert(av == arena_for_chunk(p));
2455  }
2456
2457  do_check_inuse_chunk(av, p);
2458
2459  /* Legal size ... */
2460  assert((sz & MALLOC_ALIGN_MASK) == 0);
2461  assert((unsigned long)(sz) >= MINSIZE);
2462  /* ... and alignment */
2463  assert(aligned_OK(chunk2mem(p)));
2464  /* chunk is less than MINSIZE more than request */
2465  assert((long)(sz) - (long)(s) >= 0);
2466  assert((long)(sz) - (long)(s + MINSIZE) < 0);
2467}
2468
2469/*
2470  Properties of nonrecycled chunks at the point they are malloced
2471*/
2472
2473#if __STD_C
2474static void do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
2475#else
2476static void do_check_malloced_chunk(av, p, s)
2477mstate av; mchunkptr p; INTERNAL_SIZE_T s;
2478#endif
2479{
2480  /* same as recycled case ... */
2481  do_check_remalloced_chunk(av, p, s);
2482
2483  /*
2484    ... plus,  must obey implementation invariant that prev_inuse is
2485    always true of any allocated chunk; i.e., that each allocated
2486    chunk borders either a previously allocated and still in-use
2487    chunk, or the base of its memory arena. This is ensured
2488    by making all allocations from the the `lowest' part of any found
2489    chunk.  This does not necessarily hold however for chunks
2490    recycled via fastbins.
2491  */
2492
2493  assert(prev_inuse(p));
2494}
2495
2496
2497/*
2498  Properties of malloc_state.
2499
2500  This may be useful for debugging malloc, as well as detecting user
2501  programmer errors that somehow write into malloc_state.
2502
2503  If you are extending or experimenting with this malloc, you can
2504  probably figure out how to hack this routine to print out or
2505  display chunk addresses, sizes, bins, and other instrumentation.
2506*/
2507
2508static void do_check_malloc_state(mstate av)
2509{
2510  int i;
2511  mchunkptr p;
2512  mchunkptr q;
2513  mbinptr b;
2514  unsigned int binbit;
2515  int empty;
2516  unsigned int idx;
2517  INTERNAL_SIZE_T size;
2518  unsigned long total = 0;
2519  int max_fast_bin;
2520
2521  /* internal size_t must be no wider than pointer type */
2522  assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*));
2523
2524  /* alignment is a power of 2 */
2525  assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0);
2526
2527  /* cannot run remaining checks until fully initialized */
2528  if (av->top == 0 || av->top == initial_top(av))
2529    return;
2530
2531
2532  /* properties of fastbins */
2533
2534  /* max_fast is in allowed range */
2535  assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE));
2536
2537  max_fast_bin = fastbin_index(av->max_fast);
2538
2539  for (i = 0; i < NFASTBINS; ++i) {
2540    p = av->fastbins[i];
2541
2542    /* all bins past max_fast are empty */
2543    if (i > max_fast_bin)
2544      assert(p == 0);
2545
2546    while (p != 0) {
2547      /* each chunk claims to be inuse */
2548      do_check_inuse_chunk(av, p);
2549      total += chunksize(p);
2550      /* chunk belongs in this bin */
2551      assert(fastbin_index(chunksize(p)) == i);
2552      p = p->fd;
2553    }
2554  }
2555
2556  if (total != 0)
2557    assert(have_fastchunks(av));
2558  else if (!have_fastchunks(av))
2559    assert(total == 0);
2560
2561  /* check normal bins */
2562  for (i = 1; i < NBINS; ++i) {
2563    b = bin_at(av,i);
2564
2565    /* binmap is accurate (except for bin 1 == unsorted_chunks) */
2566    if (i >= 2) {
2567      binbit = get_binmap(av,i);
2568      empty = last(b) == b;
2569      if (!binbit)
2570        assert(empty);
2571      else if (!empty)
2572        assert(binbit);
2573    }
2574
2575    for (p = last(b); p != b; p = p->bk) {
2576      /* each chunk claims to be free */
2577      do_check_free_chunk(av, p);
2578      size = chunksize(p);
2579      total += size;
2580      if (i >= 2) {
2581        /* chunk belongs in bin */
2582        idx = bin_index(size);
2583        assert(idx == (unsigned int)i);
2584        /* lists are sorted */
2585        if ((unsigned long) size >= (unsigned long)(FIRST_SORTED_BIN_SIZE)) {
2586	  assert(p->bk == b ||
2587		 (unsigned long)chunksize(p->bk) >=
2588		 (unsigned long)chunksize(p));
2589	}
2590      }
2591      /* chunk is followed by a legal chain of inuse chunks */
2592      for (q = next_chunk(p);
2593           (q != av->top && inuse(q) &&
2594             (unsigned long)(chunksize(q)) >= MINSIZE);
2595           q = next_chunk(q))
2596        do_check_inuse_chunk(av, q);
2597    }
2598  }
2599
2600  /* top chunk is OK */
2601  check_chunk(av, av->top);
2602
2603  /* sanity checks for statistics */
2604
2605
2606  assert((unsigned long)(av->system_mem) <=
2607         (unsigned long)(av->max_system_mem));
2608
2609
2610}
2611#endif
2612
2613
2614
2615/* ----------- Routines dealing with system allocation -------------- */
2616
2617/* No system allocation routines supported */
2618
2619
2620/*------------------------ Public wrappers. --------------------------------*/
2621
2622
2623
2624#undef DEBUG_MALLOC
2625Void_t*
2626public_mALLOc(cvmx_arena_list_t arena_list, size_t bytes)
2627{
2628  mstate ar_ptr, orig_ar_ptr;
2629  Void_t *victim = NULL;
2630  static mstate debug_prev_ar;  // debug only!
2631#ifdef DEBUG_MALLOC
2632  int arena_cnt=0;
2633#endif
2634
2635  ar_ptr = arena_list;
2636
2637  if (!ar_ptr)
2638  {
2639     return(NULL);
2640  }
2641
2642  if (debug_prev_ar != ar_ptr)
2643  {
2644      debug_printf("New arena: %p\n", ar_ptr);
2645#ifdef CVMX_SPINLOCK_DEBUG
2646      cvmx_dprintf("lock wait count for arena: %p is %ld\n", ar_ptr, ar_ptr->mutex.wait_cnt);
2647#endif
2648      debug_prev_ar = ar_ptr;
2649  }
2650  orig_ar_ptr = ar_ptr;
2651
2652  // try to get an arena without contention
2653  do
2654  {
2655#ifdef DEBUG_MALLOC
2656  arena_cnt++;
2657#endif
2658      if (!mutex_trylock(&ar_ptr->mutex))
2659      {
2660          // we locked it
2661          victim = _int_malloc(ar_ptr, bytes);
2662          (void)mutex_unlock(&ar_ptr->mutex);
2663          if(victim)
2664          {
2665              break;
2666          }
2667      }
2668      ar_ptr = ar_ptr->next;
2669  } while (ar_ptr != orig_ar_ptr);
2670
2671  // we couldn't get the memory without contention, so try all
2672  // arenas.  SLOW!
2673  if (!victim)
2674  {
2675      ar_ptr = orig_ar_ptr;
2676      do
2677      {
2678#ifdef DEBUG_MALLOC
2679  arena_cnt++;
2680#endif
2681          mutex_lock(&ar_ptr->mutex);
2682          victim = _int_malloc(ar_ptr, bytes);
2683          (void)mutex_unlock(&ar_ptr->mutex);
2684          if(victim)
2685          {
2686              break;
2687          }
2688          ar_ptr = ar_ptr->next;
2689      } while (ar_ptr != orig_ar_ptr);
2690  }
2691
2692
2693  assert(!victim || chunk_is_mmapped(mem2chunk(victim)) ||
2694	 ar_ptr == arena_for_chunk(mem2chunk(victim)));
2695
2696#ifdef DEBUG_MALLOC
2697  if (!victim)
2698  {
2699     cvmx_dprintf("Malloc failed: size: %ld, arena_cnt: %d\n", bytes, arena_cnt);
2700  }
2701#endif
2702
2703  debug_printf("cvmx_malloc(%ld) = %p\n", bytes, victim);
2704
2705  // remember which arena we last used.....
2706  tsd_setspecific(arena_key, (Void_t *)ar_ptr);
2707  return victim;
2708}
2709
2710
2711
2712void
2713public_fREe(Void_t* mem)
2714{
2715  mstate ar_ptr;
2716  mchunkptr p;                          /* chunk corresponding to mem */
2717
2718  debug_printf("cvmx_free(%p)\n", mem);
2719
2720
2721  if (mem == 0)                              /* free(0) has no effect */
2722    return;
2723
2724  p = mem2chunk(mem);
2725
2726
2727  ar_ptr = arena_for_chunk(p);
2728  assert(ar_ptr);
2729#if THREAD_STATS
2730  if(!mutex_trylock(&ar_ptr->mutex))
2731    ++(ar_ptr->stat_lock_direct);
2732  else {
2733    (void)mutex_lock(&ar_ptr->mutex);
2734    ++(ar_ptr->stat_lock_wait);
2735  }
2736#else
2737  (void)mutex_lock(&ar_ptr->mutex);
2738#endif
2739  _int_free(ar_ptr, mem);
2740  (void)mutex_unlock(&ar_ptr->mutex);
2741}
2742
2743Void_t*
2744public_rEALLOc(cvmx_arena_list_t arena_list, Void_t* oldmem, size_t bytes)
2745{
2746  mstate ar_ptr;
2747  INTERNAL_SIZE_T    nb;      /* padded request size */
2748
2749  mchunkptr oldp;             /* chunk corresponding to oldmem */
2750  INTERNAL_SIZE_T    oldsize; /* its size */
2751
2752  Void_t* newp;             /* chunk to return */
2753
2754
2755#if REALLOC_ZERO_BYTES_FREES
2756  if (bytes == 0 && oldmem != NULL) { public_fREe(oldmem); return 0; }
2757#endif
2758
2759  /* realloc of null is supposed to be same as malloc */
2760  if (oldmem == 0) return public_mALLOc(arena_list, bytes);
2761
2762  oldp    = mem2chunk(oldmem);
2763  oldsize = chunksize(oldp);
2764
2765  checked_request2size(bytes, nb);
2766
2767
2768  ar_ptr = arena_for_chunk(oldp);
2769  (void)mutex_lock(&ar_ptr->mutex);
2770
2771
2772  newp = _int_realloc(ar_ptr, oldmem, bytes);
2773
2774  (void)mutex_unlock(&ar_ptr->mutex);
2775  assert(!newp || chunk_is_mmapped(mem2chunk(newp)) ||
2776	 ar_ptr == arena_for_chunk(mem2chunk(newp)));
2777  return newp;
2778}
2779
2780#undef DEBUG_MEMALIGN
2781Void_t*
2782public_mEMALIGn(cvmx_arena_list_t arena_list, size_t alignment, size_t bytes)
2783{
2784  mstate ar_ptr, orig_ar_ptr;
2785  Void_t *p = NULL;
2786#ifdef DEBUG_MEMALIGN
2787  int arena_cnt=0;
2788#endif
2789
2790
2791  /* If need less alignment than we give anyway, just relay to malloc */
2792  if (alignment <= MALLOC_ALIGNMENT) return public_mALLOc(arena_list, bytes);
2793
2794  /* Otherwise, ensure that it is at least a minimum chunk size */
2795  if (alignment <  MINSIZE) alignment = MINSIZE;
2796
2797
2798  ar_ptr = arena_list;
2799
2800  if (!ar_ptr)
2801  {
2802     return(NULL);
2803  }
2804
2805  orig_ar_ptr = ar_ptr;
2806
2807
2808  // try to get an arena without contention
2809  do
2810  {
2811
2812#ifdef DEBUG_MEMALIGN
2813   arena_cnt++;
2814#endif
2815      if (!mutex_trylock(&ar_ptr->mutex))
2816      {
2817          // we locked it
2818          p = _int_memalign(ar_ptr, alignment, bytes);
2819          (void)mutex_unlock(&ar_ptr->mutex);
2820          if(p)
2821          {
2822              break;
2823          }
2824      }
2825      ar_ptr = ar_ptr->next;
2826  } while (ar_ptr != orig_ar_ptr);
2827
2828
2829  // we couldn't get the memory without contention, so try all
2830  // arenas.  SLOW!
2831  if (!p)
2832  {
2833#ifdef DEBUG_MEMALIGN
2834   arena_cnt++;
2835#endif
2836      ar_ptr = orig_ar_ptr;
2837      do
2838      {
2839          mutex_lock(&ar_ptr->mutex);
2840          p = _int_memalign(ar_ptr, alignment, bytes);
2841          (void)mutex_unlock(&ar_ptr->mutex);
2842          if(p)
2843          {
2844              break;
2845          }
2846          ar_ptr = ar_ptr->next;
2847      } while (ar_ptr != orig_ar_ptr);
2848  }
2849
2850
2851  if (p)
2852  {
2853     assert(ar_ptr == arena_for_chunk(mem2chunk(p)));
2854  }
2855  else
2856  {
2857#ifdef DEBUG_MEMALIGN
2858     cvmx_dprintf("Memalign failed: align: 0x%x, size: %ld, arena_cnt: %ld\n", alignment, bytes, arena_cnt);
2859#endif
2860  }
2861
2862  assert(!p || ar_ptr == arena_for_chunk(mem2chunk(p)));
2863  return p;
2864}
2865
2866
2867
2868Void_t*
2869public_cALLOc(cvmx_arena_list_t arena_list, size_t n, size_t elem_size)
2870{
2871  mstate av;
2872  mchunkptr oldtop, p;
2873  INTERNAL_SIZE_T sz, csz, oldtopsize;
2874  Void_t* mem;
2875  unsigned long clearsize;
2876  unsigned long nclears;
2877  INTERNAL_SIZE_T* d;
2878
2879
2880  /* FIXME: check for overflow on multiplication.  */
2881  sz = n * elem_size;
2882
2883  mem = public_mALLOc(arena_list, sz);
2884  if (mem)
2885  {
2886     memset(mem, 0, sz);
2887  }
2888
2889  return mem;
2890}
2891
2892
2893#ifndef _LIBC
2894
2895void
2896public_cFREe(Void_t* m)
2897{
2898  public_fREe(m);
2899}
2900
2901#endif /* _LIBC */
2902
2903/*
2904  ------------------------------ malloc ------------------------------
2905*/
2906
2907static Void_t*
2908_int_malloc(mstate av, size_t bytes)
2909{
2910  INTERNAL_SIZE_T nb;               /* normalized request size */
2911  unsigned int    idx;              /* associated bin index */
2912  mbinptr         bin;              /* associated bin */
2913  mfastbinptr*    fb;               /* associated fastbin */
2914
2915  mchunkptr       victim;           /* inspected/selected chunk */
2916  INTERNAL_SIZE_T size;             /* its size */
2917  int             victim_index;     /* its bin index */
2918
2919  mchunkptr       remainder;        /* remainder from a split */
2920  unsigned long   remainder_size;   /* its size */
2921
2922  unsigned int    block;            /* bit map traverser */
2923  unsigned int    bit;              /* bit map traverser */
2924  unsigned int    map;              /* current word of binmap */
2925
2926  mchunkptr       fwd;              /* misc temp for linking */
2927  mchunkptr       bck;              /* misc temp for linking */
2928
2929  /*
2930    Convert request size to internal form by adding SIZE_SZ bytes
2931    overhead plus possibly more to obtain necessary alignment and/or
2932    to obtain a size of at least MINSIZE, the smallest allocatable
2933    size. Also, checked_request2size traps (returning 0) request sizes
2934    that are so large that they wrap around zero when padded and
2935    aligned.
2936  */
2937
2938
2939  checked_request2size(bytes, nb);
2940
2941  /*
2942    If the size qualifies as a fastbin, first check corresponding bin.
2943    This code is safe to execute even if av is not yet initialized, so we
2944    can try it without checking, which saves some time on this fast path.
2945  */
2946
2947  if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) {
2948    fb = &(av->fastbins[(fastbin_index(nb))]);
2949    if ( (victim = *fb) != 0) {
2950      *fb = victim->fd;
2951      check_remalloced_chunk(av, victim, nb);
2952      set_arena_for_chunk(victim, av);
2953      return chunk2mem(victim);
2954    }
2955  }
2956
2957  /*
2958    If a small request, check regular bin.  Since these "smallbins"
2959    hold one size each, no searching within bins is necessary.
2960    (For a large request, we need to wait until unsorted chunks are
2961    processed to find best fit. But for small ones, fits are exact
2962    anyway, so we can check now, which is faster.)
2963  */
2964
2965  if (in_smallbin_range(nb)) {
2966    idx = smallbin_index(nb);
2967    bin = bin_at(av,idx);
2968
2969    if ( (victim = last(bin)) != bin) {
2970      if (victim == 0) /* initialization check */
2971        malloc_consolidate(av);
2972      else {
2973        bck = victim->bk;
2974        set_inuse_bit_at_offset(victim, nb);
2975        bin->bk = bck;
2976        bck->fd = bin;
2977
2978        set_arena_for_chunk(victim, av);
2979        check_malloced_chunk(av, victim, nb);
2980        return chunk2mem(victim);
2981      }
2982    }
2983  }
2984
2985  /*
2986     If this is a large request, consolidate fastbins before continuing.
2987     While it might look excessive to kill all fastbins before
2988     even seeing if there is space available, this avoids
2989     fragmentation problems normally associated with fastbins.
2990     Also, in practice, programs tend to have runs of either small or
2991     large requests, but less often mixtures, so consolidation is not
2992     invoked all that often in most programs. And the programs that
2993     it is called frequently in otherwise tend to fragment.
2994  */
2995
2996  else {
2997    idx = largebin_index(nb);
2998    if (have_fastchunks(av))
2999      malloc_consolidate(av);
3000  }
3001
3002  /*
3003    Process recently freed or remaindered chunks, taking one only if
3004    it is exact fit, or, if this a small request, the chunk is remainder from
3005    the most recent non-exact fit.  Place other traversed chunks in
3006    bins.  Note that this step is the only place in any routine where
3007    chunks are placed in bins.
3008
3009    The outer loop here is needed because we might not realize until
3010    near the end of malloc that we should have consolidated, so must
3011    do so and retry. This happens at most once, and only when we would
3012    otherwise need to expand memory to service a "small" request.
3013  */
3014
3015  for(;;) {
3016
3017    while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
3018      bck = victim->bk;
3019      size = chunksize(victim);
3020
3021      /*
3022         If a small request, try to use last remainder if it is the
3023         only chunk in unsorted bin.  This helps promote locality for
3024         runs of consecutive small requests. This is the only
3025         exception to best-fit, and applies only when there is
3026         no exact fit for a small chunk.
3027      */
3028
3029      if (in_smallbin_range(nb) &&
3030          bck == unsorted_chunks(av) &&
3031          victim == av->last_remainder &&
3032          (unsigned long)(size) > (unsigned long)(nb + MINSIZE)) {
3033
3034        /* split and reattach remainder */
3035        remainder_size = size - nb;
3036        remainder = chunk_at_offset(victim, nb);
3037        unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
3038        av->last_remainder = remainder;
3039        remainder->bk = remainder->fd = unsorted_chunks(av);
3040
3041        set_head(victim, nb | PREV_INUSE);
3042        set_head(remainder, remainder_size | PREV_INUSE);
3043        set_foot(remainder, remainder_size);
3044
3045        set_arena_for_chunk(victim, av);
3046        check_malloced_chunk(av, victim, nb);
3047        return chunk2mem(victim);
3048      }
3049
3050      /* remove from unsorted list */
3051      unsorted_chunks(av)->bk = bck;
3052      bck->fd = unsorted_chunks(av);
3053
3054      /* Take now instead of binning if exact fit */
3055
3056      if (size == nb) {
3057        set_inuse_bit_at_offset(victim, size);
3058        set_arena_for_chunk(victim, av);
3059        check_malloced_chunk(av, victim, nb);
3060        return chunk2mem(victim);
3061      }
3062
3063      /* place chunk in bin */
3064
3065      if (in_smallbin_range(size)) {
3066        victim_index = smallbin_index(size);
3067        bck = bin_at(av, victim_index);
3068        fwd = bck->fd;
3069      }
3070      else {
3071        victim_index = largebin_index(size);
3072        bck = bin_at(av, victim_index);
3073        fwd = bck->fd;
3074
3075        if (fwd != bck) {
3076          /* if smaller than smallest, place first */
3077          if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) {
3078            fwd = bck;
3079            bck = bck->bk;
3080          }
3081          else if ((unsigned long)(size) >=
3082                   (unsigned long)(FIRST_SORTED_BIN_SIZE)) {
3083
3084            /* maintain large bins in sorted order */
3085            size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */
3086            while ((unsigned long)(size) < (unsigned long)(fwd->size)) {
3087              fwd = fwd->fd;
3088	    }
3089            bck = fwd->bk;
3090          }
3091        }
3092      }
3093
3094      mark_bin(av, victim_index);
3095      victim->bk = bck;
3096      victim->fd = fwd;
3097      fwd->bk = victim;
3098      bck->fd = victim;
3099    }
3100
3101    /*
3102      If a large request, scan through the chunks of current bin in
3103      sorted order to find smallest that fits.  This is the only step
3104      where an unbounded number of chunks might be scanned without doing
3105      anything useful with them. However the lists tend to be short.
3106    */
3107
3108    if (!in_smallbin_range(nb)) {
3109      bin = bin_at(av, idx);
3110
3111      for (victim = last(bin); victim != bin; victim = victim->bk) {
3112	size = chunksize(victim);
3113
3114	if ((unsigned long)(size) >= (unsigned long)(nb)) {
3115	  remainder_size = size - nb;
3116	  unlink(victim, bck, fwd);
3117
3118	  /* Exhaust */
3119	  if (remainder_size < MINSIZE)  {
3120	    set_inuse_bit_at_offset(victim, size);
3121        set_arena_for_chunk(victim, av);
3122	    check_malloced_chunk(av, victim, nb);
3123	    return chunk2mem(victim);
3124	  }
3125	  /* Split */
3126	  else {
3127	    remainder = chunk_at_offset(victim, nb);
3128	    unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
3129	    remainder->bk = remainder->fd = unsorted_chunks(av);
3130	    set_head(victim, nb | PREV_INUSE);
3131	    set_head(remainder, remainder_size | PREV_INUSE);
3132	    set_foot(remainder, remainder_size);
3133        set_arena_for_chunk(victim, av);
3134	    check_malloced_chunk(av, victim, nb);
3135	    return chunk2mem(victim);
3136	  }
3137	}
3138      }
3139    }
3140
3141    /*
3142      Search for a chunk by scanning bins, starting with next largest
3143      bin. This search is strictly by best-fit; i.e., the smallest
3144      (with ties going to approximately the least recently used) chunk
3145      that fits is selected.
3146
3147      The bitmap avoids needing to check that most blocks are nonempty.
3148      The particular case of skipping all bins during warm-up phases
3149      when no chunks have been returned yet is faster than it might look.
3150    */
3151
3152    ++idx;
3153    bin = bin_at(av,idx);
3154    block = idx2block(idx);
3155    map = av->binmap[block];
3156    bit = idx2bit(idx);
3157
3158    for (;;) {
3159
3160      /* Skip rest of block if there are no more set bits in this block.  */
3161      if (bit > map || bit == 0) {
3162        do {
3163          if (++block >= BINMAPSIZE)  /* out of bins */
3164            goto use_top;
3165        } while ( (map = av->binmap[block]) == 0);
3166
3167        bin = bin_at(av, (block << BINMAPSHIFT));
3168        bit = 1;
3169      }
3170
3171      /* Advance to bin with set bit. There must be one. */
3172      while ((bit & map) == 0) {
3173        bin = next_bin(bin);
3174        bit <<= 1;
3175        assert(bit != 0);
3176      }
3177
3178      /* Inspect the bin. It is likely to be non-empty */
3179      victim = last(bin);
3180
3181      /*  If a false alarm (empty bin), clear the bit. */
3182      if (victim == bin) {
3183        av->binmap[block] = map &= ~bit; /* Write through */
3184        bin = next_bin(bin);
3185        bit <<= 1;
3186      }
3187
3188      else {
3189        size = chunksize(victim);
3190
3191        /*  We know the first chunk in this bin is big enough to use. */
3192        assert((unsigned long)(size) >= (unsigned long)(nb));
3193
3194        remainder_size = size - nb;
3195
3196        /* unlink */
3197        bck = victim->bk;
3198        bin->bk = bck;
3199        bck->fd = bin;
3200
3201        /* Exhaust */
3202        if (remainder_size < MINSIZE) {
3203          set_inuse_bit_at_offset(victim, size);
3204          set_arena_for_chunk(victim, av);
3205          check_malloced_chunk(av, victim, nb);
3206          return chunk2mem(victim);
3207        }
3208
3209        /* Split */
3210        else {
3211          remainder = chunk_at_offset(victim, nb);
3212
3213          unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
3214          remainder->bk = remainder->fd = unsorted_chunks(av);
3215          /* advertise as last remainder */
3216          if (in_smallbin_range(nb))
3217            av->last_remainder = remainder;
3218
3219          set_head(victim, nb | PREV_INUSE);
3220          set_head(remainder, remainder_size | PREV_INUSE);
3221          set_foot(remainder, remainder_size);
3222          set_arena_for_chunk(victim, av);
3223          check_malloced_chunk(av, victim, nb);
3224          return chunk2mem(victim);
3225        }
3226      }
3227    }
3228
3229  use_top:
3230    /*
3231      If large enough, split off the chunk bordering the end of memory
3232      (held in av->top). Note that this is in accord with the best-fit
3233      search rule.  In effect, av->top is treated as larger (and thus
3234      less well fitting) than any other available chunk since it can
3235      be extended to be as large as necessary (up to system
3236      limitations).
3237
3238      We require that av->top always exists (i.e., has size >=
3239      MINSIZE) after initialization, so if it would otherwise be
3240      exhuasted by current request, it is replenished. (The main
3241      reason for ensuring it exists is that we may need MINSIZE space
3242      to put in fenceposts in sysmalloc.)
3243    */
3244
3245    victim = av->top;
3246    size = chunksize(victim);
3247
3248    if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
3249      remainder_size = size - nb;
3250      remainder = chunk_at_offset(victim, nb);
3251      av->top = remainder;
3252      set_head(victim, nb | PREV_INUSE);
3253      set_head(remainder, remainder_size | PREV_INUSE);
3254
3255      set_arena_for_chunk(victim, av);
3256      check_malloced_chunk(av, victim, nb);
3257      return chunk2mem(victim);
3258    }
3259
3260    /*
3261      If there is space available in fastbins, consolidate and retry,
3262      to possibly avoid expanding memory. This can occur only if nb is
3263      in smallbin range so we didn't consolidate upon entry.
3264    */
3265
3266    else if (have_fastchunks(av)) {
3267      assert(in_smallbin_range(nb));
3268      malloc_consolidate(av);
3269      idx = smallbin_index(nb); /* restore original bin index */
3270    }
3271
3272    /*
3273       Otherwise, relay to handle system-dependent cases
3274    */
3275    else
3276      return(NULL); // sysmalloc not supported
3277  }
3278}
3279
3280/*
3281  ------------------------------ free ------------------------------
3282*/
3283
3284static void
3285_int_free(mstate av, Void_t* mem)
3286{
3287  mchunkptr       p;           /* chunk corresponding to mem */
3288  INTERNAL_SIZE_T size;        /* its size */
3289  mfastbinptr*    fb;          /* associated fastbin */
3290  mchunkptr       nextchunk;   /* next contiguous chunk */
3291  INTERNAL_SIZE_T nextsize;    /* its size */
3292  int             nextinuse;   /* true if nextchunk is used */
3293  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
3294  mchunkptr       bck;         /* misc temp for linking */
3295  mchunkptr       fwd;         /* misc temp for linking */
3296
3297
3298  /* free(0) has no effect */
3299  if (mem != 0) {
3300    p = mem2chunk(mem);
3301    size = chunksize(p);
3302
3303    check_inuse_chunk(av, p);
3304
3305    /*
3306      If eligible, place chunk on a fastbin so it can be found
3307      and used quickly in malloc.
3308    */
3309
3310    if ((unsigned long)(size) <= (unsigned long)(av->max_fast)
3311
3312#if TRIM_FASTBINS
3313        /*
3314           If TRIM_FASTBINS set, don't place chunks
3315           bordering top into fastbins
3316        */
3317        && (chunk_at_offset(p, size) != av->top)
3318#endif
3319        ) {
3320
3321      set_fastchunks(av);
3322      fb = &(av->fastbins[fastbin_index(size)]);
3323      p->fd = *fb;
3324      *fb = p;
3325    }
3326
3327    /*
3328       Consolidate other non-mmapped chunks as they arrive.
3329    */
3330
3331    else if (!chunk_is_mmapped(p)) {
3332      nextchunk = chunk_at_offset(p, size);
3333      nextsize = chunksize(nextchunk);
3334      assert(nextsize > 0);
3335
3336      /* consolidate backward */
3337      if (!prev_inuse(p)) {
3338        prevsize = p->prev_size;
3339        size += prevsize;
3340        p = chunk_at_offset(p, -((long) prevsize));
3341        unlink(p, bck, fwd);
3342      }
3343
3344      if (nextchunk != av->top) {
3345        /* get and clear inuse bit */
3346        nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
3347
3348        /* consolidate forward */
3349        if (!nextinuse) {
3350          unlink(nextchunk, bck, fwd);
3351          size += nextsize;
3352        } else
3353	  clear_inuse_bit_at_offset(nextchunk, 0);
3354
3355        /*
3356          Place the chunk in unsorted chunk list. Chunks are
3357          not placed into regular bins until after they have
3358          been given one chance to be used in malloc.
3359        */
3360
3361        bck = unsorted_chunks(av);
3362        fwd = bck->fd;
3363        p->bk = bck;
3364        p->fd = fwd;
3365        bck->fd = p;
3366        fwd->bk = p;
3367
3368        set_head(p, size | PREV_INUSE);
3369        set_foot(p, size);
3370
3371        check_free_chunk(av, p);
3372      }
3373
3374      /*
3375         If the chunk borders the current high end of memory,
3376         consolidate into top
3377      */
3378
3379      else {
3380        size += nextsize;
3381        set_head(p, size | PREV_INUSE);
3382        av->top = p;
3383        check_chunk(av, p);
3384      }
3385
3386      /*
3387        If freeing a large space, consolidate possibly-surrounding
3388        chunks. Then, if the total unused topmost memory exceeds trim
3389        threshold, ask malloc_trim to reduce top.
3390
3391        Unless max_fast is 0, we don't know if there are fastbins
3392        bordering top, so we cannot tell for sure whether threshold
3393        has been reached unless fastbins are consolidated.  But we
3394        don't want to consolidate on each free.  As a compromise,
3395        consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
3396        is reached.
3397      */
3398
3399      if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
3400        if (have_fastchunks(av))
3401          malloc_consolidate(av);
3402      }
3403    }
3404  }
3405}
3406
3407/*
3408  ------------------------- malloc_consolidate -------------------------
3409
3410  malloc_consolidate is a specialized version of free() that tears
3411  down chunks held in fastbins.  Free itself cannot be used for this
3412  purpose since, among other things, it might place chunks back onto
3413  fastbins.  So, instead, we need to use a minor variant of the same
3414  code.
3415
3416  Also, because this routine needs to be called the first time through
3417  malloc anyway, it turns out to be the perfect place to trigger
3418  initialization code.
3419*/
3420
3421#if __STD_C
3422static void malloc_consolidate(mstate av)
3423#else
3424static void malloc_consolidate(av) mstate av;
3425#endif
3426{
3427  mfastbinptr*    fb;                 /* current fastbin being consolidated */
3428  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
3429  mchunkptr       p;                  /* current chunk being consolidated */
3430  mchunkptr       nextp;              /* next chunk to consolidate */
3431  mchunkptr       unsorted_bin;       /* bin header */
3432  mchunkptr       first_unsorted;     /* chunk to link to */
3433
3434  /* These have same use as in free() */
3435  mchunkptr       nextchunk;
3436  INTERNAL_SIZE_T size;
3437  INTERNAL_SIZE_T nextsize;
3438  INTERNAL_SIZE_T prevsize;
3439  int             nextinuse;
3440  mchunkptr       bck;
3441  mchunkptr       fwd;
3442
3443  /*
3444    If max_fast is 0, we know that av hasn't
3445    yet been initialized, in which case do so below
3446  */
3447
3448  if (av->max_fast != 0) {
3449    clear_fastchunks(av);
3450
3451    unsorted_bin = unsorted_chunks(av);
3452
3453    /*
3454      Remove each chunk from fast bin and consolidate it, placing it
3455      then in unsorted bin. Among other reasons for doing this,
3456      placing in unsorted bin avoids needing to calculate actual bins
3457      until malloc is sure that chunks aren't immediately going to be
3458      reused anyway.
3459    */
3460
3461    maxfb = &(av->fastbins[fastbin_index(av->max_fast)]);
3462    fb = &(av->fastbins[0]);
3463    do {
3464      if ( (p = *fb) != 0) {
3465        *fb = 0;
3466
3467        do {
3468          check_inuse_chunk(av, p);
3469          nextp = p->fd;
3470
3471          /* Slightly streamlined version of consolidation code in free() */
3472          size = p->size & ~(PREV_INUSE);
3473          nextchunk = chunk_at_offset(p, size);
3474          nextsize = chunksize(nextchunk);
3475
3476          if (!prev_inuse(p)) {
3477            prevsize = p->prev_size;
3478            size += prevsize;
3479            p = chunk_at_offset(p, -((long) prevsize));
3480            unlink(p, bck, fwd);
3481          }
3482
3483          if (nextchunk != av->top) {
3484            nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
3485
3486            if (!nextinuse) {
3487              size += nextsize;
3488              unlink(nextchunk, bck, fwd);
3489            } else
3490	      clear_inuse_bit_at_offset(nextchunk, 0);
3491
3492            first_unsorted = unsorted_bin->fd;
3493            unsorted_bin->fd = p;
3494            first_unsorted->bk = p;
3495
3496            set_head(p, size | PREV_INUSE);
3497            p->bk = unsorted_bin;
3498            p->fd = first_unsorted;
3499            set_foot(p, size);
3500          }
3501
3502          else {
3503            size += nextsize;
3504            set_head(p, size | PREV_INUSE);
3505            av->top = p;
3506          }
3507
3508        } while ( (p = nextp) != 0);
3509
3510      }
3511    } while (fb++ != maxfb);
3512  }
3513  else {
3514    malloc_init_state(av);
3515    check_malloc_state(av);
3516  }
3517}
3518
3519/*
3520  ------------------------------ realloc ------------------------------
3521*/
3522
3523static Void_t*
3524_int_realloc(mstate av, Void_t* oldmem, size_t bytes)
3525{
3526  INTERNAL_SIZE_T  nb;              /* padded request size */
3527
3528  mchunkptr        oldp;            /* chunk corresponding to oldmem */
3529  INTERNAL_SIZE_T  oldsize;         /* its size */
3530
3531  mchunkptr        newp;            /* chunk to return */
3532  INTERNAL_SIZE_T  newsize;         /* its size */
3533  Void_t*          newmem;          /* corresponding user mem */
3534
3535  mchunkptr        next;            /* next contiguous chunk after oldp */
3536
3537  mchunkptr        remainder;       /* extra space at end of newp */
3538  unsigned long    remainder_size;  /* its size */
3539
3540  mchunkptr        bck;             /* misc temp for linking */
3541  mchunkptr        fwd;             /* misc temp for linking */
3542
3543  unsigned long    copysize;        /* bytes to copy */
3544  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
3545  INTERNAL_SIZE_T* s;               /* copy source */
3546  INTERNAL_SIZE_T* d;               /* copy destination */
3547
3548
3549#if REALLOC_ZERO_BYTES_FREES
3550  if (bytes == 0) {
3551    _int_free(av, oldmem);
3552    return 0;
3553  }
3554#endif
3555
3556  /* realloc of null is supposed to be same as malloc */
3557  if (oldmem == 0) return _int_malloc(av, bytes);
3558
3559  checked_request2size(bytes, nb);
3560
3561  oldp    = mem2chunk(oldmem);
3562  oldsize = chunksize(oldp);
3563
3564  check_inuse_chunk(av, oldp);
3565
3566  // force to act like not mmapped
3567  if (1) {
3568
3569    if ((unsigned long)(oldsize) >= (unsigned long)(nb)) {
3570      /* already big enough; split below */
3571      newp = oldp;
3572      newsize = oldsize;
3573    }
3574
3575    else {
3576      next = chunk_at_offset(oldp, oldsize);
3577
3578      /* Try to expand forward into top */
3579      if (next == av->top &&
3580          (unsigned long)(newsize = oldsize + chunksize(next)) >=
3581          (unsigned long)(nb + MINSIZE)) {
3582        set_head_size(oldp, nb );
3583        av->top = chunk_at_offset(oldp, nb);
3584        set_head(av->top, (newsize - nb) | PREV_INUSE);
3585    	check_inuse_chunk(av, oldp);
3586        set_arena_for_chunk(oldp, av);
3587        return chunk2mem(oldp);
3588      }
3589
3590      /* Try to expand forward into next chunk;  split off remainder below */
3591      else if (next != av->top &&
3592               !inuse(next) &&
3593               (unsigned long)(newsize = oldsize + chunksize(next)) >=
3594               (unsigned long)(nb)) {
3595        newp = oldp;
3596        unlink(next, bck, fwd);
3597      }
3598
3599      /* allocate, copy, free */
3600      else {
3601        newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK);
3602        if (newmem == 0)
3603          return 0; /* propagate failure */
3604
3605        newp = mem2chunk(newmem);
3606        newsize = chunksize(newp);
3607
3608        /*
3609          Avoid copy if newp is next chunk after oldp.
3610        */
3611        if (newp == next) {
3612          newsize += oldsize;
3613          newp = oldp;
3614        }
3615        else {
3616          /*
3617            Unroll copy of <= 36 bytes (72 if 8byte sizes)
3618            We know that contents have an odd number of
3619            INTERNAL_SIZE_T-sized words; minimally 3.
3620          */
3621
3622          copysize = oldsize - SIZE_SZ;
3623          s = (INTERNAL_SIZE_T*)(oldmem);
3624          d = (INTERNAL_SIZE_T*)(newmem);
3625          ncopies = copysize / sizeof(INTERNAL_SIZE_T);
3626          assert(ncopies >= 3);
3627
3628          if (ncopies > 9)
3629            MALLOC_COPY(d, s, copysize);
3630
3631          else {
3632            *(d+0) = *(s+0);
3633            *(d+1) = *(s+1);
3634            *(d+2) = *(s+2);
3635            if (ncopies > 4) {
3636              *(d+3) = *(s+3);
3637              *(d+4) = *(s+4);
3638              if (ncopies > 6) {
3639                *(d+5) = *(s+5);
3640                *(d+6) = *(s+6);
3641                if (ncopies > 8) {
3642                  *(d+7) = *(s+7);
3643                  *(d+8) = *(s+8);
3644                }
3645              }
3646            }
3647          }
3648
3649          _int_free(av, oldmem);
3650          set_arena_for_chunk(newp, av);
3651          check_inuse_chunk(av, newp);
3652          return chunk2mem(newp);
3653        }
3654      }
3655    }
3656
3657    /* If possible, free extra space in old or extended chunk */
3658
3659    assert((unsigned long)(newsize) >= (unsigned long)(nb));
3660
3661    remainder_size = newsize - nb;
3662
3663    if (remainder_size < MINSIZE) { /* not enough extra to split off */
3664      set_head_size(newp, newsize);
3665      set_inuse_bit_at_offset(newp, newsize);
3666    }
3667    else { /* split remainder */
3668      remainder = chunk_at_offset(newp, nb);
3669      set_head_size(newp, nb );
3670      set_head(remainder, remainder_size | PREV_INUSE );
3671      /* Mark remainder as inuse so free() won't complain */
3672      set_inuse_bit_at_offset(remainder, remainder_size);
3673      set_arena_for_chunk(remainder, av);
3674      _int_free(av, chunk2mem(remainder));
3675    }
3676
3677    set_arena_for_chunk(newp, av);
3678    check_inuse_chunk(av, newp);
3679    return chunk2mem(newp);
3680  }
3681
3682  /*
3683    Handle mmap cases
3684  */
3685
3686  else {
3687    /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */
3688    check_malloc_state(av);
3689    MALLOC_FAILURE_ACTION;
3690    return 0;
3691  }
3692}
3693
3694/*
3695  ------------------------------ memalign ------------------------------
3696*/
3697
3698static Void_t*
3699_int_memalign(mstate av, size_t alignment, size_t bytes)
3700{
3701  INTERNAL_SIZE_T nb;             /* padded  request size */
3702  char*           m;              /* memory returned by malloc call */
3703  mchunkptr       p;              /* corresponding chunk */
3704  char*           brk;            /* alignment point within p */
3705  mchunkptr       newp;           /* chunk to return */
3706  INTERNAL_SIZE_T newsize;        /* its size */
3707  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
3708  mchunkptr       remainder;      /* spare room at end to split off */
3709  unsigned long   remainder_size; /* its size */
3710  INTERNAL_SIZE_T size;
3711
3712  /* If need less alignment than we give anyway, just relay to malloc */
3713
3714  if (alignment <= MALLOC_ALIGNMENT) return _int_malloc(av, bytes);
3715
3716  /* Otherwise, ensure that it is at least a minimum chunk size */
3717
3718  if (alignment <  MINSIZE) alignment = MINSIZE;
3719
3720  /* Make sure alignment is power of 2 (in case MINSIZE is not).  */
3721  if ((alignment & (alignment - 1)) != 0) {
3722    size_t a = MALLOC_ALIGNMENT * 2;
3723    while ((unsigned long)a < (unsigned long)alignment) a <<= 1;
3724    alignment = a;
3725  }
3726
3727  checked_request2size(bytes, nb);
3728
3729  /*
3730    Strategy: find a spot within that chunk that meets the alignment
3731    request, and then possibly free the leading and trailing space.
3732  */
3733
3734
3735  /* Call malloc with worst case padding to hit alignment. */
3736
3737  m  = (char*)(_int_malloc(av, nb + alignment + MINSIZE));
3738
3739  if (m == 0) return 0; /* propagate failure */
3740
3741  p = mem2chunk(m);
3742
3743  if ((((unsigned long)(m)) % alignment) != 0) { /* misaligned */
3744
3745    /*
3746      Find an aligned spot inside chunk.  Since we need to give back
3747      leading space in a chunk of at least MINSIZE, if the first
3748      calculation places us at a spot with less than MINSIZE leader,
3749      we can move to the next aligned spot -- we've allocated enough
3750      total room so that this is always possible.
3751    */
3752
3753    brk = (char*)mem2chunk(((unsigned long)(m + alignment - 1)) &
3754                           -((signed long) alignment));
3755    if ((unsigned long)(brk - (char*)(p)) < MINSIZE)
3756      brk += alignment;
3757
3758    newp = (mchunkptr)brk;
3759    leadsize = brk - (char*)(p);
3760    newsize = chunksize(p) - leadsize;
3761
3762    /* For mmapped chunks, just adjust offset */
3763    if (chunk_is_mmapped(p)) {
3764      newp->prev_size = p->prev_size + leadsize;
3765      set_head(newp, newsize|IS_MMAPPED);
3766      set_arena_for_chunk(newp, av);
3767      return chunk2mem(newp);
3768    }
3769
3770    /* Otherwise, give back leader, use the rest */
3771    set_head(newp, newsize | PREV_INUSE );
3772    set_inuse_bit_at_offset(newp, newsize);
3773    set_head_size(p, leadsize);
3774    set_arena_for_chunk(p, av);
3775    _int_free(av, chunk2mem(p));
3776    p = newp;
3777
3778    assert (newsize >= nb &&
3779            (((unsigned long)(chunk2mem(p))) % alignment) == 0);
3780  }
3781
3782  /* Also give back spare room at the end */
3783  if (!chunk_is_mmapped(p)) {
3784    size = chunksize(p);
3785    if ((unsigned long)(size) > (unsigned long)(nb + MINSIZE)) {
3786      remainder_size = size - nb;
3787      remainder = chunk_at_offset(p, nb);
3788      set_head(remainder, remainder_size | PREV_INUSE );
3789      set_head_size(p, nb);
3790      set_arena_for_chunk(remainder, av);
3791      _int_free(av, chunk2mem(remainder));
3792    }
3793  }
3794
3795  set_arena_for_chunk(p, av);
3796  check_inuse_chunk(av, p);
3797  return chunk2mem(p);
3798}
3799
3800#if 1
3801/*
3802  ------------------------------ calloc ------------------------------
3803*/
3804
3805#if __STD_C
3806Void_t* cALLOc(cvmx_arena_list_t arena_list, size_t n_elements, size_t elem_size)
3807#else
3808Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size;
3809#endif
3810{
3811  mchunkptr p;
3812  unsigned long clearsize;
3813  unsigned long nclears;
3814  INTERNAL_SIZE_T* d;
3815
3816  Void_t* mem = public_mALLOc(arena_list, n_elements * elem_size);
3817
3818  if (mem != 0) {
3819    p = mem2chunk(mem);
3820
3821    {
3822      /*
3823        Unroll clear of <= 36 bytes (72 if 8byte sizes)
3824        We know that contents have an odd number of
3825        INTERNAL_SIZE_T-sized words; minimally 3.
3826      */
3827
3828      d = (INTERNAL_SIZE_T*)mem;
3829      clearsize = chunksize(p) - SIZE_SZ;
3830      nclears = clearsize / sizeof(INTERNAL_SIZE_T);
3831      assert(nclears >= 3);
3832
3833      if (nclears > 9)
3834        MALLOC_ZERO(d, clearsize);
3835
3836      else {
3837        *(d+0) = 0;
3838        *(d+1) = 0;
3839        *(d+2) = 0;
3840        if (nclears > 4) {
3841          *(d+3) = 0;
3842          *(d+4) = 0;
3843          if (nclears > 6) {
3844            *(d+5) = 0;
3845            *(d+6) = 0;
3846            if (nclears > 8) {
3847              *(d+7) = 0;
3848              *(d+8) = 0;
3849            }
3850          }
3851        }
3852      }
3853    }
3854  }
3855  return mem;
3856}
3857#endif
3858
3859
3860/*
3861  ------------------------- malloc_usable_size -------------------------
3862*/
3863
3864#if __STD_C
3865size_t mUSABLe(Void_t* mem)
3866#else
3867size_t mUSABLe(mem) Void_t* mem;
3868#endif
3869{
3870  mchunkptr p;
3871  if (mem != 0) {
3872    p = mem2chunk(mem);
3873    if (chunk_is_mmapped(p))
3874      return chunksize(p) - 3*SIZE_SZ; /* updated size for adding arena_ptr */
3875    else if (inuse(p))
3876      return chunksize(p) - 2*SIZE_SZ; /* updated size for adding arena_ptr */
3877  }
3878  return 0;
3879}
3880
3881/*
3882  ------------------------------ mallinfo ------------------------------
3883*/
3884
3885struct mallinfo mALLINFo(mstate av)
3886{
3887  struct mallinfo mi;
3888  int i;
3889  mbinptr b;
3890  mchunkptr p;
3891  INTERNAL_SIZE_T avail;
3892  INTERNAL_SIZE_T fastavail;
3893  int nblocks;
3894  int nfastblocks;
3895
3896  /* Ensure initialization */
3897  if (av->top == 0)  malloc_consolidate(av);
3898
3899  check_malloc_state(av);
3900
3901  /* Account for top */
3902  avail = chunksize(av->top);
3903  nblocks = 1;  /* top always exists */
3904
3905  /* traverse fastbins */
3906  nfastblocks = 0;
3907  fastavail = 0;
3908
3909  for (i = 0; i < NFASTBINS; ++i) {
3910    for (p = av->fastbins[i]; p != 0; p = p->fd) {
3911      ++nfastblocks;
3912      fastavail += chunksize(p);
3913    }
3914  }
3915
3916  avail += fastavail;
3917
3918  /* traverse regular bins */
3919  for (i = 1; i < NBINS; ++i) {
3920    b = bin_at(av, i);
3921    for (p = last(b); p != b; p = p->bk) {
3922      ++nblocks;
3923      avail += chunksize(p);
3924    }
3925  }
3926
3927  mi.smblks = nfastblocks;
3928  mi.ordblks = nblocks;
3929  mi.fordblks = avail;
3930  mi.uordblks = av->system_mem - avail;
3931  mi.arena = av->system_mem;
3932  mi.fsmblks = fastavail;
3933  mi.keepcost = chunksize(av->top);
3934  return mi;
3935}
3936
3937/*
3938  ------------------------------ malloc_stats ------------------------------
3939*/
3940
3941void mSTATs()
3942{
3943}
3944
3945
3946/*
3947  ------------------------------ mallopt ------------------------------
3948*/
3949
3950#if 0
3951#if __STD_C
3952int mALLOPt(int param_number, int value)
3953#else
3954int mALLOPt(param_number, value) int param_number; int value;
3955#endif
3956{
3957}
3958#endif
3959
3960
3961/*
3962  -------------------- Alternative MORECORE functions --------------------
3963*/
3964
3965
3966/*
3967  General Requirements for MORECORE.
3968
3969  The MORECORE function must have the following properties:
3970
3971  If MORECORE_CONTIGUOUS is false:
3972
3973    * MORECORE must allocate in multiples of pagesize. It will
3974      only be called with arguments that are multiples of pagesize.
3975
3976    * MORECORE(0) must return an address that is at least
3977      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
3978
3979  else (i.e. If MORECORE_CONTIGUOUS is true):
3980
3981    * Consecutive calls to MORECORE with positive arguments
3982      return increasing addresses, indicating that space has been
3983      contiguously extended.
3984
3985    * MORECORE need not allocate in multiples of pagesize.
3986      Calls to MORECORE need not have args of multiples of pagesize.
3987
3988    * MORECORE need not page-align.
3989
3990  In either case:
3991
3992    * MORECORE may allocate more memory than requested. (Or even less,
3993      but this will generally result in a malloc failure.)
3994
3995    * MORECORE must not allocate memory when given argument zero, but
3996      instead return one past the end address of memory from previous
3997      nonzero call. This malloc does NOT call MORECORE(0)
3998      until at least one call with positive arguments is made, so
3999      the initial value returned is not important.
4000
4001    * Even though consecutive calls to MORECORE need not return contiguous
4002      addresses, it must be OK for malloc'ed chunks to span multiple
4003      regions in those cases where they do happen to be contiguous.
4004
4005    * MORECORE need not handle negative arguments -- it may instead
4006      just return MORECORE_FAILURE when given negative arguments.
4007      Negative arguments are always multiples of pagesize. MORECORE
4008      must not misinterpret negative args as large positive unsigned
4009      args. You can suppress all such calls from even occurring by defining
4010      MORECORE_CANNOT_TRIM,
4011
4012  There is some variation across systems about the type of the
4013  argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
4014  actually be size_t, because sbrk supports negative args, so it is
4015  normally the signed type of the same width as size_t (sometimes
4016  declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much
4017  matter though. Internally, we use "long" as arguments, which should
4018  work across all reasonable possibilities.
4019
4020  Additionally, if MORECORE ever returns failure for a positive
4021  request, and HAVE_MMAP is true, then mmap is used as a noncontiguous
4022  system allocator. This is a useful backup strategy for systems with
4023  holes in address spaces -- in this case sbrk cannot contiguously
4024  expand the heap, but mmap may be able to map noncontiguous space.
4025
4026  If you'd like mmap to ALWAYS be used, you can define MORECORE to be
4027  a function that always returns MORECORE_FAILURE.
4028
4029  If you are using this malloc with something other than sbrk (or its
4030  emulation) to supply memory regions, you probably want to set
4031  MORECORE_CONTIGUOUS as false.  As an example, here is a custom
4032  allocator kindly contributed for pre-OSX macOS.  It uses virtually
4033  but not necessarily physically contiguous non-paged memory (locked
4034  in, present and won't get swapped out).  You can use it by
4035  uncommenting this section, adding some #includes, and setting up the
4036  appropriate defines above:
4037
4038      #define MORECORE osMoreCore
4039      #define MORECORE_CONTIGUOUS 0
4040
4041  There is also a shutdown routine that should somehow be called for
4042  cleanup upon program exit.
4043
4044  #define MAX_POOL_ENTRIES 100
4045  #define MINIMUM_MORECORE_SIZE  (64 * 1024)
4046  static int next_os_pool;
4047  void *our_os_pools[MAX_POOL_ENTRIES];
4048
4049  void *osMoreCore(int size)
4050  {
4051    void *ptr = 0;
4052    static void *sbrk_top = 0;
4053
4054    if (size > 0)
4055    {
4056      if (size < MINIMUM_MORECORE_SIZE)
4057         size = MINIMUM_MORECORE_SIZE;
4058      if (CurrentExecutionLevel() == kTaskLevel)
4059         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
4060      if (ptr == 0)
4061      {
4062        return (void *) MORECORE_FAILURE;
4063      }
4064      // save ptrs so they can be freed during cleanup
4065      our_os_pools[next_os_pool] = ptr;
4066      next_os_pool++;
4067      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
4068      sbrk_top = (char *) ptr + size;
4069      return ptr;
4070    }
4071    else if (size < 0)
4072    {
4073      // we don't currently support shrink behavior
4074      return (void *) MORECORE_FAILURE;
4075    }
4076    else
4077    {
4078      return sbrk_top;
4079    }
4080  }
4081
4082  // cleanup any allocated memory pools
4083  // called as last thing before shutting down driver
4084
4085  void osCleanupMem(void)
4086  {
4087    void **ptr;
4088
4089    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
4090      if (*ptr)
4091      {
4092         PoolDeallocate(*ptr);
4093         *ptr = 0;
4094      }
4095  }
4096
4097*/
4098
4099
4100
4101/* ------------------------------------------------------------
4102History:
4103
4104[see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc]
4105
4106*/
4107