From 8d532c51318bad2436880ecac972c9dfa3996c9b Mon Sep 17 00:00:00 2001
From: Eric Andersen
Date: Tue, 30 Dec 2003 10:40:49 +0000
Subject: Rework malloc. The new default implementation is based on dlmalloc
 from Doug Lea. It is about 2x faster than the old malloc-930716, and behaves
 itself much better -- it will properly release memory back to the system,
 and it uses a combination of brk() for small allocations and mmap() for
 larger allocations.
 -Erik
---
 libc/stdlib/malloc-standard/malloc.h | 953 +++++++++++++++++++++++++++++++++++
 1 file changed, 953 insertions(+)
 create mode 100644 libc/stdlib/malloc-standard/malloc.h

diff --git a/libc/stdlib/malloc-standard/malloc.h b/libc/stdlib/malloc-standard/malloc.h
new file mode 100644
index 000000000..46858332d
--- /dev/null
+++ b/libc/stdlib/malloc-standard/malloc.h
@@ -0,0 +1,953 @@
/*
  This is a version (aka dlmalloc) of malloc/free/realloc written by
  Doug Lea and released to the public domain.  Use, modify, and
  redistribute this code without permission or acknowledgement in any
  way you wish.  Send questions, comments, complaints, performance
  data, etc. to dl@cs.oswego.edu

  VERSION 2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)

  Note: There may be an updated version of this malloc obtainable at
  ftp://gee.cs.oswego.edu/pub/misc/malloc.c
  Check before installing!

  Hacked up for uClibc by Erik Andersen
*/

#include <features.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <malloc.h>


#ifdef __UCLIBC_HAS_THREADS__
#include <pthread.h>
extern pthread_mutex_t __malloc_lock;
# define LOCK   __pthread_mutex_lock(&__malloc_lock)
# define UNLOCK __pthread_mutex_unlock(&__malloc_lock)
#else
# define LOCK
# define UNLOCK
#endif
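/*
  A standalone sketch, not part of this patch: how the LOCK/UNLOCK pair
  above is meant to bracket a public entry point.  The mutex name and
  the function shown here are stand-ins, not the real uClibc internals.
  Compile separately with -lpthread.
*/
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

static pthread_mutex_t demo_malloc_lock = PTHREAD_MUTEX_INITIALIZER;
#define DEMO_LOCK   pthread_mutex_lock(&demo_malloc_lock)
#define DEMO_UNLOCK pthread_mutex_unlock(&demo_malloc_lock)

static void *locked_alloc(size_t bytes)
{
    void *mem;
    DEMO_LOCK;            /* one global lock serializes all heap work */
    mem = malloc(bytes);  /* stands in for the real allocation code   */
    DEMO_UNLOCK;
    return mem;
}

int main(void)
{
    void *p = locked_alloc(32);
    printf("got %p\n", p);
    free(p);
    return 0;
}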
/*
  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.  It
  must be a power of two, at least 2 * sizeof(size_t), even on machines
  for which smaller alignments would suffice.  It may be defined as
  larger than this though.  Note however that code and data structures
  are optimized for the case of 8-byte alignment.
*/
#ifndef MALLOC_ALIGNMENT
#define MALLOC_ALIGNMENT (2 * (sizeof(size_t)))
#endif

/* The corresponding bit mask value */
#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)

/*
  TRIM_FASTBINS controls whether free() of a very small chunk can
  immediately lead to trimming.  Setting to true (1) can reduce memory
  footprint, but will almost always slow down programs that use a lot
  of small chunks.

  Define this only if you are willing to give up some speed to more
  aggressively reduce system-level memory footprint when releasing
  memory in programs that use many small chunks.  You can get
  essentially the same effect by setting MXFAST to 0, but this can
  lead to even greater slowdowns in programs using many small chunks.
  TRIM_FASTBINS is an in-between compile-time option, that disables
  only those chunks bordering topmost memory from being placed in
  fastbins.
*/
#ifndef TRIM_FASTBINS
#define TRIM_FASTBINS 0
#endif


/*
  MORECORE-related declarations.  By default, rely on sbrk.
*/

/*
  MORECORE is the name of the routine to call to obtain more memory
  from the system.  See below for general guidance on writing
  alternative MORECORE functions, as well as a version for WIN32 and a
  sample version for pre-OSX macos.
*/
#ifndef MORECORE
#define MORECORE sbrk
#endif

/*
  MORECORE_FAILURE is the value returned upon failure of MORECORE as
  well as mmap.  Since it cannot be an otherwise valid memory address,
  and must reflect values of standard sys calls, you probably ought not
  try to redefine it.
*/
#ifndef MORECORE_FAILURE
#define MORECORE_FAILURE (-1)
#endif

/*
  If MORECORE_CONTIGUOUS is true, take advantage of the fact that
  consecutive calls to MORECORE with positive arguments always return
  contiguous increasing addresses.  This is true of unix sbrk.  Even
  if not defined, when regions happen to be contiguous, malloc will
  permit allocations spanning regions obtained from different calls.
  But defining this when applicable enables some stronger consistency
  checks and space efficiencies.
*/
#ifndef MORECORE_CONTIGUOUS
#define MORECORE_CONTIGUOUS 1
#endif

/*
  MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
  sbrk fails, and mmap is used as a backup (which is done only if
  HAVE_MMAP).  The value must be a multiple of the page size.  This
  backup strategy generally applies only when systems have "holes" in
  address space, so sbrk cannot perform contiguous expansion, but
  there is still space available on the system.  On systems for which
  this is known to be useful (i.e. most linux kernels), this occurs
  only when programs allocate huge amounts of memory.  Between this,
  and the fact that mmap regions tend to be limited, the size should
  be large, to avoid too many mmap calls and thus avoid running out
  of kernel resources.
*/
#ifndef MMAP_AS_MORECORE_SIZE
#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
#endif

/*
  The system page size.  To the extent possible, this malloc manages
  memory from the system in page-size units.  Note that this value is
  cached during initialization into a field of malloc_state.  So even
  if malloc_getpagesize is a function, it is only called once.

  The following mechanics for getpagesize were adapted from bsd/gnu
  getpagesize.h.  If none of the system-probes here apply, a value of
  4096 is used, which should be OK: if they don't apply, then using
  the actual value probably doesn't impact performance.
*/
#ifndef malloc_getpagesize
# include <unistd.h>
# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
#else /* just guess */
# define malloc_getpagesize (4096)
#endif
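/*
  A standalone sketch, not part of this patch: the page-size query that
  malloc_getpagesize performs, plus the round-up-to-a-page arithmetic
  this allocator applies to sbrk/mmap requests.  The mask trick works
  because the page size is a power of two.
*/
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    size_t page    = (size_t)sysconf(_SC_PAGE_SIZE);
    size_t request = 5000;
    size_t rounded = (request + page - 1) & ~(page - 1);

    /* With 4096-byte pages: 5000 rounds up to 8192. */
    printf("page=%zu request=%zu rounded=%zu\n", page, request, rounded);
    return 0;
}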
/* mallopt tuning options */

/*
  M_MXFAST is the maximum request size used for "fastbins", special bins
  that hold returned chunks without consolidating their spaces.  This
  enables future requests for chunks of the same size to be handled
  very quickly, but can increase fragmentation, and thus increase the
  overall memory footprint of a program.

  This malloc manages fastbins very conservatively yet still
  efficiently, so fragmentation is rarely a problem for values less
  than or equal to the default.  The maximum supported value of MXFAST
  is 80.  You wouldn't want it any higher than this anyway.  Fastbins
  are designed especially for use with many small structs, objects or
  strings -- the default handles structs/objects/arrays with sizes up
  to sixteen 4-byte fields, or small strings representing words,
  tokens, etc.  Using fastbins for larger objects normally worsens
  fragmentation without improving speed.

  M_MXFAST is set in REQUEST size units.  It is internally used in
  chunksize units, which adds padding and alignment.  You can reduce
  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
  algorithm to be a closer approximation of fifo-best-fit in all cases,
  not just for larger requests, but will generally cause it to be
  slower.
*/

/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
#ifndef M_MXFAST
#define M_MXFAST 1
#endif

#ifndef DEFAULT_MXFAST
#define DEFAULT_MXFAST 64
#endif


/*
  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory to
  keep before releasing via malloc_trim in free().

  Automatic trimming is mainly useful in long-lived programs.  Because
  trimming via sbrk can be slow on some systems, and can sometimes be
  wasteful (in cases where programs immediately afterward allocate
  more large chunks) the value should be high enough so that your
  overall system performance would improve by releasing this much
  memory.

  The trim threshold and the mmap control parameters (see below) can
  be traded off with one another.  Trimming and mmapping are two
  different ways of releasing unused memory back to the system.
  Between these two, it is often possible to keep system-level demands
  of a long-lived program down to a bare minimum.  For example, in one
  test suite of sessions measuring the XF86 X server on Linux, using a
  trim threshold of 128K and a mmap threshold of 192K led to
  near-minimal long term resource consumption.

  If you are using this malloc in a long-lived program, it should pay
  to experiment with these values.  As a rough guide, you might set it
  to a value close to the average size of a process (program) running
  on your system.  Releasing this much memory would allow such a
  process to run in memory.  Generally, it's worth it to tune for
  trimming rather than memory mapping when a program undergoes phases
  where several large chunks are allocated and released in ways that
  can reuse each other's storage, perhaps mixed with phases where
  there are no such chunks at all.  And in well-behaved long-lived
  programs, controlling release of large blocks via trimming versus
  mapping is usually faster.

  However, in most programs, these parameters serve mainly as
  protection against the system-level effects of carrying around
  massive amounts of unneeded memory.  Since frequent calls to sbrk,
  mmap, and munmap otherwise degrade performance, the default
  parameters are set to relatively high values that serve only as
  safeguards.

  The trim value must be greater than the page size to have any useful
  effect.  To disable trimming completely, you can set it to
  (unsigned long)(-1).

  Trim settings interact with fastbin (MXFAST) settings: unless
  TRIM_FASTBINS is defined, automatic trimming never takes place upon
  freeing a chunk with size less than or equal to MXFAST.  Trimming is
  instead delayed until subsequent freeing of larger chunks.  However,
  you can still force an attempted trim by calling malloc_trim.

  Also, trimming is not generally possible in cases where the main
  arena is obtained via mmap.

  Note that the trick some people use of mallocing a huge space and
  then freeing it at program startup, in an attempt to reserve system
  memory, doesn't have the intended effect under automatic trimming,
  since that memory will immediately be returned to the system.
*/
#define M_TRIM_THRESHOLD -1

#ifndef DEFAULT_TRIM_THRESHOLD
#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
#endif
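/*
  A standalone sketch, not part of this patch: adjusting the tunables
  above through the standard mallopt() interface.  The values chosen
  are illustrative only.
*/
#include <malloc.h>

int main(void)
{
    mallopt(M_TRIM_THRESHOLD, 128 * 1024); /* trim top when 128K is unused */
    mallopt(M_MXFAST, 64);                 /* fastbins for requests <= 64  */
    return 0;
}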
/*
  M_TOP_PAD is the amount of extra `padding' space to allocate or
  retain whenever sbrk is called.  It is used in two ways internally:

  * When sbrk is called to extend the top of the arena to satisfy a
    new malloc request, this much padding is added to the sbrk
    request.

  * When malloc_trim is called automatically from free(), it is used
    as the `pad' argument.

  In both cases, the actual amount of padding is rounded so that the
  end of the arena is always a system page boundary.

  The main reason for using padding is to avoid calling sbrk so often.
  Having even a small pad greatly reduces the likelihood that nearly
  every malloc request during program start-up (or after trimming)
  will invoke sbrk, which needlessly wastes time.

  Automatic rounding-up to page-size units is normally sufficient to
  avoid measurable overhead, so the default is 0.  However, in systems
  where sbrk is relatively slow, it can pay to increase this value, at
  the expense of carrying around more memory than the program needs.
*/
#define M_TOP_PAD -2

#ifndef DEFAULT_TOP_PAD
#define DEFAULT_TOP_PAD (0)
#endif

/*
  M_MMAP_THRESHOLD is the request size threshold for using mmap() to
  service a request.  Requests of at least this size that cannot be
  allocated using already-existing space will be serviced via mmap.
  (If enough normal freed space already exists it is used instead.)

  Using mmap segregates relatively large chunks of memory so that they
  can be individually obtained and released from the host system.  A
  request serviced through mmap is never reused by any other request
  (at least not directly; the system may just so happen to remap
  successive requests to the same locations).

  Segregating space in this way has the benefits that:

  1. Mmapped space can ALWAYS be individually released back to the
     system, which helps keep the system level memory demands of a
     long-lived program low.
  2. Mapped memory can never become `locked' between other chunks, as
     can happen with normally allocated chunks, which means that even
     trimming via malloc_trim would not release them.
  3. On some systems with "holes" in address spaces, mmap can obtain
     memory that sbrk cannot.

  However, it has the disadvantages that:

  1. The space cannot be reclaimed, consolidated, and then used to
     service later requests, as happens with normal chunks.
  2. It can lead to more wastage because of mmap page alignment
     requirements.
  3. It causes malloc performance to be more dependent on host system
     memory management support routines which may vary in
     implementation quality and may impose arbitrary limitations.
     Generally, servicing a request via normal malloc steps is faster
     than going through a system's mmap.

  The advantages of mmap nearly always outweigh the disadvantages for
  "large" chunks, but the value of "large" varies across systems.  The
  default is an empirically derived value that works well in most
  systems.
*/
#define M_MMAP_THRESHOLD -3

#ifndef DEFAULT_MMAP_THRESHOLD
#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
#endif
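/*
  A standalone sketch, not part of this patch: with the defaults above,
  a request below DEFAULT_MMAP_THRESHOLD is expected to come from the
  brk/sbrk heap, while a 512K request is likely to be serviced by mmap
  and handed straight back to the kernel on free.
*/
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    void *small = malloc(100);        /* heap (sbrk) chunk        */
    void *large = malloc(512 * 1024); /* likely an mmap()ed chunk */

    printf("small=%p large=%p\n", small, large);
    free(large);                      /* unmapped immediately      */
    free(small);
    return 0;
}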
/*
  M_MMAP_MAX is the maximum number of requests to simultaneously
  service using mmap.  This parameter exists because some systems have
  a limited number of internal tables for use by mmap, and using more
  than a few of them may degrade performance.

  The default is set to a value that serves only as a safeguard.
  Setting it to 0 disables use of mmap for servicing large requests.
  If HAVE_MMAP is not set, the default value is 0, and attempts to set
  it to non-zero values in mallopt will fail.
*/
#define M_MMAP_MAX -4

#ifndef DEFAULT_MMAP_MAX
#define DEFAULT_MMAP_MAX (65536)
#endif


/* ------------------ MMAP support ------------------ */
#include <fcntl.h>
#include <sys/mman.h>

#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif

#define MMAP(addr, size, prot, flags) \
 (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))


/* ----------------------- Chunk representations ----------------------- */

/*
  This struct declaration is misleading (but accurate and necessary).
  It declares a "view" into memory allowing access to necessary fields
  at known offsets from a given base.  See explanation below.
*/
struct malloc_chunk {

  size_t prev_size;        /* Size of previous chunk (if free).  */
  size_t size;             /* Size in bytes, including overhead. */

  struct malloc_chunk* fd; /* double links -- used only if free. */
  struct malloc_chunk* bk;
};

typedef struct malloc_chunk* mchunkptr;
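/*
  A standalone sketch, not part of this patch: the "view" nature of the
  struct above.  The user pointer begins right after the two size_t
  header words; fd/bk overlay the user data area and are meaningful
  only while the chunk is free, as the diagrams below explain.
*/
#include <stdio.h>
#include <stddef.h>

struct malloc_chunk {
    size_t prev_size;
    size_t size;
    struct malloc_chunk *fd;
    struct malloc_chunk *bk;
};

int main(void)
{
    printf("header bytes before user data: %zu\n", 2 * sizeof(size_t));
    printf("fd sits at offset %zu, on top of the user data area\n",
           offsetof(struct malloc_chunk, fd));
    return 0;
}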
/*
  malloc_chunk details:

  (The following includes lightly edited explanations by Colin Plumb.)

  Chunks of memory are maintained using a `boundary tag' method as
  described in e.g., Knuth or Standish.  (See the paper by Paul Wilson
  ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
  techniques.)  Sizes of free chunks are stored both in the front of
  each chunk and at the end.  This makes consolidating fragmented
  chunks into bigger chunks very fast.  The size fields also hold bits
  representing whether chunks are free or in use.

  An allocated chunk looks like this:

    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Size of previous chunk, if allocated                        | |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Size of chunk, in bytes                                     |P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | User data starts here...                                      .
            .                                                               .
            . (malloc_usable_space() bytes)                                 .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Size of chunk                                                 |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

  Where "chunk" is the front of the chunk for the purpose of most of
  the malloc code, but "mem" is the pointer that is returned to the
  user.  "Nextchunk" is the beginning of the next contiguous chunk.

  Chunks always begin on even word boundaries, so the mem portion
  (which is returned to the user) is also on an even word boundary,
  and thus at least double-word aligned.

  Free chunks are stored in circular doubly-linked lists, and look
  like this:

    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Size of previous chunk                                        |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `head:' | Size of chunk, in bytes                                     |P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Forward pointer to next chunk in list                         |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Back pointer to previous chunk in list                        |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            | Unused space (may be 0 bytes long)                            .
            .                                                               .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `foot:' | Size of chunk, in bytes                                       |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

  The P (PREV_INUSE) bit, stored in the unused low-order bit of the
  chunk size (which is always a multiple of two words), is an in-use
  bit for the *previous* chunk.  If that bit is *clear*, then the word
  before the current chunk size contains the previous chunk size, and
  can be used to find the front of the previous chunk.  The very first
  chunk allocated always has this bit set, preventing access to
  non-existent (or non-owned) memory.  If prev_inuse is set for any
  given chunk, then you CANNOT determine the size of the previous
  chunk, and might even get a memory addressing fault when trying to
  do so.

  Note that the `foot' of the current chunk is actually represented as
  the prev_size of the NEXT chunk.  This makes it easier to deal with
  alignments etc but can be very confusing when trying to extend or
  adapt this code.

  The two exceptions to all this are

  1. The special chunk `top' doesn't bother using the trailing size
     field since there is no next contiguous chunk that would have to
     index off it.  After initialization, `top' is forced to always
     exist.  If it would become less than MINSIZE bytes long, it is
     replenished.

  2. Chunks allocated via mmap, which have the second-lowest-order bit
     (IS_MMAPPED) set in their size fields.  Because they are
     allocated one-by-one, each must contain its own trailing size
     field.
*/

/*
  ---------- Size and alignment checks and conversions ----------
*/

/* conversion from malloc headers to user pointers, and back */
#define chunk2mem(p)   ((void*)((char*)(p) + 2*(sizeof(size_t))))
#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*(sizeof(size_t))))

/* The smallest possible chunk */
#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk))

/* The smallest size we can malloc is an aligned minimal chunk */
#define MINSIZE \
  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))

/* Check if m has acceptable alignment */
#define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0)

/*
  Check if a request is so large that it would wrap around zero when
  padded and aligned.  To simplify some other code, the bound is made
  low enough so that adding MINSIZE will also not wrap around zero.
*/
#define REQUEST_OUT_OF_RANGE(req) \
  ((unsigned long)(req) >= \
   (unsigned long)(size_t)(-2 * MINSIZE))

/* pad request bytes into a usable size -- internal version */
#define request2size(req) \
  (((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK < MINSIZE) ? \
   MINSIZE : \
   ((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/* Same, except also perform argument check */
#define checked_request2size(req, sz) \
  if (REQUEST_OUT_OF_RANGE(req)) { \
    errno = ENOMEM; \
    return 0; \
  } \
  (sz) = request2size(req);
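/*
  A standalone sketch, not part of this patch: the request -> chunk
  size arithmetic above, with local copies of the macros.  Figures in
  the comments assume a 32-bit build (sizeof(size_t) == 4, so 8-byte
  alignment and a 16-byte MINSIZE).
*/
#include <stdio.h>
#include <stddef.h>

#define MALLOC_ALIGNMENT  (2 * sizeof(size_t))
#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
#define MIN_CHUNK_SIZE    (4 * sizeof(size_t)) /* == sizeof(struct malloc_chunk) */
#define MINSIZE \
  (unsigned long)(((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))

#define request2size(req) \
  (((req) + sizeof(size_t) + MALLOC_ALIGN_MASK < MINSIZE) ? \
   MINSIZE : \
   ((req) + sizeof(size_t) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

int main(void)
{
    /* 32-bit expectations: 1 -> 16 (MINSIZE), 12 -> 16, 13 -> 24, 100 -> 104 */
    size_t reqs[] = { 1, 12, 13, 100 };
    size_t i;

    for (i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++)
        printf("request %3zu -> chunk %3lu\n",
               reqs[i], (unsigned long)request2size(reqs[i]));
    return 0;
}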
/*
  --------------- Physical chunk operations ---------------
*/

/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
#define PREV_INUSE 0x1

/* extract inuse bit of previous chunk */
#define prev_inuse(p) ((p)->size & PREV_INUSE)

/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
#define IS_MMAPPED 0x2

/* check for mmap()'ed chunk */
#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)

/*
  Bits to mask off when extracting size.

  Note: IS_MMAPPED is intentionally not masked off from the size field
  in macros for which mmapped chunks should never be seen.  This should
  cause helpful core dumps to occur if it is tried by accident by
  people extending or adapting this malloc.
*/
#define SIZE_BITS (PREV_INUSE|IS_MMAPPED)

/* Get size, ignoring use bits */
#define chunksize(p) ((p)->size & ~(SIZE_BITS))

/* Ptr to next physical malloc_chunk. */
#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) ))

/* Ptr to previous physical malloc_chunk */
#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))

/* Treat space at ptr + offset as a chunk */
#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))

/* extract p's inuse bit */
#define inuse(p) \
 ((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)

/* set/clear chunk as being inuse without otherwise disturbing */
#define set_inuse(p) \
 ((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE

#define clear_inuse(p) \
 ((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)

/* check/set/clear inuse bits in known places */
#define inuse_bit_at_offset(p, s) \
 (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)

#define set_inuse_bit_at_offset(p, s) \
 (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)

#define clear_inuse_bit_at_offset(p, s) \
 (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))

/* Set size at head, without disturbing its use bit */
#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s)))

/* Set size/use field */
#define set_head(p, s) ((p)->size = (s))

/* Set size at footer (only when chunk is not in use) */
#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))


/* -------------------- Internal data structures -------------------- */

/*
  Bins

  An array of bin headers for free chunks.  Each bin is doubly linked.
  The bins are approximately proportionally (log) spaced.  There are a
  lot of these bins (96).  This may look excessive, but works very
  well in practice.  Most bins hold sizes that are unusual as malloc
  request sizes, but are more usual for fragments and consolidated
  sets of chunks, which is what these bins hold, so they can be found
  quickly.  All procedures maintain the invariant that no consolidated
  chunk physically borders another one, so each chunk in a list is
  known to be preceded and followed by either inuse chunks or the ends
  of memory.

  Chunks in bins are kept in size order, with ties going to the
  approximately least recently used chunk.  Ordering isn't needed for
  the small bins, which all contain the same-sized chunks, but
  facilitates best-fit allocation for larger chunks.  These lists are
  just sequential.  Keeping them in order almost never requires enough
  traversal to warrant using fancier ordered data structures.

  Chunks of the same size are linked with the most recently freed at
  the front, and allocations are taken from the back.  This results in
  LRU (FIFO) allocation order, which tends to give each chunk an equal
  opportunity to be consolidated with adjacent freed chunks, resulting
  in larger free chunks and less fragmentation.

  To simplify use in double-linked lists, each bin header acts as a
  malloc_chunk.  This avoids special-casing for headers.  But to
  conserve space and improve locality, we allocate only the fd/bk
  pointers of bins, and then use repositioning tricks to treat these
  as the fields of a malloc_chunk*.
*/
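/*
  A standalone sketch, not part of this patch: boundary-tag navigation
  using local copies of the macros above (the struct is trimmed to the
  two header words), over two fake chunks carved from a static buffer.
*/
#include <stdio.h>
#include <stddef.h>

struct malloc_chunk { size_t prev_size, size; };
typedef struct malloc_chunk *mchunkptr;

#define PREV_INUSE 0x1
#define IS_MMAPPED 0x2
#define SIZE_BITS  (PREV_INUSE | IS_MMAPPED)
#define chunksize(p)  ((p)->size & ~(SIZE_BITS))
#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->size & ~PREV_INUSE)))
#define prev_inuse(p) ((p)->size & PREV_INUSE)

static size_t arena[16];                    /* size_t-aligned backing store */

int main(void)
{
    mchunkptr a = (mchunkptr)arena;

    a->size = 32 | PREV_INUSE;              /* 32-byte chunk, prev in use   */
    next_chunk(a)->size = 24 | PREV_INUSE;  /* its 24-byte neighbor         */

    printf("a: size %zu, prev_inuse %zu\n", chunksize(a), prev_inuse(a));
    printf("next chunk sits %td bytes into the arena\n",
           (char *)next_chunk(a) - (char *)arena);  /* 32 */
    return 0;
}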
typedef struct malloc_chunk* mbinptr;

/* addressing -- note that bin_at(0) does not exist */
#define bin_at(m, i) \
 ((mbinptr)((char*)&((m)->bins[(i)<<1]) - ((sizeof(size_t))<<1)))

/* analog of ++bin */
#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))

/* Reminders about list directionality within bins */
#define first(b) ((b)->fd)
#define last(b)  ((b)->bk)

/* Take a chunk off a bin list */
#define unlink(P, BK, FD) { \
  FD = P->fd;               \
  BK = P->bk;               \
  FD->bk = BK;              \
  BK->fd = FD;              \
}

/*
  Indexing

  Bins for sizes < 256 bytes (MIN_LARGE_SIZE) contain chunks of all
  the same size, spaced 8 bytes apart.  Larger bins are approximately
  logarithmically spaced:

  32 bins of size       8
  32 bins of size      64
  16 bins of size     512
   8 bins of size    4096
   4 bins of size   32768
   2 bins of size  262144
   1 bin  of size what's left

  The bins top out around 1MB because we expect to service large
  requests via mmap.
*/
#define NBINS          96
#define NSMALLBINS     32
#define SMALLBIN_WIDTH  8
#define MIN_LARGE_SIZE 256

#define in_smallbin_range(sz) \
  ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)

#define smallbin_index(sz) (((unsigned)(sz)) >> 3)

#define bin_index(sz) \
 ((in_smallbin_range(sz)) ? smallbin_index(sz) : __malloc_largebin_index(sz))
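/*
  A standalone sketch, not part of this patch: the small-bin indexing
  arithmetic above.  Chunks below MIN_LARGE_SIZE map to a bin holding
  exactly their size; each small bin is 8 bytes wide.
*/
#include <stdio.h>

#define MIN_LARGE_SIZE 256
#define in_smallbin_range(sz) \
  ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
#define smallbin_index(sz) (((unsigned)(sz)) >> 3)

int main(void)
{
    unsigned sizes[] = { 16, 24, 100, 248 };
    unsigned i;

    /* Expected: 16 -> bin 2, 24 -> bin 3, 100 -> bin 12, 248 -> bin 31 */
    for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
        printf("chunk size %3u -> small bin %2u (in range: %d)\n",
               sizes[i], smallbin_index(sizes[i]),
               in_smallbin_range(sizes[i]) ? 1 : 0);
    return 0;
}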
/*
  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the first
  bin that is maintained in sorted order.  This must be the smallest
  size corresponding to a given bin.

  Normally, this should be MIN_LARGE_SIZE.  But you can weaken best
  fit guarantees to sometimes speed up malloc by increasing the value.
  Doing this means that malloc may choose a chunk that is
  non-best-fitting by up to the width of the bin.

  Some useful cutoff values:
      512 - all bins sorted
     2560 - leaves bins <=     64 bytes wide unsorted
    12288 - leaves bins <=    512 bytes wide unsorted
    65536 - leaves bins <=   4096 bytes wide unsorted
   262144 - leaves bins <=  32768 bytes wide unsorted
       -1 - no bins sorted (not recommended!)
*/
#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
/* #define FIRST_SORTED_BIN_SIZE 65536 */

/*
  Unsorted chunks

  All remainders from chunk splits, as well as all returned chunks,
  are first placed in the "unsorted" bin.  They are then placed in
  regular bins after malloc gives them ONE chance to be used before
  binning.  So, basically, the unsorted_chunks list acts as a queue,
  with chunks being placed on it in free (and __malloc_consolidate),
  and taken off (to be either used or placed in bins) in malloc.
*/

/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
#define unsorted_chunks(M) (bin_at(M, 1))

/*
  Top

  The top-most available chunk (i.e., the one bordering the end of
  available memory) is treated specially.  It is never included in any
  bin, is used only if no other chunk is available, and is released
  back to the system if it is very large (see M_TRIM_THRESHOLD).
  Because top initially points to its own bin with initial zero size,
  thus forcing extension on the first malloc request, we avoid having
  any special code in malloc to check whether it even exists yet.  But
  we still need to do so when getting memory from system, so we make
  initial_top treat the bin as a legal but unusable chunk during the
  interval between initialization and the first call to
  __malloc_alloc.  (This is somewhat delicate, since it relies on the
  2 preceding words to be zero during this interval as well.)
*/

/* Conveniently, the unsorted bin can be used as dummy top on first call */
#define initial_top(M) (unsorted_chunks(M))

/*
  Binmap

  To help compensate for the large number of bins, a one-level index
  structure is used for bin-by-bin searching.  `binmap' is a bitvector
  recording whether bins are definitely empty so they can be skipped
  over during traversals.  The bits are NOT always cleared as soon as
  bins are empty, but instead only when they are noticed to be empty
  during traversal in malloc.
*/

/* Conservatively use 32 bits per map word, even if on a 64-bit system */
#define BINMAPSHIFT 5
#define BITSPERMAP (1U << BINMAPSHIFT)
#define BINMAPSIZE (NBINS / BITSPERMAP)

#define idx2block(i) ((i) >> BINMAPSHIFT)
#define idx2bit(i)   ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))

#define mark_bin(m,i)   ((m)->binmap[idx2block(i)] |= idx2bit(i))
#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i))

/*
  Fastbins

  An array of lists holding recently freed small chunks.  Fastbins are
  not doubly linked.  It is faster to single-link them, and since
  chunks are never removed from the middles of these lists, double
  linking is not necessary.  Also, unlike regular bins, they are not
  even processed in FIFO order (they use faster LIFO) since ordering
  doesn't much matter in the transient contexts in which fastbins are
  normally used.

  Chunks in fastbins keep their inuse bit set, so they cannot be
  consolidated with other free chunks.  __malloc_consolidate releases
  all chunks in fastbins and consolidates them with other free chunks.
*/
typedef struct malloc_chunk* mfastbinptr;

/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2)

/* The maximum fastbin request size we support */
#define MAX_FAST_SIZE 80

#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1)

/*
  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
  that triggers automatic consolidation of possibly-surrounding
  fastbin chunks.  This is a heuristic, so the exact value should not
  matter too much.  It is defined at half the default trim threshold
  as a compromise heuristic to only attempt consolidation if it is
  likely to lead to trimming.  However, it is not dynamically tunable,
  since consolidation reduces fragmentation surrounding large chunks
  even if trimming is not used.
*/
#define FASTBIN_CONSOLIDATION_THRESHOLD \
  ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)

/*
  Since the lowest 2 bits in max_fast don't matter in size
  comparisons, they are used as flags.
*/

/*
  ANYCHUNKS_BIT held in max_fast indicates that there may be any freed
  chunks at all.  It is set true when entering a chunk into any bin.
*/
#define ANYCHUNKS_BIT (1U)

#define have_anychunks(M)  (((M)->max_fast & ANYCHUNKS_BIT))
#define set_anychunks(M)   ((M)->max_fast |= ANYCHUNKS_BIT)
#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT)

/*
  FASTCHUNKS_BIT held in max_fast indicates that there are probably
  some fastbin chunks.  It is set true on entering a chunk into any
  fastbin, and cleared only in __malloc_consolidate.
*/
#define FASTCHUNKS_BIT (2U)

#define have_fastchunks(M)  (((M)->max_fast & FASTCHUNKS_BIT))
#define set_fastchunks(M)   ((M)->max_fast |= (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT))
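/*
  A standalone sketch, not part of this patch: why the low two bits of
  max_fast are free to act as flags -- the real limit is always a
  multiple of 8, so masking the flags off recovers it exactly.  Local
  copies of the bit constants operate on a bare unsigned value here.
*/
#include <stdio.h>

#define ANYCHUNKS_BIT  (1U)
#define FASTCHUNKS_BIT (2U)

int main(void)
{
    unsigned long max_fast = 72;                  /* limit, flags clear */
    max_fast |= (FASTCHUNKS_BIT | ANYCHUNKS_BIT); /* set_fastchunks(M)  */

    printf("limit: %lu\n",
           max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT));   /* 72  */
    printf("have fastchunks: %s\n",
           (max_fast & FASTCHUNKS_BIT) ? "yes" : "no");     /* yes */
    return 0;
}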
/* Set value of max_fast.  Use impossibly small value if 0. */
#define set_max_fast(M, s) \
  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
  ((M)->max_fast & (FASTCHUNKS_BIT|ANYCHUNKS_BIT))

#define get_max_fast(M) \
  ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))


/*
  morecore_properties is a status word holding dynamically discovered
  or controlled properties of the morecore function.
*/
#define MORECORE_CONTIGUOUS_BIT (1U)

#define contiguous(M) \
  (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT))
#define noncontiguous(M) \
  (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0)
#define set_contiguous(M) \
  ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT)
#define set_noncontiguous(M) \
  ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)


/*
  ----------- Internal state representation and initialization -----------
*/
struct malloc_state {

  /* The maximum chunk size to be eligible for fastbin */
  size_t max_fast;   /* low 2 bits used as flags */

  /* Fastbins */
  mfastbinptr fastbins[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr top;

  /* The remainder from the most recent split of a small request */
  mchunkptr last_remainder;

  /* Normal bins packed as described above */
  mchunkptr bins[NBINS * 2];

  /* Bitmap of bins.  Trailing zero map handles cases of largest binned size */
  unsigned int binmap[BINMAPSIZE+1];

  /* Tunable parameters */
  unsigned long trim_threshold;
  size_t top_pad;
  size_t mmap_threshold;

  /* Memory map support */
  int n_mmaps;
  int n_mmaps_max;
  int max_n_mmaps;

  /* Cache malloc_getpagesize */
  unsigned int pagesize;

  /* Track properties of MORECORE */
  unsigned int morecore_properties;

  /* Statistics */
  size_t mmapped_mem;
  size_t sbrked_mem;
  size_t max_sbrked_mem;
  size_t max_mmapped_mem;
  size_t max_total_mem;
};

typedef struct malloc_state *mstate;

/*
  There is exactly one instance of this struct in this malloc.  If you
  are adapting this malloc in a way that does NOT use a static
  malloc_state, you MUST explicitly zero-fill it before using.  This
  malloc relies on the property that malloc_state is initialized to
  all zeroes (as is true of C statics).
*/
extern struct malloc_state __malloc_state;  /* never directly referenced */

/*
  All uses of av_ are via get_malloc_state().  At most one "call" to
  get_malloc_state is made per invocation of the public versions of
  malloc and free, but other routines that in turn invoke malloc
  and/or free may call more than once.  Also, it is called in check*
  routines if __MALLOC_DEBUGGING is set.
*/
#define get_malloc_state() (&(__malloc_state))

/* External internal utilities operating on mstates */
void __malloc_consolidate(mstate);
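/*
  A standalone sketch, not part of this patch: the single-static-state
  pattern used above.  demo_state/get_demo_state stand in for
  __malloc_state/get_malloc_state(); C guarantees the static struct
  starts out all-zero, which this malloc relies on.
*/
#include <stdio.h>

struct demo_state {
    unsigned long trim_threshold;
    unsigned int  pagesize;
};

static struct demo_state demo_state_;   /* zero-filled before main() */
#define get_demo_state() (&demo_state_)

int main(void)
{
    struct demo_state *av = get_demo_state();

    printf("starts zeroed: %lu %u\n", av->trim_threshold, av->pagesize);
    av->trim_threshold = 256 * 1024;    /* one-time initialization */
    printf("tuned: %lu\n", av->trim_threshold);
    return 0;
}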
/* Debugging support */
#if !__MALLOC_DEBUGGING

#define check_chunk(P)
#define check_free_chunk(P)
#define check_inuse_chunk(P)
#define check_remalloced_chunk(P,N)
#define check_malloced_chunk(P,N)
#define check_malloc_state()
#define assert(x) ((void)0)

#else

#define check_chunk(P)              __do_check_chunk(P)
#define check_free_chunk(P)         __do_check_free_chunk(P)
#define check_inuse_chunk(P)        __do_check_inuse_chunk(P)
#define check_remalloced_chunk(P,N) __do_check_remalloced_chunk(P,N)
#define check_malloced_chunk(P,N)   __do_check_malloced_chunk(P,N)
#define check_malloc_state()        __do_check_malloc_state()

extern void __do_check_chunk(mchunkptr p);
extern void __do_check_free_chunk(mchunkptr p);
extern void __do_check_inuse_chunk(mchunkptr p);
extern void __do_check_remalloced_chunk(mchunkptr p, size_t s);
extern void __do_check_malloced_chunk(mchunkptr p, size_t s);
extern void __do_check_malloc_state(void);

#include <assert.h>

#endif