glibc Memory Management Source Code Analysis
Overview
Relevant code: glibc's malloc.h, malloc.c, and hooks.c.
Source Code Analysis
The analysis assumes an x86-64 system, where sizeof(size_t) == 8.
Chunk layout
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk, if unallocated (P clear) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of chunk, in bytes |A|M|P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| User data starts here... .
. .
. (malloc_usable_size() bytes) .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| (size of chunk, but used for application data) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of next chunk, in bytes |A|0|1|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
A block of memory allocated by malloc is ultimately represented by the layout above before being returned to the user.
Main data structures
The chunk structure
struct malloc_chunk {
INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */
INTERNAL_SIZE_T mchunk_size; /* Size in bytes, including overhead. */
struct malloc_chunk* fd; /* double links -- used only if free. */
struct malloc_chunk* bk;
/* Only used for large blocks: pointer to next larger size. */
struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
struct malloc_chunk* bk_nextsize;
};
mchunk_size is the size of the chunk, including the header overhead and the user-requested size, aligned to 2*sizeof(size_t) (16 bytes). Its low 3 bits are used as flags, as in the diagram above: A records whether the chunk belongs to the main arena or a non-main arena; M set means the chunk was allocated via mmap; P set means the previous chunk is in use. Only when P is clear is the previous chunk free, and then mchunk_prev_size holds the previous chunk's size, with no flag bits. If the previous chunk is in use, the mchunk_prev_size field is instead handed to that chunk as part of its user-data area. The point of this overlap is to save memory.
fd and bk link the chunk into a bin while it is free; while the chunk is in use they have no role and hold user data. fd_nextsize and bk_nextsize are not mandatory: they are only meaningful when the chunk is free and larger than a certain size (i.e., lives in a large bin). These points matter a great deal for reading the code, because to save memory free() reinterprets user memory directly as a chunk, and malloc reinterprets a chunk back into a plain pointer for the user.
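As a quick illustration, the flag bits can be decoded like this (a standalone, compilable sketch; the constants mirror glibc's, but this is not glibc code):
#include <stdio.h>

/* Standalone sketch: decoding the A/M/P flag bits of a chunk size field. */
#define PREV_INUSE     0x1  /* P: previous chunk is in use */
#define IS_MMAPPED     0x2  /* M: chunk was allocated via mmap */
#define NON_MAIN_ARENA 0x4  /* A: chunk belongs to a non-main arena */
#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)

int main(void)
{
    size_t raw = 0x91;  /* a 144-byte chunk with P set */
    printf("size=%zu P=%d M=%d A=%d\n",
           raw & ~(size_t)SIZE_BITS,
           !!(raw & PREV_INUSE), !!(raw & IS_MMAPPED),
           !!(raw & NON_MAIN_ARENA));  /* size=144 P=1 M=0 A=0 */
    return 0;
}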
// As described above, the first four fields (mchunk_prev_size, mchunk_size, fd, bk) are mandatory,
// so no allocated chunk may be smaller than this macro.
#define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))
// aligned minimum chunk size
#define MINSIZE \
(unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
// Size that actually has to be allocated: user request + the mchunk_prev_size field + the mchunk_size field - the next chunk's mchunk_prev_size field (which an in-use chunk may reuse for user data). So beyond the request only one SIZE_SZ, the mchunk_size field, is added.
// Converts a user-requested size into the chunk size, rounded up to 2*SIZE_SZ alignment.
#define request2size(req) \
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
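A few concrete values make the macro easier to follow. The sketch below re-derives the x86-64 constants (SIZE_SZ == 8, MALLOC_ALIGN_MASK == 15, MINSIZE == 32) on its own and checks some example requests:
#include <assert.h>
#include <stddef.h>

#define SIZE_SZ           (sizeof(size_t))   /* 8 on x86-64 */
#define MALLOC_ALIGN_MASK (2 * SIZE_SZ - 1)  /* 15 */
#define MINSIZE           32UL

#define request2size(req) \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
   MINSIZE : \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

int main(void)
{
    assert(request2size(0)   == 32);  /* below the minimum chunk size */
    assert(request2size(24)  == 32);  /* 24 + 8 == 32: only SIZE_SZ overhead */
    assert(request2size(25)  == 48);  /* 25 + 8 == 33: rounds up to 48 */
    assert(request2size(100) == 112); /* (100 + 8 + 15) & ~15 */
    return 0;
}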
// Converts the user-requested size into the real allocation size, with an overflow check.
#define checked_request2size(req, sz) \
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);
// The low 3 bits of mchunk_size
// P: set when the previous chunk is in use (clear when it is free)
#define PREV_INUSE 0x1
#define prev_inuse(p) ((p)->mchunk_size & PREV_INUSE)
// M: was this chunk allocated via mmap?
#define IS_MMAPPED 0x2
#define chunk_is_mmapped(p) ((p)->mchunk_size & IS_MMAPPED)
// A: was this chunk allocated from the main arena?
#define NON_MAIN_ARENA 0x4
#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
// mark the chunk as allocated from a non-main arena
#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
// the chunk's real size, with the low 3 flag bits cleared
#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
// the next chunk, by address
#define next_chunk(p) ((mchunkptr) (((char*) (p)) + chunksize (p)))
#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
// set the chunk's size, preserving the low 3 flag bits
#define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s)))
// set the chunk's size and flags in one go, overwriting the low 3 bits
#define set_head(p, s) ((p)->size = (s))
// record this chunk's size in the next chunk's prev_size field
#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
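To see how next_chunk, set_head, and set_foot cooperate, here is a standalone sketch that lays two fake chunks out in an ordinary buffer (illustrative pointer arithmetic only, not a real heap):
#include <stdio.h>

typedef struct malloc_chunk {
    size_t prev_size;
    size_t size;
} *mchunkptr;

#define PREV_INUSE 0x1
#define SIZE_BITS  0x7
#define chunksize(p)   ((p)->size & ~(size_t)SIZE_BITS)
#define next_chunk(p)  ((mchunkptr)(((char *)(p)) + chunksize(p)))
#define set_head(p, s) ((p)->size = (s))
#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_size = (s))

int main(void)
{
    static size_t heap[16];                /* 128 bytes standing in for arena memory */
    mchunkptr a = (mchunkptr)heap;
    set_head(a, 32 | PREV_INUSE);          /* a 32-byte chunk */
    mchunkptr b = next_chunk(a);           /* b sits 32 bytes after a */
    set_head(b, 64 | PREV_INUSE);
    set_foot(b, 64);                       /* writes 64 into the prev_size at b+64 */
    printf("b - a = %ld, next(b)->prev_size = %zu\n",
           (long)((char *)b - (char *)a),  /* 32 */
           next_chunk(b)->prev_size);      /* 64 */
    return 0;
}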
The arena state is kept in malloc_state:
struct malloc_state {
/* Serialize access. */
mutex_t mutex;
/* Flags (formerly in max_fast). */
int flags;
#if THREAD_STATS
/* Statistics for locking. Only used if THREAD_STATS is defined. */
long stat_lock_direct, stat_lock_loop, stat_lock_wait;
#endif
/* Fastbins */
mfastbinptr fastbinsY[NFASTBINS];
/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;
/* The remainder from the most recent split of a small request */
mchunkptr last_remainder;
// #define NBINS 128
/* Normal bins packed as described above */
// an array of 254 pointers
mchunkptr bins[NBINS * 2 - 2];
// 4 unsigned ints: 16B * 8 bits == 128 bits, one bit per bin
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];
/* Linked list */
struct malloc_state *next;
#ifdef PER_THREAD
/* Linked list for free arenas. */
struct malloc_state *next_free;
#endif
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
};
- bins array: the bins array divides into the unsorted bin, the small bins, and the large bins. There is only one unsorted bin; a reclaimed chunk is always placed there first. On allocation, malloc checks the unsorted bin for a suitable chunk and, on an exact fit, returns it to the user directly; otherwise every chunk in the unsorted bin is sorted into the small bins or large bins.
Small bins hold chunks of fixed sizes: 64 bins in total, with the smallest chunk 32 bytes and adjacent bins 16 bytes apart (32, 48, 64, 80, 96, 112, 128, 144, 160, 176, ..., 1008). That gives 62 usable bins, at indices 2 through 63. Small allocations are served from the small bins by exact match.
Large bins hold free chunks of 1024B or more, kept in doubly linked lists sorted by size; allocation takes the closest fit. There are 63 large bins, split into 6 groups; within each group the bin boundaries form an arithmetic progression. Nominally the groups contain 32, 16, 8, 4, 2, and 1 bins with strides of 64B, 512B, 4096B, 32768B, and 262144B, though the index formula below actually yields 33 and 15 bins for the first two groups:
Group 1 (33 bins): indices 64-96, sizes [1024, 3136); size/64 + 48 == index, (index - 48) * 64 == size
Group 2 (15 bins): indices 97-111, sizes [3136, 10752); size/512 + 91 == index, (index - 91) * 512 == size
Group 3 (8 bins): indices 112-119, sizes [10752, 45056)
Group 4 (4 bins): indices 120-123, sizes [45056, 163840)
Group 5 (2 bins): indices 124-125, sizes [163840, 786432)
Group 6 (1 bin): index 126, sizes [786432, ...)
// multiplied by 2 because each bin occupies two slots (fd and bk) of the bins array
// the offset of fd is subtracted because the two fields before fd are never used (and must not be used) by a bin header
#define bin_at(m, i) \
(mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \
- offsetof (struct malloc_chunk, fd))
// number of bins
#define NBINS 128
// number of small bins
#define NSMALLBINS 64
// where MALLOC_ALIGNMENT == 2*sizeof(size_t), i.e. 16 bytes
#define SMALLBIN_WIDTH MALLOC_ALIGNMENT
#define MIN_LARGE_SIZE (NSMALLBINS * SMALLBIN_WIDTH)
// chunks under 1KB live in the small bins
#define in_smallbin_range(sz) \
((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
// on 64-bit, dividing the chunk size by 16 gives its small bins index
#define smallbin_index(sz) \
(SMALLBIN_WIDTH == 16 ? (((unsigned)(sz)) >> 4) : (((unsigned)(sz)) >> 3))
// on 64-bit, maps a given chunk size to its index in the bins array
#define largebin_index_64(sz) \
(((((unsigned long)(sz)) >> 6) <= 48)? 48 + (((unsigned long)(sz)) >> 6): \
((((unsigned long)(sz)) >> 9) <= 20)? 91 + (((unsigned long)(sz)) >> 9): \
((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \
((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \
126)
// maps a given chunk size to its bins array index
#define bin_index(sz) \
((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
#define unsorted_chunks(M) (bin_at(M, 1))
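A standalone sketch that checks a few index computations against the group table above (x86-64 constants assumed):
#include <assert.h>

#define smallbin_index(sz) (((unsigned)(sz)) >> 4)  /* SMALLBIN_WIDTH == 16 */
#define largebin_index_64(sz) \
  (((((unsigned long)(sz)) >> 6) <= 48)?  48 + (((unsigned long)(sz)) >> 6): \
   ((((unsigned long)(sz)) >> 9) <= 20)?  91 + (((unsigned long)(sz)) >> 9): \
   ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
   ((((unsigned long)(sz)) >> 15) <= 4)?  119 + (((unsigned long)(sz)) >> 15): \
   ((((unsigned long)(sz)) >> 18) <= 2)?  124 + (((unsigned long)(sz)) >> 18): \
   126)

int main(void)
{
    assert(smallbin_index(32)   == 2);        /* smallest small bin */
    assert(smallbin_index(1008) == 63);       /* largest small bin */
    assert(largebin_index_64(1024)   == 64);  /* group 1: 1024/64 + 48 */
    assert(largebin_index_64(3136)   == 97);  /* group 2: 3136/512 + 91 */
    assert(largebin_index_64(786432) == 126); /* group 6 */
    return 0;
}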
- fast bins: fast bins exist to speed up small allocations and can be viewed as a small cache in front of the small bins. By default, on platforms where SIZE_SZ is 8B there are 10 fast bin free lists, holding chunks of 32B, 48B, 64B, 80B, 96B, 112B, 128B, 144B, 160B, and 176B; in practice, by default only chunks of up to 128B are actually cached as fast chunks.
#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) \
((((unsigned int)(sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
/* The maximum fastbin request size we support */
// 160B
#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)
// request2size => (160+8+15) & ~15 == 176
// fastbin_index => 176>>4 - 2 == 9
// NFASTBINS == 10
#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1)
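The arithmetic in the comments above can be verified with a standalone sketch (x86-64 constants assumed):
#include <assert.h>
#include <stddef.h>

#define SIZE_SZ           (sizeof(size_t))  /* 8 */
#define MALLOC_ALIGN_MASK (2 * SIZE_SZ - 1)
#define MINSIZE           32UL
#define request2size(req) \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
#define fastbin_index(sz) ((((unsigned int)(sz)) >> 4) - 2)
#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)    /* 160 */
#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE)) + 1)

int main(void)
{
    assert(request2size(MAX_FAST_SIZE) == 176); /* (160 + 8 + 15) & ~15 */
    assert(fastbin_index(176) == 9);
    assert(NFASTBINS == 10);                    /* bins for 32B..176B chunks */
    return 0;
}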
- binmap: 128 bits in total, one bit per bin, marking whether the corresponding bin contains any free chunks.
#define BINMAPSHIFT 5
#define BITSPERMAP (1U << BINMAPSHIFT)
#define BINMAPSIZE (NBINS / BITSPERMAP)
#define idx2block(i) ((i) >> BINMAPSHIFT)
#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i))
#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i))
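A standalone sketch of the binmap bit operations:
#include <assert.h>

#define NBINS       128
#define BINMAPSHIFT 5
#define BITSPERMAP  (1U << BINMAPSHIFT)       /* 32 bits per word */
#define BINMAPSIZE  (NBINS / BITSPERMAP)      /* 4 words */
#define idx2block(i) ((i) >> BINMAPSHIFT)
#define idx2bit(i)   (1U << ((i) & ((1U << BINMAPSHIFT) - 1)))

int main(void)
{
    unsigned int binmap[BINMAPSIZE] = {0};
    int i = 70;                                /* some large-bin index */
    binmap[idx2block(i)] |= idx2bit(i);        /* mark_bin */
    assert(binmap[2] == (1U << 6));            /* 70 == 2*32 + 6 */
    assert(binmap[idx2block(i)] & idx2bit(i)); /* get_binmap */
    binmap[idx2block(i)] &= ~idx2bit(i);       /* unmark_bin */
    assert(binmap[2] == 0);
    return 0;
}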
Initialization
ptmalloc defines the following global variables:
static struct malloc_state main_arena;
static struct malloc_par mp_;
static INTERNAL_SIZE_T global_max_fast;
main_arena is the main arena; every process has exactly one global main arena. mp_ is the single global malloc_par instance, which manages tunables and statistics. global_max_fast holds the largest chunk size served from the fast bins.
We start from malloc, which is an alias for public_mALLOc. If the __malloc_hook function pointer is non-NULL, that hook is called instead; it is initially set to malloc_hook_ini.
// malloc.c initializes the __malloc_hook callback to malloc_hook_ini
__malloc_ptr_t weak_variable (*__malloc_hook)
(size_t __size, const __malloc_ptr_t) = malloc_hook_ini;
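For reference, the classic hook pattern from the glibc manual looks like the sketch below. Note that these hook variables are deprecated and were removed in glibc 2.34, so this only builds against older glibc; the hook must be temporarily uninstalled while calling malloc to avoid infinite recursion:
#include <stdio.h>
#include <malloc.h>

/* Sketch of the legacy __malloc_hook tracing pattern (deprecated,
   removed in glibc 2.34). */
static void *(*old_malloc_hook)(size_t, const void *);

static void *
my_malloc_hook(size_t size, const void *caller)
{
    void *result;
    __malloc_hook = old_malloc_hook;   /* uninstall to avoid recursion */
    result = malloc(size);
    fprintf(stderr, "malloc(%zu) called from %p returns %p\n",
            size, caller, result);
    old_malloc_hook = __malloc_hook;   /* reinstall ourselves */
    __malloc_hook = my_malloc_hook;
    return result;
}

int main(void)
{
    old_malloc_hook = __malloc_hook;
    __malloc_hook = my_malloc_hook;
    void *p = malloc(16);              /* traced by the hook */
    free(p);
    return 0;
}
The debug-check helpers that follow (do_check_inuse_chunk and friends) back the check_* macros used throughout malloc when MALLOC_DEBUG is enabled.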
#if __STD_C
static void do_check_inuse_chunk(mstate av, mchunkptr p)
#else
static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p;
#endif
{
mchunkptr next;
do_check_chunk(av, p);
if (chunk_is_mmapped(p))
return; /* mmapped chunks have no next/prev */
/* Check whether it claims to be in use ... */
assert(inuse(p));
next = next_chunk(p);
/* ... and is surrounded by OK chunks.
Since more things can be checked with free chunks than inuse ones,
if an inuse chunk borders them and debug is on, it's worth doing them.
*/
if (!prev_inuse(p)) {
/* Note that we cannot even look at prev unless it is not inuse */
mchunkptr prv = prev_chunk(p);
assert(next_chunk(prv) == p);
do_check_free_chunk(av, prv);
}
if (next == av->top) {
assert(prev_inuse(next));
assert(chunksize(next) >= MINSIZE);
}
else if (!inuse(next))
do_check_free_chunk(av, next);
}
#if __STD_C
static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
#else
static void do_check_remalloced_chunk(av, p, s)
mstate av; mchunkptr p; INTERNAL_SIZE_T s;
#endif
{
// the chunk's real size
INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
if (!chunk_is_mmapped(p)) {
assert(av == arena_for_chunk(p));
if (chunk_non_main_arena(p))
assert(av != &main_arena);
else
assert(av == &main_arena);
}
do_check_inuse_chunk(av, p);
/* Legal size ... */
assert((sz & MALLOC_ALIGN_MASK) == 0);
assert((unsigned long)(sz) >= MINSIZE);
/* ... and alignment */
assert(aligned_OK(chunk2mem(p)));
/* chunk is less than MINSIZE more than request */
assert((long)(sz) - (long)(s) >= 0);
assert((long)(sz) - (long)(s + MINSIZE) < 0);
}
#if __STD_C
static void do_check_chunk(mstate av, mchunkptr p)
#else
static void do_check_chunk(av, p) mstate av; mchunkptr p;
#endif
{
unsigned long sz = chunksize(p);
/* min and max possible addresses assuming contiguous allocation */
char* max_address = (char*)(av->top) + chunksize(av->top);
char* min_address = max_address - av->system_mem;
if (!chunk_is_mmapped(p)) {
// memory not allocated via mmap
/* Has legal address ... */
if (p != av->top) {
// not the top chunk, i.e. not the chunk at the high-address boundary
if (contiguous(av)) {
// check that the chunk lies within the valid heap address range
assert(((char*)p) >= min_address);
assert(((char*)p + sz) <= ((char*)(av->top)));
}
}
else {
/* top size is always at least MINSIZE */
assert((unsigned long)(sz) >= MINSIZE);
/* top predecessor always marked inuse */
assert(prev_inuse(p));
}
}
else {
#if HAVE_MMAP
/* address is outside main heap */
if (contiguous(av) && av->top != initial_top(av)) {
assert(((char*)p) < min_address || ((char*)p) >= max_address);
}
/* chunk is page-aligned */
assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0);
/* mem is aligned */
assert(aligned_OK(chunk2mem(p)));
#else
/* force an appropriate assert violation if debug set */
assert(!chunk_is_mmapped(p));
#endif
}
}
Void_t*
public_mALLOc(size_t bytes)
{
// typedef struct malloc_state *mstate; defined in include/malloc.h
mstate ar_ptr;
Void_t *victim;
// NULL once initialization has finished
__malloc_ptr_t (*hook) (size_t, __const __malloc_ptr_t)
= force_reg (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0)); // initially malloc_hook_ini
arena_lookup(ar_ptr); // look up the arena pointer
#if 0
// XXX We need double-word CAS and fastbins must be extended to also
// XXX hold a generation counter for each entry.
if (ar_ptr) {
INTERNAL_SIZE_T nb; /* normalized request size */
checked_request2size(bytes, nb);
if (nb <= get_max_fast ()) {
long int idx = fastbin_index(nb);
mfastbinptr* fb = &fastbin (ar_ptr, idx);
mchunkptr pp = *fb;
mchunkptr v;
do
{
v = pp;
if (v == NULL)
break;
}
while ((pp = catomic_compare_and_exchange_val_acq (fb, v->fd, v)) != v);
if (v != 0) {
if (__builtin_expect (fastbin_index (chunksize (v)) != idx, 0))
malloc_printerr (check_action, "malloc(): memory corruption (fast)",
chunk2mem (v));
check_remalloced_chunk(ar_ptr, v, nb);
void *p = chunk2mem(v);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
}
}
#endif
arena_lock(ar_ptr, bytes);
if(!ar_ptr)
return 0;
victim = _int_malloc(ar_ptr, bytes);
if(!victim) {
/* Maybe the failure is due to running out of mmapped areas. */
if(ar_ptr != &main_arena) {
(void)mutex_unlock(&ar_ptr->mutex);
ar_ptr = &main_arena;
(void)mutex_lock(&ar_ptr->mutex);
victim = _int_malloc(ar_ptr, bytes);
(void)mutex_unlock(&ar_ptr->mutex);
} else {
#if USE_ARENAS
/* ... or sbrk() has failed and there is still a chance to mmap() */
ar_ptr = arena_get2(ar_ptr->next ? ar_ptr : 0, bytes);
(void)mutex_unlock(&main_arena.mutex);
if(ar_ptr) {
victim = _int_malloc(ar_ptr, bytes);
(void)mutex_unlock(&ar_ptr->mutex);
}
#endif
}
} else
(void)mutex_unlock(&ar_ptr->mutex);
// sanity-check the allocated memory
assert(!victim || chunk_is_mmapped(mem2chunk(victim)) ||
ar_ptr == arena_for_chunk(mem2chunk(victim)));
return victim;
}
// in hooks.c
static Void_t*
#if __STD_C
malloc_hook_ini(size_t sz, const __malloc_ptr_t caller)
#else
malloc_hook_ini(sz, caller)
size_t sz; const __malloc_ptr_t caller;
#endif
{
// reset to NULL so initialization runs only once
__malloc_hook = NULL;
ptmalloc_init();
return public_mALLOc(sz);
}
// in arena.c
static void
ptmalloc_init (void)
{
#if __STD_C
const char* s;
#else
char* s;
#endif
int secure = 0;
// -1 while not yet initialized
if(__malloc_initialized >= 0) return;
__malloc_initialized = 0; // initialization in progress
#ifdef _LIBC
# if defined SHARED && !USE___THREAD
/* ptmalloc_init_minimal may already have been called via
__libc_malloc_pthread_startup, above. */
if (mp_.pagesize == 0)
# endif
#endif
ptmalloc_init_minimal();
#ifndef NO_THREADS
# if defined _LIBC
/* We know __pthread_initialize_minimal has already been called,
and that is enough. */
# define NO_STARTER
# endif
# ifndef NO_STARTER
/* With some threads implementations, creating thread-specific data
or initializing a mutex may call malloc() itself. Provide a
simple starter version (realloc() won't work). */
save_malloc_hook = __malloc_hook;
save_memalign_hook = __memalign_hook;
save_free_hook = __free_hook;
__malloc_hook = malloc_starter;
__memalign_hook = memalign_starter;
__free_hook = free_starter;
# ifdef _LIBC
/* Initialize the pthreads interface. */
if (__pthread_initialize != NULL)
__pthread_initialize();
# endif /* !defined _LIBC */
# endif /* !defined NO_STARTER */
#endif /* !defined NO_THREADS */
mutex_init(&main_arena.mutex);
main_arena.next = &main_arena;
#if defined _LIBC && defined SHARED
/* In case this libc copy is in a non-default namespace, never use brk.
Likewise if dlopened from statically linked program. */
Dl_info di;
struct link_map *l;
if (_dl_open_hook != NULL
|| (_dl_addr (ptmalloc_init, &di, &l, NULL) != 0
&& l->l_ns != LM_ID_BASE))
__morecore = __failing_morecore;
#endif
mutex_init(&list_lock);
tsd_key_create(&arena_key, NULL);
tsd_setspecific(arena_key, (Void_t *)&main_arena);
thread_atfork(ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2);
#ifndef NO_THREADS
# ifndef NO_STARTER
__malloc_hook = save_malloc_hook;
__memalign_hook = save_memalign_hook;
__free_hook = save_free_hook;
# else
# undef NO_STARTER
# endif
#endif
#ifdef _LIBC
secure = __libc_enable_secure;
s = NULL;
if (__builtin_expect (_environ != NULL, 1))
{
char **runp = _environ;
char *envline;
while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL,
0))
{
size_t len = strcspn (envline, "=");
if (envline[len] != '=')
/* This is a "MALLOC_" variable at the end of the string
without a '=' character. Ignore it since otherwise we
will access invalid memory below. */
continue;
switch (len)
{
case 6:
if (memcmp (envline, "CHECK_", 6) == 0)
s = &envline[7];
break;
case 8:
if (! secure)
{
if (memcmp (envline, "TOP_PAD_", 8) == 0)
mALLOPt(M_TOP_PAD, atoi(&envline[9]));
else if (memcmp (envline, "PERTURB_", 8) == 0)
mALLOPt(M_PERTURB, atoi(&envline[9]));
}
break;
case 9:
if (! secure)
{
if (memcmp (envline, "MMAP_MAX_", 9) == 0)
mALLOPt(M_MMAP_MAX, atoi(&envline[10]));
#ifdef PER_THREAD
else if (memcmp (envline, "ARENA_MAX", 9) == 0)
mALLOPt(M_ARENA_MAX, atoi(&envline[10]));
#endif
}
break;
#ifdef PER_THREAD
case 10:
if (! secure)
{
if (memcmp (envline, "ARENA_TEST", 10) == 0)
mALLOPt(M_ARENA_TEST, atoi(&envline[11]));
}
break;
#endif
case 15:
if (! secure)
{
if (memcmp (envline, "TRIM_THRESHOLD_", 15) == 0)
mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16]));
else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0)
mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16]));
}
break;
default:
break;
}
}
}
#else
if (! secure)
{
if((s = getenv("MALLOC_TRIM_THRESHOLD_")))
mALLOPt(M_TRIM_THRESHOLD, atoi(s));
if((s = getenv("MALLOC_TOP_PAD_")))
mALLOPt(M_TOP_PAD, atoi(s));
if((s = getenv("MALLOC_PERTURB_")))
mALLOPt(M_PERTURB, atoi(s));
if((s = getenv("MALLOC_MMAP_THRESHOLD_")))
mALLOPt(M_MMAP_THRESHOLD, atoi(s));
if((s = getenv("MALLOC_MMAP_MAX_")))
mALLOPt(M_MMAP_MAX, atoi(s));
}
s = getenv("MALLOC_CHECK_");
#endif
if(s && s[0]) {
mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0'));
// the MALLOC_CHECK_ environment variable was set
if (check_action != 0)
__malloc_check_init();
}
// defined in mcheck-init.c; ultimately calls mcheck
void (*hook) (void) = force_reg (__malloc_initialize_hook);
if (hook != NULL)
(*hook)();
__malloc_initialized = 1;
}
// defined in hooks.c
void
__malloc_check_init()
{
if (disallow_malloc_check) {
disallow_malloc_check = 0;
return;
}
using_malloc_checking = 1;
__malloc_hook = malloc_check;
__free_hook = free_check;
__realloc_hook = realloc_check;
__memalign_hook = memalign_check;
}
Defined in malloc.c; this is the function that ultimately performs the allocation:
static Void_t*
_int_malloc(mstate av, size_t bytes)
{
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */
mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
int victim_index; /* its bin index */
mchunkptr remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */
unsigned int block; /* bit map traverser */
unsigned int bit; /* bit map traverser */
unsigned int map; /* current word of binmap */
mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */
const char *errstr = NULL;
/*
Convert request size to internal form by adding SIZE_SZ bytes
overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable
size. Also, checked_request2size traps (returning 0) request sizes
that are so large that they wrap around zero when padded and
aligned.
*/
// validate the request and compute the chunk size
checked_request2size(bytes, nb);
/*
If the size qualifies as a fastbin, first check corresponding bin.
This code is safe to execute even if av is not yet initialized, so we
can try it without checking, which saves some time on this fast path.
*/
// the largest fast chunk size is 128B by default
// #define set_max_fast(s) \
//   global_max_fast = (((s) == 0) \
//     ? SMALLBIN_WIDTH: ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
// #define get_max_fast() global_max_fast
// set_max_fast is called from malloc_init_state
if ((unsigned long)(nb) <= (unsigned long)(get_max_fast ())) {
idx = fastbin_index(nb); // map the chunk size to its fast bins index
mfastbinptr* fb = &fastbin (av, idx);
#ifdef ATOMIC_FASTBINS
mchunkptr pp = *fb;
do
{
victim = pp;
if (victim == NULL)
break;
}
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
!= victim);
#else
victim = *fb;
#endif
if (victim != 0) {
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim));
return NULL;
}
#ifndef ATOMIC_FASTBINS
*fb = victim->fd;
#endif
check_remalloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
}
/*
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
*/
if (in_smallbin_range(nb)) {
// the fast bins missed and the request is under 1KB
idx = smallbin_index(nb);
bin = bin_at(av,idx);
if ( (victim = last(bin)) != bin) {
if (victim == 0) /* initialization check */
malloc_consolidate(av);
else {
bck = victim->bk;
if (__builtin_expect (bck->fd != victim, 0))
{
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset(victim, nb);
bin->bk = bck;
bck->fd = bin;
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
}
}
/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
*/
else {
idx = largebin_index(nb);
if (have_fastchunks(av))
malloc_consolidate(av); // coalesce the fast bin chunks and prepend the merged chunks to the head of the unsorted bin
}
/*
Process recently freed or remaindered chunks, taking one only if
it is exact fit, or, if this a small request, the chunk is remainder from
the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.
The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
*/
for(;;) {
int iters = 0;
// take a chunk from the tail of the unsorted bin
while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
bck = victim->bk;
// validate the size of the chunk we took
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim));
size = chunksize(victim);
/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/
if (in_smallbin_range(nb) &&
bck == unsorted_chunks(av) &&
victim == av->last_remainder &&
(unsigned long)(size) > (unsigned long)(nb + MINSIZE)) {
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks(av);
if (!in_smallbin_range(remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
/* remove from unsorted list */
unsorted_chunks(av)->bk = bck;
bck->fd = unsorted_chunks(av);
/* Take now instead of binning if exact fit */
if (size == nb) {
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
/* place chunk in bin */
if (in_smallbin_range(size)) {
victim_index = smallbin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;
}
else {
victim_index = largebin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;
/* maintain large bins in sorted order */
if (fwd != bck) {
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert((bck->bk->size & NON_MAIN_ARENA) == 0);
if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) {
fwd = bck;
bck = bck->bk;
victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else {
assert((fwd->size & NON_MAIN_ARENA) == 0);
while ((unsigned long) size < fwd->size)
{
fwd = fwd->fd_nextsize;
assert((fwd->size & NON_MAIN_ARENA) == 0);
}
if ((unsigned long) size == (unsigned long) fwd->size)
/* Always insert in the second position. */
fwd = fwd->fd;
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
}
} else
victim->fd_nextsize = victim->bk_nextsize = victim;
}
mark_bin(av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}
/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/
if (!in_smallbin_range(nb)) {
bin = bin_at(av, idx);
/* skip scan if empty or largest chunk is too small */
if ((victim = first(bin)) != bin &&
(unsigned long)(victim->size) >= (unsigned long)(nb)) {
victim = victim->bk_nextsize;
while (((unsigned long)(size = chunksize(victim)) <
(unsigned long)(nb)))
victim = victim->bk_nextsize;
/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last(bin) && victim->size == victim->fd->size)
victim = victim->fd;
remainder_size = size - nb;
unlink(victim, bck, fwd);
/* Exhaust */
if (remainder_size < MINSIZE) {
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else {
remainder = chunk_at_offset(victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__builtin_expect (fwd->bk != bck, 0))
{
errstr = "malloc(): corrupted unsorted chunks";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range(remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
}
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
}
/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.
The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/
++idx;
bin = bin_at(av,idx);
block = idx2block(idx);
map = av->binmap[block];
bit = idx2bit(idx);
for (;;) {
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0) {
do {
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
} while ( (map = av->binmap[block]) == 0);
bin = bin_at(av, (block << BINMAPSHIFT));
bit = 1;
}
/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0) {
bin = next_bin(bin);
bit <<= 1;
assert(bit != 0);
}
/* Inspect the bin. It is likely to be non-empty */
victim = last(bin);
/* If a false alarm (empty bin), clear the bit. */
if (victim == bin) {
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin(bin);
bit <<= 1;
}
else {
size = chunksize(victim);
/* We know the first chunk in this bin is big enough to use. */
assert((unsigned long)(size) >= (unsigned long)(nb));
remainder_size = size - nb;
/* unlink */
unlink(victim, bck, fwd);
/* Exhaust */
if (remainder_size < MINSIZE) {
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else {
remainder = chunk_at_offset(victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__builtin_expect (fwd->bk != bck, 0))
{
errstr = "malloc(): corrupted unsorted chunks 2";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
/* advertise as last remainder */
if (in_smallbin_range(nb))
av->last_remainder = remainder;
if (!in_smallbin_range(remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
}
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
}
use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).
We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/
victim = av->top;
size = chunksize(victim);
if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
av->top = remainder;
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
if (__builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
#ifdef ATOMIC_FASTBINS
/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (have_fastchunks(av)) {
malloc_consolidate(av);
/* restore original bin index */
if (in_smallbin_range(nb))
idx = smallbin_index(nb);
else
idx = largebin_index(nb);
}
#else
/*
If there is space available in fastbins, consolidate and retry,
to possibly avoid expanding memory. This can occur only if nb is
in smallbin range so we didn't consolidate upon entry.
*/
else if (have_fastchunks(av)) {
assert(in_smallbin_range(nb));
malloc_consolidate(av);
idx = smallbin_index(nb); /* restore original bin index */
}
#endif
/*
Otherwise, relay to handle system-dependent cases
*/
else {
void *p = sYSMALLOc(nb, av);
if (p != NULL && __builtin_expect (perturb_byte, 0))
alloc_perturb (p, bytes);
return p;
}
}
}
// coalesce the chunks in the fast bins and move them into the unsorted bin
#if __STD_C
static void malloc_consolidate(mstate av)
#else
static void malloc_consolidate(av) mstate av;
#endif
{
mfastbinptr* fb; /* current fastbin being consolidated */
mfastbinptr* maxfb; /* last fastbin (for loop control) */
mchunkptr p; /* current chunk being consolidated */
mchunkptr nextp; /* next chunk to consolidate */
mchunkptr unsorted_bin; /* bin header */
mchunkptr first_unsorted; /* chunk to link to */
/* These have same use as in free() */
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;
mchunkptr bck;
mchunkptr fwd;
/*
If max_fast is 0, we know that av hasn't
yet been initialized, in which case do so below
*/
if (get_max_fast () != 0) {
clear_fastchunks(av);
unsorted_bin = unsorted_chunks(av);
/*
Remove each chunk from fast bin and consolidate it, placing it
then in unsorted bin. Among other reasons for doing this,
placing in unsorted bin avoids needing to calculate actual bins
until malloc is sure that chunks aren't immediately going to be
reused anyway.
*/
#if 0
/* It is wrong to limit the fast bins to search using get_max_fast
because, except for the main arena, all the others might have
blocks in the high fast bins. It's not worth it anyway, just
search all bins all the time. */
maxfb = &fastbin (av, fastbin_index(get_max_fast ()));
#else
maxfb = &fastbin (av, NFASTBINS - 1);
#endif
fb = &fastbin (av, 0);
do {
#ifdef ATOMIC_FASTBINS
p = atomic_exchange_acq (fb, 0);
#else
p = *fb;
#endif
if (p != 0) {
#ifndef ATOMIC_FASTBINS
*fb = 0;
#endif
do {
check_inuse_chunk(av, p);
nextp = p->fd;
/* Slightly streamlined version of consolidation code in free() */
size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
// the previous chunk is free
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(p, bck, fwd);
}
if (nextchunk != av->top) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
if (!nextinuse) {
size += nextsize;
unlink(nextchunk, bck, fwd);
} else
clear_inuse_bit_at_offset(nextchunk, 0);
first_unsorted = unsorted_bin->fd;
unsorted_bin->fd = p;
first_unsorted->bk = p;
if (!in_smallbin_range (size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin;
p->fd = first_unsorted;
set_foot(p, size);
}
else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
}
} while ( (p = nextp) != 0);
}
} while (fb++ != maxfb);
}
else {
malloc_init_state(av);
check_malloc_state(av);
}
}
Summary
glibc error messages:
- "malloc(): memory corruption (fast)": while allocating a block of at most 128B, the chunk's size does not match its fast bins index. The chunk's header was probably overwritten by data spilling over from the adjacent previous block.
- glibc prints no error but the program dumps core, and gdb on the core file shows an assertion failure: usually the allocated address is invalid.
- "malloc(): smallbin double linked list corrupted": while allocating under 1KB, the doubly linked list at the corresponding small bins index is broken; the chunk's fd or bk pointer was probably overwritten.
- "corrupted double-linked list": a doubly linked list was found corrupted while coalescing chunks.
- "malloc(): memory corruption": a fitting chunk was found in the unsorted bin, but its size field was probably overwritten and is now too large or too small.
- "free(): invalid pointer": the chunk address passed to free is invalid — outside the heap, or not aligned to 16 bytes.
- "free(): invalid size": the size of the chunk being freed is too small (under 32B).
- "free(): invalid next size (fast)": freeing a block of at most 128B whose next chunk's size is under 32B or exceeds the arena's total memory.
- "double free or corruption (fasttop)": on free, the chunk at the top of the corresponding fast bin has the same address as the chunk being freed.
- "invalid fastbin entry (free)": on free, the index computed from the size of the first chunk already in the fast bin does not match the index of the chunk being freed.
- "free(): invalid next size (normal)": freeing a block larger than 128B whose next chunk's size is under 32B or exceeds the arena's total memory.
- "double free or corruption (!prev)": on free, the next chunk already marks this chunk as free; likely caused by an overflow into this chunk.
Most overflows come from string writes; an overflow caused by a struct assignment is genuinely hard to track down. A useful trick is to print the chunk's size alongside the requested size, and to dump 32B of memory starting 16B before the chunk's address, as sketched below.
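A hypothetical helper along those lines (dump_chunk is our own name; the layout assumption — header 16 bytes below the pointer malloc returned — is glibc-on-x86-64 specific, and reading bytes before the chunk is formally undefined behavior, so this is an ad-hoc troubleshooting hack only):
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical debug helper: dump a chunk's header plus the 32 bytes
   starting 16B before the chunk. Not portable, not production code. */
static void dump_chunk(void *mem)
{
    size_t *hdr = (size_t *)mem - 2;   /* prev_size, size */
    fprintf(stderr, "mem=%p prev_size=%#zx size=%#zx\n",
            mem, hdr[0], hdr[1]);
    unsigned char *p = (unsigned char *)hdr - 16;
    for (int i = 0; i < 32; i++)
        fprintf(stderr, "%02x%c", p[i], (i % 16 == 15) ? '\n' : ' ');
}

int main(void)
{
    void *m = malloc(100);
    dump_chunk(m);  /* size is typically 0x71: 112 | PREV_INUSE */
    free(m);
    return 0;
}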
References:
- http://www.valleytalk.org/wp-content/uploads/2015/02/glibc内存管理ptmalloc源代码分析1.pdf
- Understanding the following program is also very helpful for understanding malloc.
#include <stdio.h>

#define offsetof(type,ident) ((size_t)&(((type*)0)->ident))

typedef struct _test
{
    int a;
    int b;
    int x;
    int y;
} test;

int main(void)
{
    int xxx[2] = {2, 3};
    /* Pretend xxx holds the x/y tail of a test struct: stepping back by
       offsetof(x) makes t->x alias xxx[0]. bin_at plays the same trick
       with the bins array. */
    test *t = (test *)((char*)xxx - offsetof(struct _test, x));
    fprintf(stderr, "%d\n", t->x); /* prints 2 */
    return 0;
}
A slightly simplified reconstruction of malloc's bin handling:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE_SZ (sizeof(size_t))
#define offsetof(type,ident) ((size_t)&(((type*)0)->ident))
struct malloc_chunk {
size_t prev_size; /* Size of previous chunk (if free). */
size_t size; /* Size in bytes, including overhead. */
struct malloc_chunk* fd; /* double links -- used only if free. */
struct malloc_chunk* bk;
/* Only used for large blocks: pointer to next larger size. */
struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
struct malloc_chunk* bk_nextsize;
};
typedef struct malloc_chunk* mchunkptr;
typedef struct malloc_chunk* mbinptr;
#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
#define bin_at(m, i) \
(mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \
- offsetof (struct malloc_chunk, fd))
struct malloc_state {
mchunkptr bins[2];
};
int main(void)
{
    struct malloc_state main_arena;
    /* bin_at points 16 bytes before bins[0], so chunk_head->fd and
       chunk_head->bk alias bins[0] and bins[1]: the bin header is treated
       as a chunk even though it only stores the two list pointers. */
    mbinptr chunk_head = bin_at(&main_arena, 1);
    void *p = malloc(3);
    chunk_head->fd = mem2chunk(p); /* link the real chunk into the "bin" */
    fprintf(stderr, "size: %lu\n", chunk_head->fd->size); /* typically 33: 32 | PREV_INUSE */
    free(p);
    return 0;
}