blog
blog copied to clipboard
Python的内存模型架构
/* An object allocator for Python.
Here is an introduction to the layers of the Python memory architecture,
showing where the object allocator is actually used (layer +2), It is
called for every object allocation and deallocation (PyObject_New/Del),
unless the object-specific allocators implement a proprietary allocation
scheme (ex.: ints use a simple free list). This is also the place where
the cyclic garbage collector operates selectively on container objects.
Object-specific allocators
_____ ______ ______ ________
[ int ] [ dict ] [ list ] ... [ string ] Python core |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
_______________________________ | |
[ Python's object allocator ] | |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
______________________________________________________________ |
[ Python's raw memory allocator (PyMem_ API) ] |
+1 | <----- Python memory (under PyMem manager's control) ------> | |
__________________________________________________________________
[ Underlying general-purpose allocator (ex: C library malloc) ]
0 | <------ Virtual memory allocated for the python process -------> |
=========================================================================
_______________________________________________________________________
[ OS-specific Virtual Memory Manager (VMM) ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
__________________________________ __________________________________
[ ] [ ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
第0层以下是操作系统的功能。 第-2层是物理存储器 第-1层是操作系统特有的虚拟内存管理器 第0层是通用的内存管理器,比如C运行时所提供的malloc和free接口。
Layer 1
第1层是Python基于第0层简单包装而成的。PyMem_ API接口提供统一的raw memory管理接口。
我们先看下PyMem_Raw前缀的函数族,定义在Object/obmalloc.c中:
void *
PyMem_RawMalloc(size_t size)
{
if (size > (size_t)PY_SSIZE_T_MAX)
return NULL;
return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}
void *
PyMem_RawCalloc(size_t nelem, size_t elsize)
{
/* see PyMem_RawMalloc() */
if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
return NULL;
return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
}
void*
PyMem_RawRealloc(void *ptr, size_t new_size)
{
/* see PyMem_RawMalloc() */
if (new_size > (size_t)PY_SSIZE_T_MAX)
return NULL;
return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
}
void PyMem_RawFree(void *ptr)
{
_PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
}
四个函数通过_PyMem_Raw分别调用了malloc, calloc, realloc, free成员函数。
static PyMemAllocatorEx _PyMem_Raw = {
NULL, PYRAW_FUNCS
#endif
};
#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree
// Include/pymen.h
typedef struct {
/* user context passed as the first argument to the 4 functions */
void *ctx;
/* allocate a memory block */
void* (*malloc) (void *ctx, size_t size);
/* allocate a memory block initialized by zeros */
void* (*calloc) (void *ctx, size_t nelem, size_t elsize);
/* allocate or resize a memory block */
void* (*realloc) (void *ctx, void *ptr, size_t new_size);
/* release a memory block */
void (*free) (void *ctx, void *ptr);
} PyMemAllocatorEx;
_PyMem_Raw 对应PyMemAllocatorEx结构,可见前面几个PyMem_RawXXX函数对应的实现:
PyMem_RawMalloc => _PyMem_RawMalloc PyMem_RawCalloc => _PyMem_RawCalloc PyMem_RawRealloc => _PyMem_RawRealloc PyMem_RawFree => _PyMem_RawFree
static void *
_PyMem_RawMalloc(void *ctx, size_t size)
{
/* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
for malloc(0), which would be treated as an error. Some platforms would
return a pointer with no memory behind it, which would break pymalloc.
To solve these problems, allocate an extra byte. */
if (size == 0)
size = 1;
return malloc(size);
}
static void *
_PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
{
/* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
for calloc(0, 0), which would be treated as an error. Some platforms
would return a pointer with no memory behind it, which would break
pymalloc. To solve these problems, allocate an extra byte. */
if (nelem == 0 || elsize == 0) {
nelem = 1;
elsize = 1;
}
return calloc(nelem, elsize);
}
static void *
_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
{
if (size == 0)
size = 1;
return realloc(ptr, size);
}
static void
_PyMem_RawFree(void *ctx, void *ptr)
{
free(ptr);
}
从上面的_PyMem_RawMalloc的实现来看,PyMem_RawMalloc函数只是简单的包装了第0层的malloc函数,对申请大小为0的内存这种特殊情况进行了处理。Python不允许申请大小为0的内存,它会强制将其装换为申请大小为1个字节的内存空间,从而避免了不同操作系统上的不同运行时的行为。其他_PyMem_Raw也做了类似处理。
在第1层中,Python除了提供与malloc等相同语义分配raw memory之外,还提供面向Python类型的内存分配器。
/*
* Type-oriented memory interface
* ==============================
*
* Allocate memory for n objects of the given type. Returns a new pointer
* or NULL if the request was too large or memory allocation failed. Use
* these macros rather than doing the multiplication yourself so that proper
* overflow checking is always done.
*/
#define PyMem_New(type, n) \
( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \
( (type *) PyMem_Malloc((n) * sizeof(type)) ) )
#define PyMem_NEW(type, n) \
( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \
( (type *) PyMem_MALLOC((n) * sizeof(type)) ) )
PyMem_Malloc和PyMem_RawMalloc一样,需要提供作为申请空间大小的参数,而PyMem_New宏定义只要提供类型和数量,Python会自动侦测所需的内存空间大小。
PyMem_MALLOC是PyMem_Malloc的宏定义。
/* Macros. */
/* PyMem_MALLOC(0) means malloc(1). Some systems would return NULL
for malloc(0), which would be treated as an error. Some platforms
would return a pointer with no memory behind it, which would break
pymalloc. To solve these problems, allocate an extra byte. */
/* Returns NULL to indicate error if a negative size or size larger than
Py_ssize_t can represent is supplied. Helps prevents security holes. */
#define PyMem_MALLOC(n) PyMem_Malloc(n)
#define PyMem_REALLOC(p, n) PyMem_Realloc(p, n)
#define PyMem_FREE(p) PyMem_Free(p)
我们来看看PyMem_前缀的函数族。
void *
PyMem_Malloc(size_t size)
{
/* see PyMem_RawMalloc() */
if (size > (size_t)PY_SSIZE_T_MAX)
return NULL;
return _PyMem.malloc(_PyMem.ctx, size);
}
void *
PyMem_Calloc(size_t nelem, size_t elsize)
{
/* see PyMem_RawMalloc() */
if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
return NULL;
return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
}
void *
PyMem_Realloc(void *ptr, size_t new_size)
{
/* see PyMem_RawMalloc() */
if (new_size > (size_t)PY_SSIZE_T_MAX)
return NULL;
return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
}
void
PyMem_Free(void *ptr)
{
_PyMem.free(_PyMem.ctx, ptr);
}
_PyMem定义如下:
static PyMemAllocatorEx _PyMem = {
NULL, PYMEM_FUNCS
};
#ifdef WITH_PYMALLOC
# define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free
#else
# define PYOBJ_FUNCS PYRAW_FUNCS
#endif
#define PYMEM_FUNCS PYOBJ_FUNCS
_PyMem对应的四个操作malloc, calloc, realloc, free的实现受WITH_PYMALLOC参数的影响,
当没有设置WITH_PYMALLOC参数,四个操作操作的实现由PYRAW_FUNCS来提供,也就是前面提到的PyMem_Raw前缀的函数族实现。
当设置了WITH_PYMALLOC参数,四个操作的实现分别对应了_PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free。
为了简化Python自身的开发,Python在比第一层更高的抽象层次上提供了第二层内存管理接口。在这一层,是一组以
PyObject_为前缀的函数族,主要提供了创建Python对象的接口。这一套函数族又被唤作Pymalloc机制,从Python2.1开始,它才慢慢登上历史舞台,在Python 2.1和Python 2.2中,这个机制是作为实验性质的机制,所以默认情况下是不打开的,只有自己通过——with-pymalloc编译符号重新编译Python,才能激活Pymalloc机制。在Python 2.3发布时,Pymalloc机制已经经过了长期的优化和稳定,终于登上了“正宫”的位置,在默认情况下被打开了。摘自:《Python源码剖析》 — 陈儒
Python2.3中的whatsnew关于Pymalloc机制的介绍 Pymalloc: A Specialized Object Allocator
Layer 2
第2层的PyObject_前缀的函数族定义如下:
void *
PyObject_Malloc(size_t size)
{
/* see PyMem_RawMalloc() */
if (size > (size_t)PY_SSIZE_T_MAX)
return NULL;
return _PyObject.malloc(_PyObject.ctx, size);
}
void *
PyObject_Calloc(size_t nelem, size_t elsize)
{
/* see PyMem_RawMalloc() */
if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
return NULL;
return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
}
void *
PyObject_Realloc(void *ptr, size_t new_size)
{
/* see PyMem_RawMalloc() */
if (new_size > (size_t)PY_SSIZE_T_MAX)
return NULL;
return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
}
void
PyObject_Free(void *ptr)
{
_PyObject.free(_PyObject.ctx, ptr);
}
_PyObject定义了使用PYOBJ_FUNCS为结构体PyMemAllocatorEx的成员函数:
static PyMemAllocatorEx _PyObject = {
#ifdef Py_DEBUG
&_PyMem_Debug.obj, PYDBG_FUNCS
#else
NULL, PYOBJ_FUNCS
#endif
};
因为前面提到Python2.3之后默认开启WITH_PYMALLOC参数,所以PYOBJ_FUNCS对应的定义:
# define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free
我们简单的看下_PyObject_Malloc和_PyObject_Calloc的定义:
static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
return _PyObject_Alloc(0, ctx, 1, nbytes);
}
static void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{
return _PyObject_Alloc(1, ctx, nelem, elsize);
}
_PyObject_Malloc和_PyObject_Calloc都是通过_PyObject_Alloc来实现的。
static void *
_PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
{
size_t nbytes;
block *bp;
poolp pool;
poolp next;
uint size;
_Py_AllocatedBlocks++;
/* 略去 */
if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
/* 当申请的内存大小小于等于512字节的内存分配 */
}
/* The small block allocator ends here. */
redirect:
/* Redirect the original request to the underlying (libc) allocator.
* We jump here on bigger requests, on error in the code above (as a
* last chance to serve the request) or when the max memory limit
* has been reached.
*/
{
void *result;
if (use_calloc)
result = PyMem_RawCalloc(nelem, elsize);
else
result = PyMem_RawMalloc(nbytes);
if (!result)
_Py_AllocatedBlocks--;
return result;
}
SMALL_REQUEST_THRESHOLD在Python3.7中的定义:
#define SMALL_REQUEST_THRESHOLD 512
当需要分配大于512字节时,调用前面提到的PyMem_Raw前缀的函数族。
在Python中,许多时候申请的内存都是小块的内存,这些小块内存在申请后,很快又会被释放,由于这些内存的申请并不是为了创建对象,所以并没有对象一级的内存池机制。这就意味着Python在运行期间会大量地执行malloc和free的操作,导致操作系统频繁地在用户态和核心态之间进行切换,这将严重影响Python的执行效率。为了提高Python的执行效率,Python引入了一个内存池机制,用于管理对小块内存的申请和释放。这也就是之前提到的Pymalloc机制。
摘自:《Python源码剖析》 — 陈儒
整个小块内存池可以视为一个层次结构,在这个层次结构中,从下至上分别是:block、pool、arena、内存池。
arena、pool、block三者的关系。

Block
在最底层,block是一个确定大小的内存块。在Python中,有很多种block,不同种类的block都有不同的内存大小,这个内存大小的值被称为size class。为了在当前主流的32位平台和64位平台上都能获得最佳的性能,所有的block的长度都是8字节对齐的。
摘自:《Python源码剖析》 — 陈儒
/*==========================================================================*/
/*
* Allocation strategy abstract:
*
* For small requests, the allocator sub-allocates <Big> blocks of memory.
* Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
* system's allocator.
*
* Small requests are grouped in size classes spaced 8 bytes apart, due
* to the required valid alignment of the returned address. Requests of
* a particular size are serviced from memory pools of 4K (one VMM page).
* Pools are fragmented on demand and contain free lists of blocks of one
* particular size class. In other words, there is a fixed-size allocator
* for each size class. Free pools are shared by the different allocators
* thus minimizing the space reserved for a particular size class.
*
* This allocation strategy is a variant of what is known as "simple
* segregated storage based on array of free lists". The main drawback of
* simple segregated storage is that we might end up with lot of reserved
* memory for the different free lists, which degenerate in time. To avoid
* this, we partition each free list in pools and we share dynamically the
* reserved space between all free lists. This technique is quite efficient
* for memory intensive programs which allocate mainly small-sized blocks.
*
* For small requests we have the following table:
*
* Request in bytes Size of allocated block Size class idx
* ----------------------------------------------------------------
* 1-8 8 0
* 9-16 16 1
* 17-24 24 2
* 25-32 32 3
* 33-40 40 4
* 41-48 48 5
* 49-56 56 6
* 57-64 64 7
* 65-72 72 8
* ... ... ...
* 497-504 504 62
* 505-512 512 63
*
* 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
* allocator.
*/
/*==========================================================================*/
对于小于512字节的小块内存分配,申请不同大小和对应的block的大小对应关系如上,每个size class都对应了一个size class index,这个index从0开始,在后面的内存池的使用中会用到它。
block是由pool来管理的。
Pool
一组block的集合称为一个pool,换句话说,一个pool管理着一堆有固定大小的内存块。pool的大小是4K字节,因为当前操作系统的内存页都是4K字节。
/*
* The system's VMM page size can be obtained on most unices with a
* getpagesize() call or deduced from various header files. To make
* things simpler, we assume that it is 4K, which is OK for most systems.
* It is probably better if this is the native page size, but it doesn't
* have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
* size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
* violation fault. 4K is apparently OK for all the platforms that python
* currently targets.
*/
#define SYSTEM_PAGE_SIZE (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1)
...
/*
* Size of the pools used for small blocks. Should be a power of 2,
* between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
*/
#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
pool在Python中是有数据结构表示的:
/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uint8_t block;
/* Pool for small blocks. */
struct pool_header {
union { block *_padding;
uint count; } ref; /* number of allocated blocks */
block *freeblock; /* pool's free list head */
struct pool_header *nextpool; /* next pool of this size class */
struct pool_header *prevpool; /* previous pool "" */
uint arenaindex; /* index into arenas of base adr */
uint szidx; /* block size class index */
uint nextoffset; /* bytes to virgin block */
uint maxnextoffset; /* largest valid nextoffset */
};
typedef struct pool_header *poolp;
先来解释pool_header各个域的意思:
ref表示分配到pool里面的block数量,为什么会是一个union的原因我们先暂且不表;freeblock指针指向下一次分配block的起始位置;nextpool指针和prevpool指针分别指向双向链表的下一个pool和前一个pool,这边的双向链表我们也先暂且不表;arenaindex表示这个pool所属arena的索引;szidx表示分配的block大小,一个pool管理的block的大小是一样的;nextoffset表示freeblock的下一次偏移位置。maxnextoffset表示pool中最后一个block位置距pool的偏移。这意味这当nextoffset > maxnextoffset时,就没有可分配的block了。
pool_header顾名思义就是仅表示pool的头部,只占pool 4K大小的很小一部分,剩下的内存块由是pool维护的block的集合组成的。
这里需要提一下block的几种状态:
- 未使用
- 已经分配(正在使用)
- 使用完毕(空闲)
所有的block一开始是连续的未使用block。
当初始化pool的时候从pool的开头开始分配block,freeblock就会指向的是下一次分配block的起始位置。
当使用完毕的block被释放之后,就会被连接到作为空闲链表的freeblock中。

我们通过代码来看pool是怎么初始化和管理block的。
/*
* Initialize the pool header, set up the free list to
* contain just the second block, and return the first
* block.
*/
pool->szidx = size;
size = INDEX2SIZE(size);
bp = (block *)pool + POOL_OVERHEAD;
pool->nextoffset = POOL_OVERHEAD + (size << 1);
pool->maxnextoffset = POOL_SIZE - size;
pool->freeblock = bp + size;
*(block **)(pool->freeblock) = NULL;
if (use_calloc)
memset(bp, 0, nbytes);
return (void *)bp;
pool是指向4K大小的内存,size表示的是size class index。
设置szidx为size,并将size class index转换成对应的size。
#define ALIGNMENT_SHIFT 3
/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
比如size class index为3的话,size就是32字节。
bp表示block指针,第一块block地址就是偏移了pool_header的内存大小,并对齐的地址。
#define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
分配的block可用的大小范围是[bp, bp+size]。可以理解block就是一个大小为size的unsigned char *数组BLOCK1,bp指向的是这个数组的起始位置BLOCK1[0]。
typedef unsigned char uint8_t;
typedef uint8_t block;
在nextoffset和maxnextoffset中存储的是相对于poo的偏移位置。
nextoffset的偏移量为POOL_OVERHEAD+ size * 2。maxnextoffset是pool中最后一个block位置距pool的偏移为POOL_SIZE- size。
freeblock指向了第二块block的开头bp+size,我们假设第二块block是BLOCK2数组。
(block **)(pool->freeblock)相当于freeblock++的操作,也就是指向了BLOCK2[1]的位置。Python没有为block提供结构体表示,是通过这个位置将空闲的block连接起来的,设置NULL表示链表的结束。
所以实际上在初始化pool之后,空闲链表freeblock是空链表,freeblock指向的是下一次分配block的起始位置,也是未使用的block的起始位置。
pool初始化之后的内存图:

我们再来看下释放block后,pool是怎么管理block的。
assert(pool->ref.count > 0); /* else it was empty */
*(block **)p = lastfree = pool->freeblock;
pool->freeblock = (block *)p;
p是指向释放的block指针。
释放时,空闲链表开头的地址会被写入作为释放对象block内,之后将释放完毕的block地址存到对应pool的freeblock中,形成block的空闲链表。
释放block之后的内存图:

pool_header在arena内将各个pool相连接,并保持pool内的block的大小和个数。
arena
在Python中,多个pool聚合的结果就是一个arena。arena的大小为256KB:
#define ARENA_SIZE (256 << 10) /* 256KB */
那么一个arena中容纳的最大pool个数就是 ARENA_SIZE / POOL_SIZE = 64个。
一个arena对应的结构体如下:
/* Record keeping for arenas. */
struct arena_object {
/* The address of the arena, as returned by malloc. Note that 0
* will never be returned by a successful malloc, and is used
* here to mark an arena_object that doesn't correspond to an
* allocated arena.
*/
uintptr_t address;
/* Pool-aligned pointer to the next pool to be carved off. */
block* pool_address;
/* The number of available pools in the arena: free pools + never-
* allocated pools.
*/
uint nfreepools;
/* The total number of pools in the arena, whether or not available. */
uint ntotalpools;
/* Singly-linked list of available pools. */
struct pool_header* freepools;
/* Whenever this arena_object is not associated with an allocated
* arena, the nextarena member is used to link all unassociated
* arena_objects in the singly-linked `unused_arena_objects` list.
* The prevarena member is unused in this case.
*
* When this arena_object is associated with an allocated arena
* with at least one available pool, both members are used in the
* doubly-linked `usable_arenas` list, which is maintained in
* increasing order of `nfreepools` values.
*
* Else this arena_object is associated with an allocated arena
* all of whose pools are in use. `nextarena` and `prevarena`
* are both meaningless in this case.
*/
struct arena_object* nextarena;
struct arena_object* prevarena;
};
address表示arena的地址;pool_address表示arena中未被利用的pool的起始地址;nfreepools表示arena中未被利用pool的数量;ntotalpools表示arena中pool的总数;freepools指向未被使用的pool的单向链表;nextarena和prevarena指向双向链表的下一个arena和前一个arena。
未被利用指的是pool未被初始化过,未被使用指的是pool已经被初始化过但pool里面的block都未使用。
这边有一个疑问,address域和pool_address域一开始是相等么?
实际上一开始address域和pool_address域的地址可能会不一样。原因是arena内的pool是4K字节对齐的,也就是说,每个pool开头的地址为4K的倍数。

除此之外,pool_address表示的是arena中未被利用的pool的起始地址,这个指针可能会随着arena中的pool被利用而向后移动。
arena_object结构体管理着arena。arena_object和arena在内存中的对应关系:

我们曾说arena是用来管理一组pool的集合的,
arena_object的作用看上去和pool_header的作用是一样的,但是实际上,pool_header管理的内存和areana_object管理的内存有一点细微的差别。pool_header管理的内存与pool_header自身是一块连续的内存,而areana_object与其管理的内存则是分离的。摘自:《Python源码剖析》 — 陈儒