在http://blog.chinaunix.net/uid-22334392-id-3497189.html中,我们给出了sizeof(PyIntObject)的大小为12
在http://blog.chinaunix.net/uid-22334392-id-3495659.html中,我们总结了python所有的对象(包括对象,类型对象,类型对象的类型)的开始部分总是相同的,即具有如下的结构:
-
/* PyObject_HEAD defines the initial segment of every PyObject. */
/* Expands to the two leading fields (reference count and type pointer) and
   is meant to be placed at the very start of an object struct.  The
   trailing backslashes are required: without them the macro is empty and
   the field declarations are left dangling at file scope. */
#define PyObject_HEAD                   \
    Py_ssize_t ob_refcnt;               \
    struct _typeobject *ob_type;
一个int对象的结构如下:
-
typedef struct {
-
PyObject_HEAD
-
long ob_ival;
-
} PyIntObject;
这一点是我们后续继续研究的基础,也是python源码分析的出发点。
int对象的class为PyIntObject,而该对象通过ob_type指针指向了该对象所属的类型对象PyInt_Type,而该对象PyInt_Type同样通过
ob_type指向了所有类型对象的类型(元类型):PyType_Type,PyType_Type的ob_type则指向了自己。
《python源码分析》这本书指明了python整数对象的缓存机制,即对使用非常频繁的小整数进行缓存。小整数的范围是可以调整的:在2.7中,NSMALLPOSINTS和NSMALLNEGINTS都由#ifndef保护,重新定义这两个宏并重新编译即可调整范围。
-
#define BLOCK_SIZE 1000 /* 1K less typical malloc overhead */
-
#define BHEAD_SIZE 8 /* Enough for a 64-bit pointer */
-
#define N_INTOBJECTS ((BLOCK_SIZE - BHEAD_SIZE) / sizeof(PyIntObject))
-
-
struct _intblock {
-
struct _intblock *next;
-
PyIntObject objects[N_INTOBJECTS];
-
};
-
-
typedef struct _intblock PyIntBlock;
-
-
static PyIntBlock *block_list = NULL;
-
static PyIntObject *free_list = NULL;
-
#ifndef NSMALLPOSINTS
-
#define NSMALLPOSINTS 257
-
#endif
-
#ifndef NSMALLNEGINTS
-
#define NSMALLNEGINTS 5
-
#endif
-
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
-
/* References to small integers are saved in this array so that they
-
can be shared.
-
The integers that are saved are those in the range
-
-NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive).
-
*/
-
static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
-
#endif
通过使用PyIntBlock,python缓存了[-5, 257)区间内(即-5到256)的所有整数,共262个,这些对象都是放在PyIntBlock上的。一个PyIntBlock可以存放82个对象,因此在缓存262个小整数的时候,python就需要创建4个PyIntBlock,这些PyIntBlock通过PyIntBlock的next指针进行连接。这都不是重点,我们重点研究的是每一个PyIntBlock内82个对象所形成的单向链表
-
fill_free_list(void)
-
{
-
PyIntObject *p, *q;
-
/* Python's object allocator isn't appropriate for large blocks. */
-
p = (PyIntObject *) PyMem_MALLOC(sizeof(PyIntBlock));
-
if (p == NULL)
-
return (PyIntObject *) PyErr_NoMemory();
-
((PyIntBlock *)p)->next = block_list;
-
block_list = (PyIntBlock *)p;
-
/* Link the int objects together, from rear to front, then return
-
the address of the last int object in the block. */
-
p = &((PyIntBlock *)p)->objects[0];
-
q = p + N_INTOBJECTS;
-
while (--q > p)
-
Py_TYPE(q) = (struct _typeobject *)(q-1);
-
Py_TYPE(q) = NULL;
-
return p + N_INTOBJECTS - 1;
-
}
-
PyObject *
-
PyInt_FromLong(long ival)
-
{
-
register PyIntObject *v;
-
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
-
if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) {
-
v = small_ints[ival + NSMALLNEGINTS];
-
Py_INCREF(v);
-
#ifdef COUNT_ALLOCS
-
if (ival >= 0)
-
quick_int_allocs++;
-
else
-
quick_neg_int_allocs++;
-
#endif
-
return (PyObject *) v;
-
}
-
#endif
-
if (free_list == NULL) {
-
if ((free_list = fill_free_list()) == NULL)
-
return NULL;
-
}
-
/* Inline PyObject_New */
-
v = free_list;
-
free_list = (PyIntObject *)Py_TYPE(v);
-
PyObject_INIT(v, &PyInt_Type);
-
v->ob_ival = ival;
-
return (PyObject *) v;
-
}
从这两个函数,我们可以清晰地看到,小整数确实是缓存到了
PyIntBlock,为了验证确实如此,我们修改int_print代码后,进行
确认:
-
/*add by kinfinger */
-
435 static void print_block(PyIntBlock * pblock,int n){
-
436 static PyIntObject *freelist,*qfree;
-
437 freelist = free_list;
-
438
-
439 PyIntObject * pint, *qint;
-
440 pint =&pblock->objects[0];
-
441 qint = pint + N_INTOBJECTS;
-
442 while( --qint >= pint){
-
443 /* if (qint == freelist ) blank block
-
444 freelist = (PyIntObject *) freelist->ob_type;
-
445 else */
-
446 printf("value %8ld at @ %p with ref count %4d in %4d IN TBLOCK n", qint->ob_ival,&(qint->ob_ival),qint->ob_refcnt,n);
-
447 }
-
448 }
-
-
static int
-
451 int_print(PyIntObject *v, FILE *fp, int flags)
-
452 /* flags -- not used but required by interface */
-
453 {
-
454 long int_val = v->ob_ival;
-
455 Py_BEGIN_ALLOW_THREADS
-
456 fprintf(fp, "%ld", int_val);
-
457 printf("nthe below is something new n");
-
458 /*******add by kinfinger */
-
459
-
460 PyIntBlock * pblock,*qblock;
-
461 pblock = block_list;
-
462 int n = 0;
-
463 while(pblock){
-
464 n += 1;
-
465 qblock = pblock->next;
-
466 print_block(pblock,n);
-
467 pblock = qblock;
-
468 }
-
469 printf("the free_list add is %pn",&(free_list->ob_ival));
-
470
-
471 Py_END_ALLOW_THREADS
-
472 return 0
然后重新编译,运行后得到如下结果:
从上述输出,我们可以清晰地看到小整数是缓存到INTBLOCK链表的尾部,同时在每一个INTBLOCK内部,两个小整数之间的地址之差刚好为12
这同我们的结果相符。同时发现在INTBLOCK 1的开始部分有些对象没有被初始化,即仍在free_list上的部分,其值都是不可信的
仔细的看一下源代码你会发现:
-
while (--q > p)
-
Py_TYPE(q) = (struct _typeobject *)(q-1);
你会发现在形成单向链表的时候,python使用了一个小技巧:所有的对象都具有相同的开头部分,即ob_refcnt和ob_type,因此空闲对象的ob_type字段被借用来保存指向下一个空闲对象的指针。
这样,暴露出来的问题就是对小整型对象的类型检查无效了,那么python是如何进行类型检查的呢?
为此,我们修改类型检查的定义:
-
/* add by kinfinger */
/* Store the C long value of obj in lng when PyInt_Check(obj) succeeds;
   otherwise make the enclosing function return Py_NotImplemented with one
   more reference.  The printf is a debug trace marking that the check
   succeeded.

   Every line but the last must end in a backslash, otherwise only the
   first line belongs to the macro. */
#define CONVERT_TO_LONG(obj, lng)               \
    if (PyInt_Check(obj)) {                     \
        printf("small int check or not\n");     \
        lng = PyInt_AS_LONG(obj);               \
    }                                           \
    else {                                      \
        Py_INCREF(Py_NotImplemented);           \
        return Py_NotImplemented;               \
    }
发现当int为小整数时,是不进行类型检查的,只有当整数达到一定的范围时(2<<16-1)才进行真正的类型检查,这个没有在源代码中发现,有知道的筒子可以告诉我。
至于大整数的缓存也是通过INTBLOCK来实现的,源码分析书中写得比较详细,在此不再介绍。
还有一个比较奇怪的现象是在对象进行初始化的时候,使用如下代码:
-
PyObject_INIT(v, &PyInt_Type);
-
before image:
-
PyObject *
-
PyObject_Init(PyObject *op, PyTypeObject *tp)
-
{
-
if (op == NULL)
-
return PyErr_NoMemory();
-
/* Any changes should be reflected in PyObject_INIT (objimpl.h) */
-
Py_TYPE(op) = tp;
-
_Py_NewReference(op);
-
return op;
-
}
-
after image:
-
/* this is edit by kinfinger */
218 long int_call = 0;
219 PyObject *
220 PyObject_Init(PyObject *op, PyTypeObject *tp)
221 {
222 if (op == NULL)
223 return PyErr_NoMemory();
224 /* Any changes should be reflected in PyObject_INIT (objimpl.h) */
225 printf("%s type \r\n",tp->tp_name);
226 if (strcmp(tp->tp_name ,"int") == 0 ){ /* just for int */
227 int_call += 1;
228 printf("int call number is %ld \n",int_call);
229 }
230 /* int_call += 1;
231 printf("int call number is %ld \n",int_call);
232 printf("in PyObject_init\r\n"); */
233 Py_TYPE(op) = tp;
234 _Py_NewReference(op);
235 return op;
236 }
貌似这是python所有对象的初始化部分,但是我们在进行确认的时候,发现程序打印出来的是类似如下消息:
buffer type
PyCapsule type
code type
int call number is 617
其中 int_call 的大小和我们的预期相符,每创建一个整数,该值加1,但是上面的信息却让人看不懂,留待以后研究
-
>>> a = 1
-
>>> type(a)
-
<type 'int'>
-
>>> type(int)
-
<type 'type'>
-
>>> b =1
-
>>> print hex(id(a)),hex(id(b))
-
0x160a2d8 0x160a2d8
ref:
python源码分析
python manual
python源代码
阅读(3326) | 评论(0) | 转发(0) |