python自动生成结构体打印函数-luoyan

菜鸟先飞

首页　| 　博文目录　| 　关于我

luoyan_xy

博客访问： 387779
博文数量： 73
博客积分： 3574
博客等级：中校
技术积分： 1503
用户组：普通用户
注册时间： 2010-03-26 11:17

文章分类

全部博文（73）

python（5）
解剖Twitter（10）
Ipv6（0）
Gtk（0）
linux网络协议栈（5）
Linux内核学习（6）
Linux文件系统（0）
Netfilter（6）
心情感悟（3）
编程技术（6）
读书笔记（2）
Linux系统命令（10）
疑难杂症（2）
Linux学习（17）
未分配的博文（1）

文章存档

2012年（14）

2011年（15）

2010年（44）

我的朋友

相关博文

python自动生成结构体打印函数

分类： Python/Ruby

2012-09-11 23:15:20

原始需求是在调试代码的过程中，经常需要插入一些printk或printf调试语句，而如果被调试的对象是一个结构体，并且结构体所包含的变量还比较多的时候，手写打印结构体内变量的函数就成了一个琐碎而又无聊的工作。

那么，有没有什么办法能够自动生成这些打印函数，在需要的时候直接编译，而不需要手动去一行行的敲代码呢。

当然，这个问题绝对不是我第一次遇到，网上查了一下，有不少人在问同样的问题，而在开源界也有一个实现此功能的开源软件，它的原理是利用gcc -g编译生成的二进制代码中已经存在打印每个结构体变量的函数，通过某种方法把这些code码直接抠出来使用。

这个方法当然比较高明了，恰巧最近在学习python的正则表达式，便想着能不能通过正则表达式去解析代码，生成一个.h或者.c文件，文件中包含针对每个结构体的打印函数，下面就可以直接在需要的位置添加这些函数，重新编译时把生成的文件编译进工程就可以了。

针对这个需求，有了以下的代码：

点击(此处)折叠或打开

#!/usr/bin/env python
#-*-coding:UTF-8 -*-
import re
import os
import sys
import getopt
# Maps a C type name to its printf conversion (e.g. '%d'), or to the name of
# the type it aliases when no conversion is known for it yet.
print_symbol = {}
# Intended to hold the print function generated for each struct; the code
# shown here never populates it.
print_func = {}
# Every typedef statement collected from the parsed sources.
define_store = []
# Every struct definition collected from the parsed sources.
struct_store = []
# Generated functions are appended to temp.c unless the -o option replaces it.
target_file = './temp.c'
# A named struct definition up to its first closing brace (no nesting).
m = re.compile(r'struct\s*\w[\w_]*?\s*?{.*?}',re.S)
# A single-line typedef statement, without the trailing semicolon.
n = re.compile(r'typedef.*?(?=;)')
# A /* ... */ comment, possibly spanning several lines.
m1 = re.compile(r'/\*.*?\*/',re.S)
# A // comment up to the end of its line.  The previous pattern required a
# following newline, so a comment on the very last line of a file that has
# no trailing newline was left in place.
n1 = re.compile(r'//[^\n]*')
# An "#if 0 ... #endif" region (no nesting).
m2 = re.compile(r'#if\s*?0.*?#endif',re.S)
def set_default_conf():
    """Load the built-in C type -> printf conversion table into print_symbol.

    Existing entries with the same type name are overwritten; other entries
    are left alone.  Every pointer type listed here is printed with '%p'.
    """
    # Possible later work (from the original author's note): generate this
    # table automatically, e.g. build the type combinations and derive the
    # conversion for each.
    print_symbol.update({
        'int': '%d',
        'unsigned int': '%u',
        # '%l' alone is not a valid printf conversion; a long needs '%ld'.
        'long': '%ld',
        'unsigned long': '%lu',
        'short': '%d',
        'unsigned short': '%u',
        'char': '%c',
        'unsigned char': '%u',
        'float': '%f',
        'double': '%f',
        'void *': '%p',
        'int *': '%p',
        'char *': '%p',
    })
def collect_each_define(define_item):
    """Record one typedef statement for later resolution.

    define_item is appended unchanged to the module-level define_store list.
    """
    define_store.append(define_item)
def collect_each_struct(struct_item):
    """Record one struct definition for later code generation.

    struct_item is appended unchanged to the module-level struct_store list.
    No comment stripping happens here: per the original author's note,
    comments should be removed before structs are extracted, so that a
    commented-out struct is never parsed.
    """
    struct_store.append(struct_item)
def process_array(head,fp,line):
    """Emit C code that prints an array member of a struct.

    head -- label prefix for the printed text (callers pass the struct tag).
    fp   -- object with a write() method that receives the generated C code.
    line -- one member declaration without its ';', e.g. 'int a[4]' or
            'char *names[MAX]'.

    A one-dimensional array whose element type has a known conversion (any
    pointer element falls back to '%p') becomes a for-loop that prints each
    element as item.<name>[i].  Arrays that cannot be iterated that way
    (unknown element type, several dimensions) get a single line reporting
    sizeof(item.<name>).  Declarations that do not end in '[...]' or that
    have an empty dimension (flexible array member) produce no output.
    """
    # type text, member name, then one or more trailing [size] groups.
    decl = re.match(r'(.*?)(\w+)\s*((?:\[[^\]]*\]\s*)+)$', line.strip(), re.S)
    if decl is None:
        return
    type_text, name, dims_text = decl.groups()
    # Sizes are kept as written so that macro sizes such as [MAX_LEN] work.
    dims = [dim.strip() for dim in re.findall(r'\[([^\]]*)\]', dims_text)]

    # Normalise to the table's spelling: 'char *' for 'char*', 'char  *'...
    stars = ''.join(re.findall(r'\*+', type_text))
    base = ' '.join(type_text.replace('*', ' ').split())
    style = (base + ' ' + stars) if stars else base

    fmt = print_symbol.get(style, '')
    if '%' not in fmt and stars:
        # Pointers missing from the table are always printed with %p.
        fmt = '%p'

    if len(dims) == 1 and dims[0] and '%' in fmt:
        fp.write('\n')
        fp.write('''\tfor(i = 0 ; i < %s; i++)\n''' %dims[0])
        fp.write('''\t{\n''')
        # Print the i-th element; passing the array itself to a scalar
        # conversion would be undefined behaviour in the generated C.
        fp.write('''\t\tprintf("%s.%s[%%d] <==> %s\\n",i,item.%s[i]);\n''' %(head,name,fmt,name))
        fp.write('''\t}\n''')
    elif all(dims):
        # sizeof is valid for any complete array type, whatever its element.
        fp.write('''\tprintf("sizeof(%s.%s) %%d\\n",(int)sizeof(item.%s));\n''' %(head,name,name))
def process_bit(head,fp,line):
    """Emit C code that prints every bit-field declared on one line.

    head -- label prefix for the printed text (callers pass the struct tag).
    fp   -- object with a write() method that receives the generated C code.
    line -- one declaration without its ';', e.g. 'unsigned int a:1, b:3'.

    Each 'name:width' pair becomes one printf.  The conversion comes from
    print_symbol when the declared type is known there; otherwise the field
    is cast to long long and printed with '%lld' (sizeof cannot be applied
    to a bit-field in C, so the sizeof fallback used for ordinary members
    is not an option here).  Lines without any 'name:width' pair produce no
    output.
    """
    field_pattern = r'(\w+)\s*:\s*\d+'
    fields = re.findall(field_pattern, line)
    if not fields:
        return
    # Whatever words remain once the fields are removed form the type.  They
    # are joined with single spaces so that 'unsigned int' matches the table.
    style = ' '.join(re.findall(r'\w+', re.sub(field_pattern, '', line)))
    fmt = print_symbol.get(style, '')
    for field in fields:
        if '%' in fmt:
            fp.write('''\tprintf("%s.%s <==> %s\\n",item.%s);\n''' %(head,field,fmt,field))
        else:
            fp.write('''\tprintf("%s.%s <==> %%lld\\n",(long long)item.%s);\n''' %(head,field,field))
def process_pointer(head,fp,line):
    """Emit C code that prints a pointer member of a struct.

    head -- label prefix for the printed text (callers pass the struct tag).
    fp   -- object with a write() method that receives the generated C code.
    line -- one member declaration without its ';', e.g. 'char *name' or
            'int (*cb)(int)'.

    Arrays are dispatched before pointers by the caller, so no pointer array
    is expected here.  The conversion is taken from print_symbol when the
    pointer type is listed there; every other pointer, including function
    pointers, is printed with '%p'.  A line without '*' produces no output.
    """
    if '*' not in line:
        return

    # Function pointer: the member name sits inside '(*name)'.
    func_ptr = re.search(r'\(\s*\*+\s*(\w+)\s*\)', line)
    if func_ptr:
        name = func_ptr.group(1)
        fmt = ''
    else:
        parts = re.split(r'\*+', line)
        stars = ''.join(re.findall(r'\*+', line))
        # Normalise to the table's spelling: 'char *' for 'char*', 'char  *'...
        style = ' '.join(parts[0].split()) + ' ' + stars
        # Strip the name so 'char * p' does not yield 'item. p'.
        name = parts[-1].strip()
        fmt = print_symbol.get(style, '')

    if '%' in fmt:
        fp.write('''\tprintf("%s.%s <==> %s\\n",item.%s);\n''' %(head,name,fmt,name))
    else:
        fp.write('''\tprintf("%s.%s <==>%%p\\n",item.%s);\n''' %(head,name,name))
def process_normal(head,fp,line):
    """Emit C code that prints a plain (scalar or nested-struct) member.

    head -- label prefix for the printed text (callers pass the struct tag).
    fp   -- object with a write() method that receives the generated C code.
    line -- one member declaration without its ';', e.g. 'unsigned int n'.

    The last word of the declaration is the member name and everything
    before it is the type.  A type with a known conversion in print_symbol
    is printed directly.  Anything else (typically a nested struct, or a
    typedef whose underlying type is unknown) only has its size reported;
    calling the nested struct's own print function is left as future work.
    """
    words = line.split()
    if not words:
        return
    style = ' '.join(words[:-1])
    name = words[-1]

    fmt = print_symbol.get(style, '')
    if '%' in fmt:
        fp.write('''\tprintf("%s.%s <==> %s\\n",item.%s);\n''' %(head,name,fmt,name))
    else:
        # The generated function's parameter is called 'item'; the struct
        # tag (head) is only a label and is not a valid C expression.  The
        # cast keeps the size_t result consistent with the %d conversion.
        fp.write('''\tprintf("sizeof(%s.%s) %%d\\n",(int)sizeof(item.%s));\n''' %(head,name,name))
def process_line(head,fp,line):
    """Dispatch one struct member declaration to the matching generator.

    head -- label prefix for the printed text (callers pass the struct tag).
    fp   -- object with a write() method that receives the generated C code.
    line -- one declaration taken from a struct body, without its ';'.

    Checks run in this order:
      1. '(' together with '*': function pointer -> process_pointer.  This
         comes first because a parameter list may contain ',' or '[]'.
      2. ':': bit-field(s) -> process_bit.  Testing for ':' rather than ','
         also catches a single bit-field such as 'int a:1'.
      3. ',': several declarators sharing one type ('char *p, q[4], r');
         each is re-dispatched as its own declaration.
      4. '[' and ']': array -> process_array (pointer arrays count as arrays).
      5. '*': pointer -> process_pointer.
      6. anything else -> process_normal.
    """
    if '(' in line and '*' in line:
        process_pointer(head,fp,line)
    elif ':' in line:
        process_bit(head,fp,line)
    elif ',' in line:
        pieces = [piece.strip() for piece in line.split(',')]
        # The shared type is the first declarator minus its trailing
        # '[*...]name[size...]' part.
        lead = re.match(r'(.*?)[\s*]*\w+\s*(?:\[[^\]]*\]\s*)*$', pieces[0], re.S)
        base = lead.group(1).strip() if lead else ''
        process_line(head,fp,pieces[0])
        for extra in pieces[1:]:
            if extra:
                process_line(head,fp,base + ' ' + extra)
    elif '[' in line and ']' in line:
        process_array(head,fp,line)
    elif '*' in line:
        process_pointer(head,fp,line)
    else:
        process_normal(head,fp,line)
def reslove_each_define():
    """Register every collected typedef alias in print_symbol.

    For 'typedef <type> <alias>' the alias receives the conversion already
    known for <type>; when none is known the alias maps to the type text
    itself.  Pointer aliases ('typedef char *str_t') and function-pointer
    aliases ('typedef int (*cb_t)(int)') default to '%p'.  After all
    typedefs are registered, alias chains are followed so that an alias
    declared before (or on top of) another alias still ends up with the
    final conversion.

    Only simple single-line typedefs are understood; array typedefs and
    anything whose alias is not a plain identifier are skipped.
    """
    func_ptr = re.compile(r'\(\s*\*+\s*(\w+)\s*\)')
    for item in define_store:
        body = re.sub(r'\btypedef\b', '', item, count=1)

        hit = func_ptr.search(body)
        if hit:
            print_symbol[hit.group(1)] = '%p'
            continue

        # Detach '*' from its neighbours, then split on any whitespace so
        # tabs and repeated spaces do not end up inside the type name.
        tokens = body.replace('*', ' * ').split()
        if len(tokens) < 2:
            continue
        alias = tokens[-1]
        if not re.match(r'\w+$', alias):
            continue
        stars = ''.join(tok for tok in tokens[:-1] if tok == '*')
        words = ' '.join(tok for tok in tokens[:-1] if tok != '*')
        if stars:
            print_symbol[alias] = print_symbol.get(words + ' ' + stars, '%p')
        else:
            # Unknown base type: remember its name and resolve it below.
            print_symbol[alias] = print_symbol.get(words, words)

    # Second pass: replace stored type names by their conversion where the
    # chain leads to one.  'seen' stops on circular aliases.
    for alias in list(print_symbol):
        target = print_symbol[alias]
        seen = set()
        while '%' not in target and target in print_symbol and target not in seen:
            seen.add(target)
            target = print_symbol[target]
        print_symbol[alias] = target
def reslove_each_struct():
    """Append one print_<tag>() C function per collected struct to target_file.

    For every entry of struct_store the text between the first '{' and the
    first '}' is taken as the body, split into declarations at ';', and each
    declaration is handed to process_line, which writes the printf calls.
    The generated function receives the struct by value in a parameter
    named 'item'.

    An entry without '{' or '}' is reported and skipped; the remaining
    entries are still processed.
    """
    name_match = re.compile(r'(?<=struct)\s*\w\w*(?=\s*?{)')
    item_match = re.compile(r'\w.*?(?=;)',re.S)
    for item in struct_store:
        # The struct tag: the word between 'struct' and '{'.
        name = ''.join(name_match.findall(item)).strip()
        index1 = item.find('{')
        index2 = item.find('}')
        if index1 == -1 or index2 == -1:
            print('reslove failed for the error farmat struct')
            continue
        body = item[index1 + 1 : index2]
        # 'with' closes the file even if a member handler raises.
        with open(target_file,'a+') as fp:
            fp.write('\n')
            # A struct tag is only a type name in C when prefixed by 'struct'.
            fp.write('void print_%s(struct %s item)\n'%(name,name))
            fp.write('{\n')
            # Loop counter used by the array-printing loops.
            fp.write('\tint i = 0; \n')
            for line in item_match.findall(body):
                process_line(name,fp,line)
            fp.write('}\n')
def handle_file(test_file):
    """Collect the typedefs and struct definitions found in one source file.

    test_file -- path of the C source or header to read.

    The file content is first stripped of '#if 0 ... #endif' regions, then
    of /* */ comments, then of // comments, so that disabled code is never
    parsed.  Every typedef match is passed to collect_each_define and every
    struct match to collect_each_struct.
    """
    # 'with' guarantees the handle is closed; it used to be left open.
    with open(test_file,'r') as fp:
        context = fp.read()
    for strip_pattern in (m2, m1, n1):
        context = strip_pattern.sub('',context)
    for typedef_text in n.findall(context):
        collect_each_define(typedef_text)
    for struct_text in m.findall(context):
        collect_each_struct(struct_text)
'''
def write_to_file(target):
pass
'''
def print_help_info():
    """Write the command-line usage summary to standard output."""
    usage_lines = (
        'Usage: python get_struct.py [-f file -d directory -o target_file -h --help]',
        '-f 待解析的源文件',
        '-d 待解析的目录，将解析目录下所有的.c以及.h',
        '-o 打印函数保留文件，默认保留在当前目录下的temp.c文件中',
        '-h/--help 打印帮助信息',
    )
    for text in usage_lines:
        print(text)
if __name__ == '__main__':
    # Script entry point: parse the options, collect typedefs and structs
    # from the requested file and/or directory, then generate the print
    # functions into target_file.
    if len(sys.argv) <= 1:
        # Nothing to parse: explain the usage instead of running on with
        # empty input.
        print('input the error args format')
        print_help_info()
        sys.exit(-1)
    try:
        opts,args = getopt.getopt(sys.argv[1:],'f:d:ho:',['help'])
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(-1)
    reslove_file = 0
    reslove_dir = 0
    for k,v in opts:
        if k == '-f':
            reslove_file = v
            print(reslove_file)
        elif k == '-d':
            reslove_dir = v
            print(reslove_dir)
        elif k == '-o':
            target_file = v
            print(target_file)
        elif k in ('-h','--help'):
            print_help_info()
            sys.exit()
        else:
            print('Error,unhandled option')
            sys.exit(-1)
    set_default_conf()
    if reslove_file:
        handle_file(reslove_file)
    if reslove_dir:
        # Directory mode: parse every .c and .h file below the directory.
        # Names are sorted so the generated output order is reproducible.
        for root, _subdirs, filenames in os.walk(reslove_dir):
            for filename in sorted(filenames):
                if filename.endswith(('.c', '.h')):
                    handle_file(os.path.join(root, filename))
    # Resolve typedefs first so structs can use the aliases they introduce,
    # then generate the print functions.
    reslove_each_define()
    reslove_each_struct()

代码中注释自认为还是比较详细的，代码也不长，看懂应该没什么问题。

一共300行左右的代码，花了自己两个晚上看电影的时间，不过如果用C实现，用字符串去解析，那这个工程的复杂性还会增加，由此可见，python在效率（编码 + 调试 + 运行）上还是有一定的优势。

当然了，代码中也存在不少的问题，而且有一些严重的硬伤，究其原因，还在于用正则表达式去完成词法分析、语法分析的逻辑，两者毕竟是有差别的。

如果有机会，以后会慢慢完善它；如果谁有一些好的建议，也可以交流~

阅读(6395) | 评论(0) | 转发(0) |

上一篇：创建python的C扩展

下一篇：Linux内核kprobe机制实现浅析

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6