一个Python和C性能对比的好例子-me09-ChinaUnix博客

温厚谦和

首页　| 　博文目录　| 　关于我

me09

博客访问： 1513063
博文数量： 408
博客积分： 10036
博客等级：上将
技术积分： 4440
用户组：普通用户
注册时间： 2006-04-06 13:57

文章分类

全部博文（408）

VPS（0）
LDAP（6）
PHP（2）
UNIX系统编程（4）
系统安全（54）
服务器架设（11）
技术资料文摘（27）
mail（7）
数据库（52）
翰海风云（22）
考证测试试题（1）
windows2003资料（25）

技术动态（2）
软件测试技术文档（2）
CGI编程（5）
VPN（1）
bind（9）
perl（9）
Squid代理（1）
shell（2）
freeBSD（2）
python（11）
iptables（22）
程序人生（7）
编程基础（31）
文件系统（6）
系统基础（39）
负载均衡（13）
存储备份（14）
路由网络（5）
debian（15）
未分配的博文（3）

文章存档

2011年（1）

2010年（2）

2009年（1）

2008年（3）

2007年（7）

2006年（394）

我的朋友

相关博文

一个Python和C性能对比的好例子

分类： Python/Ruby

2006-06-20 16:55:41

VCKbase的论坛上有人提了一个问题：
遇到的问题是，我把E-mail地址收集在txt文件中（每行一个E-mail地址，共78000行），要求是把其中重复的E-mail地址全部去掉。考虑编程方便，我用了CString类，但似乎效率不高。我写出来的整理78000份E-mail地址的需要近7分钟的时间

我给他用Python实现了一个(这个版本是修改过的，当时我随手实现的一个版本做错了，呵呵)
这里算法的主要内容就是利用hash_table来判断email是否已经重复出现，这比字符串的查找要快的多。

#remove duplicated email address from file
import datetime

if __name__ == "__main__":
	t = datetime.datetime(2000,1,1)
	print str(t.today())
	hashtable = {}
	f = file("email.txt","r")
	f2 = file("email_new.txt","w")
	line = f.readline();
	while len(line)>0:
		if not hashtable.has_key(line): 
			hashtable[line] = 1
			f2.write(line)
		line = f.readline();
	f.close()
	f2.close()
	t2 = datetime.datetime(2000,1,1)
	print str(t2.today())

上面的算法在我的机器上：P4 2.8G,512M 内存，耗时大约300ms，跟他的7分钟差了天了。而我当时错误的算法，需要2分半钟，所以我贴到我们内部的BBS上，我的一个同学实现了如下算法(C):

#include 
#include 
#include 
#include "list.h"

#define HASH_SIZE 262157
#define LINE_MAX_LEN 1024

struct list_node {
	struct list_head head;
	int len;
	char *str;
};

struct list_head hash_table[HASH_SIZE];

static inline int
hash_string (const char *ptr, int len)
{
	unsigned int hash = 0;
	
	for (hash = 0; len; len--, ptr++) {
		/* (31 * hash) will probably be optimized to ((hash << 5) - hash). */
		hash = 31 * hash + *ptr;
	}
	
	return (hash % HASH_SIZE);
}

static inline int
cmpfn(const struct list_node *node, int len, char *str)
{
	return ((node->len == len) && (memcmp(node->str, str, len) == 0));
}

int main(int argc, char **argv)
{
	int i, hash, len;
	struct list_node *node;
	char buf[LINE_MAX_LEN];
	FILE *fp_in, *fp_out;
	
	if(argc != 3) {
		printf("error: \nplease use eml as:\n eml infile outfile\n");
		return -1;
	}
	fp_in = fopen(argv[1], "r");
	if(!fp_in) {
		printf("error: could not open infile %s\n", argv[1]);
		return -1;
	}
	fp_out = fopen(argv[2], "w");
	if(!fp_out) {
		printf("error: could not open infile %s\n", argv[2]);
		return -1;
	}
	for(i=0; istr = (char *)malloc(len + 1);
			node->len = len;
			memcpy(node->str, buf, len);
			node->str[len] = '\0';
			list_append(&(hash_table[hash]), node);
		}
	}

	return 0;
}

还有.h文件：

#ifndef _LINUX_LIST_H
#define _LINUX_LIST_H

extern inline void prefetch(const void *x)
{
	return;
}
/*
 * Simple doubly linked list implementation.
 *
 * Some of the internal functions ("__xxx") are useful when
 * manipulating whole lists rather than single entries, as
 * sometimes we already know the next/prev entries and we can
 * generate better code by using them directly rather than
 * using the generic single-entry routines.
 */

struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

#define LIST_HEAD(name) \
	struct list_head name = LIST_HEAD_INIT(name)

#define INIT_LIST_HEAD(ptr) do { \
	(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)

/*
 * Insert a new entry between two known consecutive entries. 
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void
__list_add(struct list_head *new,
	   struct list_head *prev, struct list_head *next)
{
	next->prev = new;
	new->next = next;
	new->prev = prev;
	prev->next = new;
}

/**
 * list_add - add a new entry
 * @new: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 */
static inline void
list_add(struct list_head *new, struct list_head *head)
{
	__list_add(new, head, head->next);
}

/**
 * list_add_tail - add a new entry
 * @new: new entry to be added
 * @head: list head to add it before
 *
 * Insert a new entry before the specified head.
 * This is useful for implementing queues.
 */
static inline void
list_add_tail(struct list_head *new, struct list_head *head)
{
	__list_add(new, head->prev, head);
}

/*
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void
__list_del(struct list_head *prev, struct list_head *next)
{
	next->prev = prev;
	prev->next = next;
}

/**
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
 */
static inline void
list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	entry->next = (void *) 0;
	entry->prev = (void *) 0;
}

/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 */
static inline void
list_del_init(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	INIT_LIST_HEAD(entry);
}

/**
 * list_move - delete from one list and add as another's head
 * @list: the entry to move
 * @head: the head that will precede our entry
 */
static inline void
list_move(struct list_head *list, struct list_head *head)
{
	__list_del(list->prev, list->next);
	list_add(list, head);
}

/**
 * list_move_tail - delete from one list and add as another's tail
 * @list: the entry to move
 * @head: the head that will follow our entry
 */
static inline void
list_move_tail(struct list_head *list, struct list_head *head)
{
	__list_del(list->prev, list->next);
	list_add_tail(list, head);
}

/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 */
static inline int
list_empty(struct list_head *head)
{
	return head->next == head;
}

static inline void
__list_splice(struct list_head *list, struct list_head *head)
{
	struct list_head *first = list->next;
	struct list_head *last = list->prev;
	struct list_head *at = head->next;

	first->prev = head;
	head->next = first;

	last->next = at;
	at->prev = last;
}

/**
 * list_splice - join two lists
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 */
static inline void
list_splice(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list))
		__list_splice(list, head);
}

/**
 * list_splice_init - join two lists and reinitialise the emptied list.
 * @list: the new list to add.
 * @head: the place to add it in the first list.
 *
 * The list at @list is reinitialised
 */
static inline void
list_splice_init(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list)) {
		__list_splice(list, head);
		INIT_LIST_HEAD(list);
	}
}

/**
 * list_entry - get the struct for this entry
 * @ptr:	the &struct list_head pointer.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 */
#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))

/**
 * list_for_each	-	iterate over a list
 * @pos:	the &struct list_head to use as a loop counter.
 * @head:	the head for your list.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next, prefetch(pos->next); pos != (head); \
        	pos = pos->next, prefetch(pos->next))

/**
 * list_for_each_safe	-	iterate over a list safe against removal of list entry
 * @pos:	the &struct list_head to use as a loop counter.
 * @n:		another &struct list_head to use as temporary storage
 * @head:	the head for your list.
 */
#define list_for_each_safe(pos, n, head) \
	for (pos = (head)->next, n = pos->next; pos != (head); \
		pos = n, n = pos->next)

/**
 * list_for_each_entry	-	iterate over list of given type
 * @pos:	the type * to use as a loop counter.
 * @head:	the head for your list.
 * @member:	the name of the list_struct within the struct.
 */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_entry((head)->next, typeof(*pos), member),	\
		     prefetch(pos->member.next);			\
	     &pos->member != (head); 					\
	     pos = list_entry(pos->member.next, typeof(*pos), member),	\
		     prefetch(pos->member.next))

/*this is the begin of listhelp ;-)*/

#define LIST_FIND(head, cmpfn, type, args...)		\
({							\
	const struct list_head *__i = (head);		\
							\
	do {						\
		__i = __i->next;			\
		if (__i == (head)) {			\
			__i = NULL;			\
			break;				\
		}					\
	} while (!cmpfn((const type)__i , ## args));	\
	(type)__i;					\
})

#define LIST_FIND_W(head, cmpfn, type, args...)	\
({						\
	const struct list_head *__i = (head);	\
						\
	do {					\
		__i = __i->next;		\
		if (__i == (head)) {		\
			__i = NULL;		\
			break;			\
		}				\
	} while (!cmpfn((type)__i , ## args));	\
	(type)__i;				\
})

static inline int
__list_cmp_same(const void *p1, const void *p2)
{
	return p1 == p2;
}

/* Is this entry in the list? */
static inline int
list_inlist(struct list_head *head, const void *entry)
{
	return LIST_FIND(head, __list_cmp_same, void *, entry) !=NULL;
}

/* Delete from list. */
#ifdef CONFIG_NETFILTER_DEBUG
#define LIST_DELETE(head, oldentry)					\
do {									\
	if (!list_inlist(head, oldentry))				\
		printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n",	\
		       __FILE__, __LINE__, #oldentry, oldentry, #head);	\
        else list_del((struct list_head *)oldentry);			\
} while(0)
#else
#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry)
#endif

/* Append. */
static inline void
list_append(struct list_head *head, void *new)
{
	list_add((new), (head)->prev);
}

/* Prepend. */
static inline void
list_prepend(struct list_head *head, void *new)
{
	list_add(new, head);
}

/* Insert according to ordering function; insert before first true. */
#define LIST_INSERT(head, new, cmpfn)				\
do {								\
	struct list_head *__i;					\
	for (__i = (head)->next;				\
	     !cmpfn((new), (typeof (new))__i) && __i != (head);	\
	     __i = __i->next);					\
	list_add((struct list_head *)(new), __i->prev);		\
} while(0)

/* If the field after the list_head is a nul-terminated string, you
   can use these functions. */
static inline int
__list_cmp_name(const void *i, const char *name)
{
	return strcmp(name, i + sizeof (struct list_head)) == 0;
}

static inline int
__list_cmp_nameptr(const void *i, const char *name)
{
	return strcmp(name, ((char *)
			     *(unsigned long *) (i +
						 sizeof (struct list_head)))) ==
	    0;
}

/* Returns false if same name already in list, otherwise does insert. */
static inline int
list_named_insert(struct list_head *head, void *new)
{
	if (LIST_FIND(head, __list_cmp_name, void *,
		      new + sizeof (struct list_head)))
		 return 0;
	list_prepend(head, new);
	return 1;
}

static inline int
list_nameptr_insert(struct list_head *head, void *new)
{
	if (LIST_FIND(head, __list_cmp_nameptr, void *,
		      new + sizeof (struct list_head)))
		 return 0;
	list_prepend(head, new);
	return 1;
}

/* Find this named element in the list. */
#define list_named_find(head, name)			\
LIST_FIND(head, __list_cmp_name, void *, name)

#define list_nameptr_find(head, name)			\
LIST_FIND(head, __list_cmp_nameptr, void *, name)

#endif

在我的机器上测试(cygwin)：耗时竟然是400ms!
当然，这里包含了Cygwin的间接调用开销。

很难想象吧，Python在这里的性能竟然跟C不相上下！而且Python的代码比C不知道短了多少倍！
所以讲，Python是蛮好用的，:-).
不过，这个例子其实是个特殊情况，以前我们玩过的另外一个程序，C的性能就比Python快得多。不过代码量还是Python简短得多。有兴趣的朋友可以用自己熟悉的语言来测试一下，下面是测试文件(也是我用11行python代码产生的,有1/10的email是重复的)：

阅读(1285) | 评论(0) | 转发(0) |

上一篇：Linux 程序的命令行处理

下一篇：MySQL用户管理

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6