最近写一个小工具,顺便开始学用Python。觉得挺有趣(虽然OOP一直很让我头疼)。恰好昨天遇到一个问题:一个list中有许多重复的值,需要将它去重。找了半天,list没有类似uniq这样的方法。于是上网找……结果找到一堆帖子都唆使大家用set。但是在我的机器上试了一下,还要import sets,否则……: Traceback (most recent call last): File "<stdin>", line 1, in <module> NameError: name 'Set' is not defined
但是我已经import了一堆东西,并且这个方式在效率上不怎么样,觉得很不爽。于是继续找……终于,在某个牛人的站点上找到一篇,解决了困扰我至少15分钟的问题…… 看了看文中提到的11种去重方式,选了一个最快的。 def f9(seq): # Not order preserving return {}.fromkeys(seq).keys()不需要import任何模块,直接用dictionary 内建的fromkeys和keys解决问题。很好,很强大。 上面提到的文章中一共给出了11种去重的方法,并且都以函数的方式定义了,直接粘出来就可以用(或者作为一个模块import之)。代码可以 为了避免以后找不到,我还是在这里粘出来吧……顺便再牢骚一下space没有代码高亮功能的问题…… -_-! - from random import shuffle, randint
-
import re
-
from sets import Set
-
-
def f1(seq):  # Raymond Hettinger
    """Return the distinct items of ``seq`` as a list.

    Every item is stored as a key of a scratch dict through the dict's bound
    ``__setitem__``; the keys left at the end are the unique items.  Items
    must be hashable.  The order of the result is not guaranteed.
    """
    # not order preserving
    seen = {}
    store = seen.__setitem__
    # An explicit loop is used instead of ``map(store, seq, [])``: that form
    # depends on Python 2's map() being eager and padding the shorter
    # argument with None.  On Python 3 map() is lazy and stops at the
    # shortest input, so nothing would ever be stored.
    for item in seq:
        store(item, None)
    # list() gives a real list on Python 3 too, where keys() is only a view.
    return list(seen)
-
-
-
def f2(seq):  # *********
    """Return the items of ``seq`` with duplicates removed, first occurrence kept.

    Membership is tested against the output list itself, so items only need
    to support ``==``; they do not have to be hashable.
    """
    # order preserving
    unique = []
    for item in seq:
        if item in unique:
            continue
        unique.append(item)
    return unique
-
-
def f3(seq):
    """Return the distinct items of ``seq`` as a list.

    Each item is assigned as a key of a scratch dict in a plain loop; the
    surviving keys are the unique items.  Items must be hashable.  The order
    of the result is not guaranteed.
    """
    # Not order preserving
    seen = {}
    for item in seq:
        seen[item] = 1
    # list() instead of .keys(): identical on Python 2, and still a real
    # list (not a keys view) on Python 3.
    return list(seen)
-
-
def f4(seq):  # ********** order preserving
    """Return the items of ``seq`` with duplicates removed, first occurrence kept.

    An item is appended only while ``count()`` on the output list reports
    zero occurrences, so items only need to support ``==``.
    """
    no_dupes = []
    # A plain loop: the append is a side effect, so wrapping it in a list
    # comprehension would only build and discard a list of None values.
    for item in seq:
        if not no_dupes.count(item):
            no_dupes.append(item)
    return no_dupes
-
-
def f5(seq, idfun=None):  # Alex Martelli ******* order preserving
    """Return the items of ``seq`` whose key has not been seen before.

    Args:
        seq: iterable of items.
        idfun: optional callable mapping an item to the hashable key used
            for the duplicate test; defaults to the item itself.

    Returns:
        A list of the original items (not their keys), in first-seen order.
    """
    key_of = idfun if idfun is not None else (lambda value: value)
    seen = {}
    unique = []
    for element in seq:
        key = key_of(element)
        # ``in`` replaces the dict.has_key() spelling of old Python versions.
        if key in seen:
            continue
        seen[key] = 1
        unique.append(element)
    return unique
-
-
-
def f5b(seq, idfun=None):  # Alex Martelli ******* order preserving
    """Return the items of ``seq`` whose key has not been seen before.

    Same contract as ``f5``; the only difference is that the duplicate test
    is written as a positive ``not in`` branch instead of a ``continue``.

    Args:
        seq: iterable of items.
        idfun: optional callable mapping an item to the hashable key used
            for the duplicate test; defaults to the item itself.

    Returns:
        A list of the original items (not their keys), in first-seen order.
    """
    key_of = idfun if idfun is not None else (lambda value: value)
    seen = {}
    unique = []
    for element in seq:
        key = key_of(element)
        # ``not in`` replaces the dict.has_key() spelling of old Python versions.
        if key not in seen:
            seen[key] = 1
            unique.append(element)
    return unique
-
-
-
-
def f6(seq):
    """Return the distinct items of ``seq`` as a list, via ``Set``.

    ``Set`` is the class this file imports from the ``sets`` module.  The
    order of the result is not guaranteed.
    """
    # Not order preserving
    unique = Set(seq)
    return list(unique)
-
-
def f7(seq):
    """Return the distinct items of ``seq`` as a list, via the builtin ``set``.

    Items must be hashable.  The order of the result is not guaranteed.
    """
    # Not order preserving
    unique = set(seq)
    return list(unique)
-
-
def f8(seq):  # Dave Kirby
    """Return the items of ``seq`` with duplicates removed, first occurrence kept.

    Uses a set of already-seen items inside a single list comprehension.
    Items must be hashable.
    """
    # Order preserving
    seen = set()
    # set.add() returns None, so ``seen.add(item)`` is always falsy: it is
    # only reached for a first-time item, records it, and leaves the ``or``
    # false so that the item is kept.
    return [item for item in seq if not (item in seen or seen.add(item))]
-
-
def f9(seq):
    """Return the distinct items of ``seq`` as a list, via ``dict.fromkeys``.

    Items must be hashable.  The order of the result is not guaranteed.
    """
    # Not order preserving
    # list() instead of .keys(): identical on Python 2, and still a real
    # list (not a keys view) on Python 3.
    return list(dict.fromkeys(seq))
-
-
def f10(seq, idfun=None):  # Andrew Dalke
    """Return everything the ``_f10`` generator yields for ``seq``, as a list.

    Args:
        seq: iterable of items.
        idfun: optional key function, passed straight through to ``_f10``.
    """
    # Order preserving
    unique = _f10(seq, idfun)
    return list(unique)
-
-
def _f10(seq, idfun=None):
-
seen = set()
-
if idfun is None:
-
for x in seq:
-
if x in seen:
-
continue
-
seen.add(x)
-
yield x
-
else:
-
for x in seq:
-
x = idfun(x)
-
if x in seen:
-
continue
-
seen.add(x)
-
yield x
-
-
-
def f11(seq): # f10 but simpler
-
# Order preserving
-
return list(_f10(seq))
-
-
def _f11(seq):
-
seen = set()
-
for x in seq:
-
if x in seen:
-
continue
-
seen.add(x)
-
yield x
-
-
import time
-
-
def timing(f, n, a):
    """Time ``10 * n`` calls of ``f(a)`` and print the result.

    Prints a single line ``<f.__name__> <seconds>`` where the elapsed time
    is rounded to three decimals.

    Args:
        f: callable under test; invoked with ``a`` as its only argument.
        n: number of loop iterations; each iteration makes ten calls.
        a: the argument handed to every call.
    """
    # time.clock() was removed in Python 3.8.  Use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    clock = getattr(time, "perf_counter", None) or time.clock

    r = range(n)
    t1 = clock()
    for i in r:
        # Ten calls are written out per iteration (presumably to dilute the
        # cost of the loop itself in the measurement).
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
    t2 = clock()
    # A single parenthesised argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("%s %s" % (f.__name__, round(t2 - t1, 3)))
-
-
-
-
-
def getRandomString(length=10, loweronly=1, numbersonly=0,
                    lettersonly=0):
    """Return a random string of ``length`` characters (for ``length >= 0``).

    The characters come from shuffling a pool chosen by the flags:

    * ``numbersonly``: the ten digits;
    * ``lettersonly`` (when ``numbersonly`` is false): a-z and A-Z;
    * neither: a-z and A-Z plus four copies of every digit.

    Args:
        length: number of characters wanted.
        loweronly: when true, the result is lower-cased before returning.
        numbersonly: when true, draw from digits only.
        lettersonly: when true, draw from ASCII letters only.

    Returns:
        The generated string.
    """
    _letters = 'abcdefghijklmnopqrstuvwxyz'
    if numbersonly:
        pool = list('0123456789')
    elif lettersonly:
        pool = list(_letters + _letters.upper())
    else:
        lowercase = _letters + '0123456789' * 2
        pool = list(lowercase + lowercase.upper())

    shuffle(pool)
    chunks = [''.join(pool)]
    produced = len(pool)
    # When more characters are requested than one pool holds, keep appending
    # freshly shuffled copies of the SAME pool until there are enough.  This
    # keeps both the requested length and the requested alphabet; topping up
    # with a single default 10-character string would honour neither.
    while produced < length:
        shuffle(pool)
        chunks.append(''.join(pool))
        produced += len(pool)

    s = ''.join(chunks)[:length]
    if loweronly:
        return s.lower()
    return s
-
-
# Scratch fixtures.  ``testdata`` is first built as a dict of 35 random
# key/value strings and is then immediately rebound to a short mixed list
# of ints and single characters.
testdata = {}
for _ in range(35):
    key = getRandomString(5, lettersonly=1)
    value = getRandomString(100)
    testdata[key] = value

testdata = [int(digit) for digit in '21354612']
testdata.extend('abcceeaa5efm')
-
class X:
    """Small wrapper object that compares by its ``foo`` attribute.

    Equality and ordering are defined but no ``__hash__`` is, so instances
    are unhashable.
    """

    def __init__(self, n):
        self.foo = n

    def __repr__(self):
        # The format string needs a placeholder: an empty "" % self.foo
        # raises TypeError ("not all arguments converted").
        return "<foo %r>" % self.foo

    # Rich comparisons, needed on Python 3 where __cmp__ is never called.
    def __eq__(self, e):
        return self.foo == e.foo

    def __ne__(self, e):
        return self.foo != e.foo

    def __lt__(self, e):
        return self.foo < e.foo

    def __cmp__(self, e):
        # Three-way comparison (-1, 0 or 1) written without the ``cmp``
        # builtin, which Python 3 no longer provides.
        return (self.foo > e.foo) - (self.foo < e.foo)
-
-
import sys

# Benchmark input: 10000 random three-character lower-case strings.
testdata = [getRandomString(3, loweronly=True) for i in range(10000)]
# Small hand-written alternative input:
#testdata = ['f','g','c','d','b','a','a']

# Every order-preserving candidate ...
order_preserving = f2, f4, f5, f5b, f8, f10, f11
# ... but the reassignment below leaves f2 and f4 out of the run
# (presumably because they are too slow on an input of this size).
order_preserving = f5, f5b, f8, f10, f11

not_order_preserving = f1, f3, f6, f7, f9
testfuncs = order_preserving + not_order_preserving

for f in testfuncs:
    if f in order_preserving:
        # Mark order-preserving functions with a leading "* ".  Written via
        # sys.stdout.write because the ``print "*",`` statement is a
        # SyntaxError on Python 3.
        sys.stdout.write("* ")
    timing(f, 100, testdata)
|