以文件大小加 MD5 校验和的方式比较文件:遍历目录,将每个文件以 (大小, 校验和) 为键记录到字典 record={} 中;在遍历过程中如果发现与已记录文件相同的文件,则将该重复文件放入列表 dup=[]。
import hashlib
def checksum(file):
    """Return the raw MD5 digest of the contents of *file*.

    The file is read in binary mode so the digest reflects the exact bytes
    on disk (no newline translation or text decoding), and in fixed-size
    chunks so large files are never loaded into memory at once.

    Args:
        file: Path of the file to hash.

    Returns:
        The 16-byte binary digest (``digest()``, not the hex string).

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    # The context manager guarantees the handle is closed even if a read fails.
    with open(file, 'rb') as fp:
        while True:
            buffer = fp.read(8192)
            if not buffer:
                break
            md5.update(buffer)
    return md5.digest()
import os
def diskwalk(path):
    """Return the paths of all files found under *path*, recursively.

    Args:
        path: Root directory to walk.

    Returns:
        A list with one entry per file, each built by joining the directory
        reported by ``os.walk`` with the file name. Directories themselves
        are not included.
    """
    return [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(path)
        for name in filenames
    ]
def getsize(file):
    """Return the size of *file* in bytes, as reported by ``os.stat``.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``st_size`` field of the stat result (the same value the
        original code read through tuple index 6).

    Raises:
        OSError: If the path cannot be stat'ed.
    """
    return os.stat(file).st_size
# Scan the directory tree and report duplicate files.
path = '/opt/python'
files = diskwalk(path)

# record maps (size, md5 digest) -> first file seen with that key.
# It must exist before the loop; the membership test below reads it.
record = {}
# dup collects every later file whose key is already present in record.
dup = []

for file in files:
    compound_key = (getsize(file), checksum(file))
    if compound_key in record:
        dup.append(file)
    else:
        record[compound_key] = file

# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(record)
print("###############")
print(dup)
阅读(1306) | 评论(0) | 转发(0) |