目前,关于IP地址的数据库,用得比较多的就是纯真数据库了。关于纯真数据库的具体格式,可以从这里找到。
最近闲暇时偶尔学习Python,恰巧看见这个有意思的小东西,就学习了一下它的IP信息存储格式。纯粹为了练习Python,就用它简单写了一个解析代码,详见如下:
# -*- coding: utf-8 -*-
import os,sys
from struct import *
# Redirect modes: values of the flag byte that get_ipinfo/get_area compare
# against to decide whether the following 3 bytes are an offset to follow.
REDIRECT_MODE_1 = 1
REDIRECT_MODE_2 = 2
# Fallback texts: NATION_ERR/AREA_ERR are returned by getlocation when no
# entry is found, AREA_ERR by get_area when the redirect offset is 0, and
# DEFAULT_ERR by get_str when the decoded string is empty.
NATION_ERR = "未知国家"
AREA_ERR = "未知地区"
DEFAULT_ERR = "未知"
# Open the database file and read its entire content
def open_db(path):
    """Return the whole content of the file at *path* as bytes.

    The file is opened in binary mode and is closed again even if reading
    fails. Errors from ``open``/``read`` (e.g. a missing file) propagate to
    the caller.
    """
    with open(path, "rb") as file_object:
        return file_object.read()
# Read the 4-byte IP address stored at offset
def get_ip(buf, offset):
    """Return the 4 bytes at ``buf[offset:offset+4]`` as an unsigned int.

    The bytes are interpreted as little-endian. The explicit ``'<L'`` format
    is required: the native ``'L'`` is 8 bytes wide on most 64-bit Unix
    platforms, so unpacking a 4-byte slice with it raises ``struct.error``.

    Raises ``struct.error`` if fewer than 4 bytes are available at *offset*.
    """
    ip, = unpack('<L', buf[offset:offset + 4])
    return ip
# Read the 3-byte offset value stored at offset
def get_offset(buf, offset):
    """Return the 3 bytes at ``buf[offset:offset+3]`` as an unsigned int.

    The three bytes are little-endian; a zero byte is appended as the most
    significant byte so they can be unpacked as a 4-byte integer. ``'<L'``
    is used instead of the native ``'L'``, whose size is platform dependent.

    Raises ``struct.error`` if fewer than 3 bytes are available at *offset*.
    """
    o, = unpack('<L', buf[offset:offset + 3] + b'\x00')
    return o
# Read the redirect-mode flag byte stored at offset
def get_mode(buf, offset):
    """Return the single byte at ``buf[offset]`` as an unsigned int.

    Callers compare the result against REDIRECT_MODE_1 / REDIRECT_MODE_2.
    Raises ``struct.error`` if *offset* is past the end of *buf*.
    """
    mode, = unpack('B', buf[offset:offset + 1])
    return mode
# Convert a dotted-quad IP string to its integer value
def ip_str2int(ipstr):
    """Return the numeric value of the dotted-quad string *ipstr*.

    The first field becomes the most significant byte, so ``"1.2.3.4"``
    yields ``0x01020304`` -- the same value get_ip reads from the file.
    Only the first four dot-separated fields are used.

    Raises ``IndexError`` when fewer than four fields are present,
    ``ValueError`` when a field is not an integer and ``struct.error`` when
    a field is outside 0..255.
    """
    x = ipstr.split('.')
    # Pack least-significant byte first, then read back as an explicit
    # little-endian 32-bit value (native 'L' may be 8 bytes wide).
    packed = pack('BBBB', int(x[3]), int(x[2]), int(x[1]), int(x[0]))
    ipaddress, = unpack('<L', packed)
    return ipaddress
# Convert an integer IP value to a dotted-quad string
def ip_int2str(ip):
    """Return the dotted-quad string for the integer *ip*.

    The most significant byte comes first, i.e. this is the inverse of
    ip_str2int for values that fit in 32 bits. The top field is not masked,
    so a value wider than 32 bits produces a first field above 255.
    """
    fields = (
        ip >> 24,
        (ip >> 16) & 0xff,
        (ip >> 8) & 0xff,
        ip & 0xff,
    )
    return '.'.join(str(field) for field in fields)
# Decode data up to the first point where it stops being valid GB2312
def a2g(binstr, encode='gb2312'):
    """Decode the longest valid prefix of *binstr* using *encode*.

    Decoding stops at the first byte sequence the codec rejects; everything
    before it is returned and the rest is discarded. Empty input, or input
    whose very first byte is invalid, yields ``""``.

    The position reported by the codec is used to find the prefix, so
    single-byte (ASCII) characters mixed with double-byte ones do not throw
    the decoding out of alignment.
    """
    try:
        return binstr.decode(encoding=encode)
    except UnicodeDecodeError as err:
        # err.start is the index of the first undecodable byte, so the
        # slice before it is guaranteed to decode cleanly.
        return binstr[:err.start].decode(encoding=encode)
# Read the string stored at offset, trying GB2312 first and then GBK
def get_str(buf, offset):
    """Return the NUL-terminated string that starts at *offset*, decoded.

    At most 256 bytes are examined. Decoding is attempted as GB2312, then
    as GBK; if both fail, a2g is used to keep whatever decodable part it
    can recover. An empty result is replaced by DEFAULT_ERR.
    """
    raw = buf[offset:offset + 256].split(b'\x00')[0]
    try:
        uip = raw.decode(encoding="gb2312")
    except UnicodeDecodeError:
        try:
            uip = raw.decode(encoding="gbk")
        except UnicodeDecodeError:
            # Neither codec accepts the full string: salvage a partial one.
            uip = a2g(raw)
    if len(uip) == 0:
        uip = DEFAULT_ERR
    return uip
# Get the byte length of the string stored at offset
def get_str_len(buf, offset):
    """Return the byte length of the string at *offset*, terminator included.

    The string is taken to end at the first NUL byte within the 256 bytes
    starting at *offset*; the returned value counts that NUL byte, so adding
    it to *offset* gives the position right after the string. When no NUL is
    found in that window, the window length plus one is returned.
    """
    window = buf[offset:offset + 256]
    end = window.find(b'\x00')
    if end < 0:
        end = len(window)
    return end + 1
# Read the area information located at offset
def get_area(buf, offset):
    """Return the area string described at *offset*.

    If the byte at *offset* is REDIRECT_MODE_1 or REDIRECT_MODE_2, the next
    three bytes are an offset to the actual string; an offset of 0 means
    there is no area information and AREA_ERR is returned. Otherwise the
    string is stored directly at *offset*.
    """
    mode = get_mode(buf, offset)
    if mode != REDIRECT_MODE_1 and mode != REDIRECT_MODE_2:
        return get_str(buf, offset)
    areaoffset = get_offset(buf, offset + 1)
    if areaoffset == 0:
        return AREA_ERR
    return get_str(buf, areaoffset)
# Get the nation and area information for one index entry
def get_ipinfo(buf, offset):
    """Return ``"<nation>:<area>"`` for the index entry at *offset*.

    The index entry holds a 3-byte record offset at ``offset + 4``. Inside
    the record the first 4 bytes are skipped (presumably the end IP of the
    range, per the QQWry format -- not used here) and the following flag
    byte selects the layout:

    * REDIRECT_MODE_1: the next 3 bytes point to where the nation data
      lives. That location may itself start with REDIRECT_MODE_2 (a further
      3-byte pointer to the nation string, area data 4 bytes on) or hold
      the nation string directly, followed by the area data.
    * REDIRECT_MODE_2: the next 3 bytes point to the nation string and the
      area data sits at record offset + 8.
    * anything else: the nation string starts at the flag position and the
      area data follows it.
    """
    info_offset = get_offset(buf, offset + 4)
    flag_pos = info_offset + 4
    mode_flag = get_mode(buf, flag_pos)

    if mode_flag == REDIRECT_MODE_1:
        nationoffset = get_offset(buf, flag_pos + 1)
        if get_mode(buf, nationoffset) == REDIRECT_MODE_2:
            # flag byte + 3-byte pointer, then the area data
            area_offset = nationoffset + 4
            nation = get_str(buf, get_offset(buf, nationoffset + 1))
        else:
            nation = get_str(buf, nationoffset)
            # area data starts right after the nation string and its
            # NUL terminator (get_str_len counts the terminator)
            area_offset = nationoffset + get_str_len(buf, nationoffset)
        area = get_area(buf, area_offset)
    elif mode_flag == REDIRECT_MODE_2:
        nation = get_str(buf, get_offset(buf, flag_pos + 1))
        area = get_area(buf, info_offset + 8)
    else:
        nation = get_str(buf, flag_pos)
        area = get_area(buf, flag_pos + get_str_len(buf, flag_pos))

    return nation + ":" + area
# Get the offset of the index entry midway between two index entries
def get_middle_offset(begin, end):
    """Return the offset of an index entry between *begin* and *end*.

    Index entries are 7 bytes each. The result is *begin* advanced by half
    the number of entries separating the two offsets, but always by at
    least one entry, so the result is strictly greater than *begin*.
    Expects ``begin <= end``.
    """
    # Integer division avoids the float round-trip of int(x / 7).
    records = ((end - begin) // 7) >> 1
    if records == 0:
        records = 1
    return begin + records * 7
# Look up the information for one specific IP address
def getlocation(filebuf, ipstr):
    """Return ``"<nation>:<area>"`` for the dotted-quad string *ipstr*.

    *filebuf* is the whole database as bytes. Its first 8 bytes hold the
    offsets of the first and the last index entry (little-endian, 4 bytes
    each). Index entries are 7 bytes long and start with an IP address;
    the search relies on them being sorted by that address.

    The entry chosen is the one with the greatest start address that is
    not above the requested address, so addresses inside a range are found
    too, not only addresses equal to a range start. If even the first
    entry starts above the requested address, ``NATION_ERR:AREA_ERR`` is
    returned.
    """
    # Offsets of the first and the last index entry
    first_index_offset, = unpack('<L', filebuf[0:4])
    last_index_offset, = unpack('<L', filebuf[4:8])
    ipaddress = ip_str2int(ipstr)

    # Binary search; on exit `left` is the last entry whose start address
    # is <= ipaddress (if any entry qualifies at all).
    left = first_index_offset
    right = last_index_offset
    while left < right:
        # get_middle_offset always returns an offset > left, so both
        # branches shrink the interval and the loop terminates.
        middle = get_middle_offset(left, right)
        if get_ip(filebuf, middle) <= ipaddress:
            left = middle
        else:
            right = middle - 7

    if get_ip(filebuf, left) <= ipaddress:
        return get_ipinfo(filebuf, left)
    return NATION_ERR + ":" + AREA_ERR
# Enumerate the IP information in one pass, returned as a dict: [ip] = info
def get_ip_dict(filebuf):
    """Return a dict mapping start-IP strings to ``"<nation>:<area>"``.

    Walks the index entries (7 bytes each) from the first index offset
    stored in the header and prints every entry as it is processed.
    """
    ip_info = {}
    # Offsets of the first and the last index entry
    first_index_offset, = unpack('<L', filebuf[0:4])
    last_index_offset, = unpack('<L', filebuf[4:8])
    # NOTE(review): the exclusive bound below leaves out the last two index
    # entries (last_index_offset - 7 and last_index_offset). Kept as in the
    # original; confirm whether skipping them is intended.
    end = last_index_offset - 7
    for begin in range(first_index_offset, end, 7):
        ipstr_tmp = ip_int2str(get_ip(filebuf, begin))
        ip_info[ipstr_tmp] = get_ipinfo(filebuf, begin)
        print(begin, ": ", ipstr_tmp, ip_info[ipstr_tmp])
    return ip_info
# Print the information for one IP address
def print_ipinfo(ip, buf=None):
    """Print *ip* followed by the result of getlocation for it.

    *buf* is the database content; when omitted, the module-level
    ``filebuf`` (set by the ``__main__`` section) is used, which is what
    the original one-argument form relied on.
    """
    if buf is None:
        buf = filebuf
    print(ip, ": ", getlocation(buf, ip))
# Usage example
if __name__ == "__main__":
    # The database path may be given as the first command-line argument;
    # without one the original hard-coded location is used.
    db_path = sys.argv[1] if len(sys.argv) > 1 else "d:\\qqwry.dat"
    filebuf = open_db(db_path)
    # Look up single addresses with the binary search and print them
    print_ipinfo("211.144.174.197")
    print_ipinfo("58.82.143.0")
    # Enumerate everything
    ipdict = get_ip_dict(filebuf)
    print(ipdict["1.1.128.0"])
如果有任何的错误,欢迎指正...
阅读(1284) | 评论(0) | 转发(0) |