本代码采用关键字匹配的方法,过滤出国内主流的浏览器以及对应的内核、操作系统以及硬件类型(主要针对Android手机)
说明:由于好多浏览器试图去兼容其他类型的浏览器,所以会在UA中写很多其他兼容的浏览器信息,
所以本代码中对这种类型的浏览器做了一下判断,选择了最外层的浏览器:
比如QQBrowser使用IE内核;而Maxthon兼容Chrome,Chrome兼容Safari;
内核也是如此。
# uaparse relies on `re`; no import is visible elsewhere in this snippet.
import re


class UAParse:
    """Keyword-based User-Agent parser.

    A User-Agent string is scanned for known browser, rendering-kernel and
    operating-system keywords.  Every keyword has a numeric rank; within each
    category the keyword with the LOWEST rank found in the string wins, which
    is how "outer" browsers (e.g. Maxthon) are preferred over the browsers
    they claim compatibility with (e.g. Chrome, Safari).

    Rank ranges: 1-99 browsers, 101-199 kernels, 201-299 operating systems.

    NOTE(review): 'Safari' (10) outranks 'MQQBrowser' (15), so a UA carrying
    both tokens is reported as Safari -- confirm this ordering is intended.
    """

    def __init__(self):
        """Build the keyword table and compile the scanning regex."""
        self.keywordsmap = {
            # browsers
            'TencentTraveler': 1, 'QQBrowser': 2, 'Maxthon': 3,
            'BIDUBrowser': 4, '360SE': 5, 'TheWorld': 6,
            'qihu theworld': 7, 'SE 2.X': 8,
            'Firefox': 9, 'Safari': 10, 'Chrome': 11, 'MSIE': 12,
            'Opera': 13,
            'BdMobile': 14, 'MQQBrowser': 15, 'UCWEB': 16,
            'NokiaBrowser': 17, 'UC': 18,
            'Iceweasel': 19, 'Mobile': 20, 'K-MeleonCCFME': 21,
            # kernels
            'Trident': 101, 'AppleWebKit': 102, 'Presto': 103,
            'Gecko': 104, 'KHTML': 105,
            # operating systems
            'SymbianOS': 201, 'Mac OS X': 202, 'Android': 203,
            'Windows NT': 204, 'Linux': 205,
        }
        # Longest keyword first: regex alternation takes the first branch that
        # matches, so 'UC' must never be tried before 'UCWEB'.
        keywords = sorted(self.keywordsmap, key=len, reverse=True)
        # re.escape keeps the '.' in 'SE 2.X' literal; unescaped it matched
        # any character and produced names missing from keywordsmap.
        self.pattern = '(%s)([0-9/. ]*)' % '|'.join(
            re.escape(keyword) for keyword in keywords)
        self.cpat = re.compile(self.pattern)

    def uaparse(self, useragent):
        """Parse *useragent*, print a one-line summary and return the result.

        The input string and a tab-separated summary are printed, as before.

        Returns a dict with the keys 'brw'/'brwv' (browser and version),
        'knl'/'knlv' (kernel and version), 'os'/'osv' (operating system and
        version) and 'hard' (hardware model).  Fields that could not be
        determined are empty strings.  ``None`` is treated as an empty string.
        """
        useragent = useragent or ''
        print(useragent)
        info = {'brw': '', 'brwv': '', 'knl': '', 'knlv': '',
                'os': '', 'osv': '', 'hard': ''}
        # Best (lowest) rank seen so far per category; the initial values are
        # the exclusive upper bounds of the three rank ranges.
        best = {'brw': 100, 'knl': 200, 'os': 300}
        for m in self.cpat.finditer(useragent):
            name = m.group(1)
            rank = self.keywordsmap[name]
            if rank < 100:
                slot = 'brw'
            elif rank < 200:
                slot = 'knl'
            else:
                slot = 'os'
            if rank >= best[slot]:
                continue
            best[slot] = rank
            info[slot] = name
            info[slot + 'v'] = m.group(2)
            if slot == 'os':
                # Hardware is derived from the winning OS token; drop whatever
                # a previously matched, lower-priority OS left behind.
                info['hard'] = ''
                self._os_details(useragent, m, info)
        for key in ('brwv', 'knlv', 'osv'):
            info[key] = info[key].strip(' /')
        print('%s=>%s\t%s=>%s\t%s=>%s\t%s' % (
            info['brw'], info['brwv'], info['knl'], info['knlv'],
            info['os'], info['osv'], info['hard']))
        return info

    def _os_details(self, useragent, m, info):
        """Refine info['osv'] / info['hard'] for the OS keyword matched by m."""
        name = m.group(1)
        if name == 'Android':
            # Device model is the text between the preceding ';' and
            # ' Build/', looked up after the Android token itself.
            hard = re.search(r';\s*([^;()]+?)\s+Build/', useragent[m.end(2):])
            if not hard:
                # No Build tag: take the last ';'-separated field before ')'.
                # The trailing group is optional so the model keeps its final
                # character.
                hard = re.search(
                    r'; ([0-9a-zA-Z_ -]+)([0-9a-zA-Z_ /-]*\))',
                    useragent[useragent.rfind(';'):])
            if hard:
                info['hard'] = hard.group(1).strip()
        elif name == 'Mac OS X':
            # e.g. "(iPad; U; CPU OS 4_3_5 like Mac OS X": both the device and
            # the version precede the keyword.
            head = useragent[:m.start(1)]
            hard = re.search(r'\( *([a-zA-Z]+) *;', head)
            if hard:
                info['hard'] = hard.group(1)
            version = re.search(r'OS ([0-9_]+)', head)
            if version:
                info['osv'] = version.group(1)
            # NOTE(review): a desktop "Mac OS X 10_7_3" yields osv '10' because
            # the generic version group does not accept '_'.
        elif name == 'Linux':
            # The word following "Linux " (e.g. "mips64") is reported as the
            # OS version.
            version = re.search(r'(Linux) ([0-9a-zA-Z_-]+)',
                                useragent[m.start(1):])
            if version:
                info['osv'] = version.group(2)
def test():
    """Run the parser over a few sample User-Agent strings.

    Covers a desktop browser, an Android phone, an iPad and a Linux desktop;
    each call prints its own result.
    """
    samples = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.12 (KHTML, like Gecko) Maxthon/3.4.1.1000 Chrome/18.0.966.0 Safari/535.12',
        'MQQBrowser/3.7/Mozilla/5.0 (Linux; U; Android 2.3.5; zh-cn; GT-N7000 Build/GINGERBREAD) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
        'MQQBrowser/2.7 Mozilla/5.0 (iPad; U; CPU OS 4_3_5 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Mobile/8L1 Safari/7534.48.3',
        'Mozilla/5.0 (X11; U; Linux mips64; zh-CN; rv:1.9.0.11) Gecko/2009061212 Iceweasel/3.0.6 (Debian-3.0.6-1)',
    )
    parser = UAParse()
    for sample in samples:
        parser.uaparse(sample)
阅读(5450) | 评论(0) | 转发(0) |