上周五完成了這個log分析
######################################
# 數據文件的格式如下:
#Software: Microsoft(R) Internet Security and Acceleration Server 2000
#Version: 1.0
#Date: 2005-10-13 00:00:05
#Fields: c-ip cs-username
c-agent sc-authenticated
date time
s-svcname s-computername
cs-referred r-host
r-ip r-port
time-taken cs-bytes
sc-bytes cs-protocol
cs-transport s-operation
cs-uri cs-mime-type
s-object-source sc-status
s-cache-info rule#1 rule#2
10.5.1.163 anonymous Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)
N 2005-10-13
00:00:05 w3proxy
SZFTP - -
- - -
268 - -
TCP GET
http://promos.hotbar.com/promos/promodll.dll?GetPromo&El=&SG=&RAND=13406&partner=hbtools&/p.gif
- - 12209
0x0 - -
10.5.1.163 anonymous Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)
N 2005-10-13
00:00:05 w3proxy
SZFTP - -
- - -
372 - -
TCP GET
http://promos.hotbar.com/promos/promodll.dll?GetPromo&El=&SG=&RAND=13406&partner=hbtools&/p.gif
- - 0
0x0 - -
10.5.1.163 YAHSINjinghua.lei
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)
Y 2005-10-13
00:00:05 w3proxy
SZFTP - -
- - -
528 - -
TCP GET
http://promos.hotbar.com/promos/promodll.dll?GetPromo&El=&SG=&RAND=13406&partner=hbtools&/p.gif
- - 12209
0x0 - -
10.7.64.31 yahsindanny.sun
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; MSN Messenger
7.0.0777) Y
2005-10-13 00:00:23
w3proxy SZFTP -
207.46.3.11 207.46.3.11
80 188 281
355 http TCP
POST
http://207.46.3.11/gateway/gateway.dll?Action=poll&SessionID=468608173.14050
application/x-msn-messenger Inet
200 0x40000004
all Allow rule
10.5.1.163 anonymous Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)
N 2005-10-13
00:00:40 w3proxy
SZFTP - -
- - -
268 - -
TCP GET
http://promos.hotbar.com/promos/promodll.dll?GetPromo&El=&SG=&RAND=99396&partner=hbtools&/p.gif
- - 12209
0x0 - -
10.5.1.163 anonymous Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)
N 2005-10-13
00:00:40 w3proxy
SZFTP - -
- - -
372 - -
TCP GET
http://promos.hotbar.com/promos/promodll.dll?GetPromo&El=&SG=&RAND=99396&partner=hbtools&/p.gif
- - 0
0x0 - -
#######################################
需要安裝 Python 和 Karrigell。
#######################################
# 第一個文件:
# index.py
import os
import datetime
# Directory that is scanned for log files; it also becomes the working
# directory of the script.
workdir = "z:/"
os.chdir(workdir)

###############################################################
# Get LOG file list
# file_list and date_list are parallel lists. Element 0 of each is the
# column caption; real entries start at index 1.
file_list = ["file name"]
date_list = ["date time"]
for name in os.listdir(workdir):
    # Only directory entries whose name begins with "WEB" are listed.
    if not name.startswith("WEB"):
        continue
    file_list.append(name)
    # Characters 7..14 of the name are shown in the date column
    # (presumably a YYYYMMDD stamp embedded in the file name -- TODO confirm).
    date_list.append(name[7:15])
###############################################################
# generate HTML Head
print """
<table width='100%'align='center'>
<tr>
<td>
<p align='center'>
<b><font size='7'>
web log analysis
</font></b>
</td>
</tr>
</table>
"""
###############################################################
# generate HTML Body
print "<table border='1' width='100%'align='center'>"
print "<tr><td width='30%' ><b><font size ='5'>" , date_list[0] , " </font></b></td>"
print "<td><b><font size='5'>" , file_list[0] , " </td></tr>"
for x in range(1,len(file_list)):
print "<tr>"
print "<td width='30%' ><a href='t1.py?file_name=" , file_list[x] , " '>"
print date_list[x]
print "</a></td>"
print "<td>" , file_list[x] , " </td></tr>"
print "</table>"
##############################################################
# generate HTML Tail
a = datetime.datetime.today().isoformat(' ')
b = string.split(a," ")
print """
<table width='100%' align='center'>
<tr>
<td>
Generate Date:
"""
print b[0],b[1]
print """
</td>
</tr>
</table>
<table width='100%' align='center'>
<tr>
<td>
Power By : Python Script
</td>
</tr>
</table>
<table width='100%' align='center'>
<tr>
<td> connect:
<a href='mailto:admin@aabb.com'>
admin@aabb.com
</a>
</td>
</tr>
</table>
"""
del file_list
del date_list
del a,b
######################## END File ########################
# 第二個文件:t1.py
#import cStringIO
import sys
import re
import string
import datetime
import os
from os.path import join
##############################################################
# Analysis information
# Read the log named by the "file_name" query parameter once and collect the
# distinct (client IP, account) pairs together with the URL of the first
# record seen for each pair.
workdir = 'z:/'
os.chdir(workdir)
# basename() keeps a crafted file_name such as "../x" from leaving workdir.
fn = os.path.basename(string.strip(QUERY["file_name"]))
filename = workdir + fn

# Parallel result lists. Index 0 always holds the column caption, because the
# rendering loop further down starts at index 1; seeding it here means a log
# without a "#Fields" line no longer loses its first record, and a repeated
# "#Fields" line no longer shows up as a data row.
ip_list = ["Client IP"]
user_list = ["Account Name"]
url_list = ["URL List"]
# Header lines echoed at the top of the page; they stay empty when the log
# does not contain them (previously the names were left undefined).
tsoft = ""
tver = ""
tdate = ""
# (ip, user) pairs already listed. A dict is used as a set; the old test
# "ip not in ip_list or user not in user_list" dropped a new pair whenever
# both of its parts had each appeared earlier in different records.
seen = {}

log = open(filename)
try:
    for line in log:
        # Records are tab-delimited: index 18 is cs-uri in the #Fields order
        # only when the user-agent (which contains spaces) stays one field,
        # and the "#Software"/"#Version"/"#Date" lines reach the branch below
        # only if they are not split.
        fields = line.split("\t")
        if len(fields) > 1:
            if fields[0].startswith("#Field"):
                continue
            if len(fields) <= 18:
                # Truncated record: no cs-uri column to report.
                continue
            pair = (fields[0], fields[1])
            if pair not in seen:
                seen[pair] = True
                ip_list.append(fields[0])
                user_list.append(fields[1])
                url_list.append(fields[18])
        elif line.startswith("#Sof"):
            tsoft = line
        elif line.startswith("#Ver"):
            tver = line
        elif line.startswith("#Dat"):
            tdate = line
finally:
    # Close the log even if a line fails to parse.
    log.close()
###############################################################################
# Generate HTML Head
print "<table border='1' width='100%'>" ,tsoft,"</table>"
print "<table border='1' width='100%'>",tdate,"</table>"
print "<table border='1' width='100%'>",tver,"</table>"
print "<table border='1' width='100%'>" , '#FileName:' , filename , "</table>"
###############################################################################
# Generate HTML Body
print """
<table border='1' width='100%'>
<tr>
<td width='15%'>
<b><font size='4'>
Client IP
</font></b>
</td>
<td width='20%'>
<b><font size='4'>
Account Name
</font></b>
</td>
<td>
<b><font size='4'>
URL List
</font></b>
</td>
</tr>
"""
for x in range(1,len(ip_list)):
#print ip_list[x]
print "<tr>"
print "<td width='15%' ><a href='filter.py?c_ip=" , ip_list[x] , "&type=c_ip &fn=" , fn , "'>"
print ip_list[x]
print "</a></td>"
print "<td width='20%' ><a href='filter.py?uname=" , user_list[x] , "&type=uname &fn=" , fn , "'>"
print user_list[x]
print "</a></td><td width='50%'><a href='" , url_list[x] , " '>"
print url_list[x]
print "</a></td></tr>"
print "</table>"
##############################################################
# generate HTML Tail
a = datetime.datetime.today().isoformat(' ')
b = string.split(a," ")
print """
<table width='100%' align='center'>
<tr>
<td>
Generate Date:
"""
print b[0],b[1]
print """
</td>
</tr>
</table>
<table width='100%' align='center'>
<tr>
<td>
Power By : Python Script
</td>
</tr>
</table>
<table width='100%' align='center'>
<tr>
<td> connect:
<a href='mailto:admin@aabb.com'>
admin@aabb.com
</a>
</td>
</tr>
</table>
"""
######################## Del variable ########################
#ip_list = []
#user_list = []
#url_list = []
del tver , tdate , tsoft , a , b
######################## END File ########################
# 第三個文件 filter.py
import os
import sys
import re
import datetime
# Directory that holds the log files; it also becomes the working directory.
workdir = 'z:/'
os.chdir(workdir)
# "fn" is the log file name passed in the query string.
# - The str method replaces string.strip(): this script does not import the
#   string module, so the old call raised NameError.
# - basename() keeps a crafted value such as "../x" from leaving workdir.
fn = os.path.basename(QUERY["fn"].strip())
filename = workdir + fn
# Parallel result lists filled by grep_ip() / grep_name() and read by
# generate_html().
ip_list = []
user_list = []
url_list = []
# Module-level byte counters. grep_ip() / grep_name() assign names spelled
# the same way without a global statement, so these stay at 0.
t_in = 0
t_out = 0
t_total = 0
#############################################################################
def _collect_matches(field_index, wanted):
    """Append every record of the log whose given field equals `wanted`.

    Reads the module-level `filename` and appends to the module-level
    ip_list / user_list / url_list (client IP, account name, requested URL).
    Index 0 of those lists is reserved for the column caption, because
    generate_html() starts rendering at index 1.

    field_index -- column to compare (0 = c-ip, 1 = cs-username)
    wanted      -- exact value the column must have

    Returns a tuple (t_in, t_out, t_total): the summed sc-bytes (column 14),
    the summed cs-bytes (column 13) and their sum, over the matching records.
    Raises ValueError if a byte column is neither "-" nor an integer.
    """
    t_in = 0
    t_out = 0
    if not ip_list:
        # Seed the caption row once, whether or not the log has "#Fields".
        ip_list.append("Client IP")
        user_list.append("Account Name")
        url_list.append("URL List")
    log = open(filename)
    try:
        for line in log:
            # Records are tab-delimited; columns 13/14/18 are cs-bytes,
            # sc-bytes and cs-uri only when the user-agent, which contains
            # spaces, is kept as a single field.
            fields = line.split("\t")
            # Skip "#..." header lines and records too short to hold cs-uri.
            if len(fields) <= 18 or fields[0].startswith("#"):
                continue
            if fields[field_index] != wanted:
                continue
            ip_list.append(fields[0])
            user_list.append(fields[1])
            url_list.append(fields[18])
            # "-" marks a byte count that was not logged.
            if fields[13] != "-":
                t_out = t_out + int(fields[13])
            if fields[14] != "-":
                t_in = t_in + int(fields[14])
    finally:
        # Close the log even when a record fails to parse.
        log.close()
    # The total is taken once from the final sums; adding the running sums on
    # every match, as before, counted earlier records repeatedly.
    return t_in, t_out, t_in + t_out


def grep_ip(ip):
    """Collect the records whose client IP equals `ip`.

    Returns the (t_in, t_out, t_total) byte totals of the matching records.
    """
    return _collect_matches(0, ip)


#############################################################################
#
def grep_name(username):
    """Collect the records whose account name equals `username`.

    Returns the (t_in, t_out, t_total) byte totals of the matching records.
    """
    return _collect_matches(1, username)
######################## Generate HTML ##########################
def generate_html():
#### generate HTML Head
print """
<table width='100%'align='center'>
<tr>
<td>
<p align='center'>
<b><font size='7'>
web log analysis
</font></b>
</td>
</tr>
</table>
"""
#### generate HTML Body
print """
<table border='1' width='100%'>
<tr>
<td width='15%'>
<b><font size='4'>
Client IP
</font></b>
</td>
<td width='20%'>
<b><font size='4'>
Account Name
</font></b>
</td>
<td>
<b><font size='4'>
URL List
</font></b>
</td>
</tr>
"""
for x in range(1,len(ip_list)):
#print ip_list[x]
print "<tr>"
print "<td width='15%' ><a href='filter.py?c_ip=" , ip_list[x] , "&type=c_ip &fn=" , fn , "'>"
print ip_list[x]
print "</a></td>"
print "<td width='20%' ><a href='filter.py?uname=" , user_list[x] , "&type=uname &fn=" , fn , "'>"
print user_list[x]
print "</a></td><td width='50%'><a href='" , url_list[x] , " '>"
print url_list[x]
print "</a></td></tr>"
print "</table>"
#### generate HTML Tail
a = datetime.datetime.today().isoformat(' ')
b = string.split(a," ")
print """
<table width='100%' align='center'>
<tr>
<td>
Generate Date:
"""
print b[0],b[1]
print """
</td>
</tr>
</table>
<table width='100%' align='center'>
<tr>
<td>
Power By : Python Script
</td>
</tr>
</table>
<table width='100%' align='center'>
<tr>
<td> connect:
<a href='mailto:admin@aabb.com'>
admin@aabb.com
</a>
</td>
</tr>
</table>
"""
#############################################################################
# Dispatch on the query string: filter by client IP when "c_ip" is given,
# otherwise by account name when "uname" is given.
# - elif: with two independent ifs, a request carrying both keys rendered
#   the page twice, the second time with both result sets mixed together.
# - str.strip() replaces string.strip(): this script does not import the
#   string module, so the old calls raised NameError.
if QUERY.has_key("c_ip"):
    tip = QUERY["c_ip"].strip()
    # "type" is read as before; nothing visible in this script uses it.
    ttype = QUERY["type"].strip()
    grep_ip(tip)
    generate_html()
elif QUERY.has_key("uname"):
    tun = QUERY["uname"].strip()
    ttype = QUERY["type"].strip()
    grep_name(tun)
    generate_html()
# Rebind the result lists to fresh empty lists once the page is written.
ip_list = []
user_list = []
url_list = []
###############################################################################
還想添加一些功能,如前十名網站、前十名用戶、異常用戶、異常使用記錄等。
不過還要再等幾天才行。