netfilter已经提供了基础设施:内核模块只要注册自己的nf_sockopt_ops,用户程序就能通过套接字(getsockopt)访问它。
我想为用户程序提供如下信息:
1.获取tctable信息(版本等),用于验证用户程序和内核中tctable是否匹配
2.获取当前tc_counter个数,用于估计接收所有tc_counter所需空间
3.获取所有tc_counter信息
4.获取某个ip的tc_counter信息
5.获取某个ip的所有ipc_counter信息
在ip_conntrack.h中增加
/*
 * Per-connection record in the layout that is copied to user space
 * (one record per connection of an IP address).
 */
struct ipc_counter_user
{
struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
struct
{
u_int64_t pcnt, bcnt; /*Packet and byte counters */
}
cnt[IP_CT_DIR_MAX];// one counter pair per direction
//unsigned long create;
unsigned long last; /* jiffies value at the last update */
u_int64_t id; /* connection id within its owning tc_counter */
};
在ip_conntrack_core.c的init_conntrack函数中,增加对counter的初始化:
/* Mark clearly that it's not in the hash table. */
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list.next = NULL;
INIT_LIST_HEAD(&conntrack->counter.list);//新增
spin_lock_init(&conntrack->counter.lock);
如下是内核程序tctable_filter.c,修正了部分bug
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*
 * Module-local BUG(): logs msg with the current file and line, then emits
 * the raw bytes 0x0f,0x0b inline (presumably the x86 ud2 opcode used to trap
 * -- NOTE(review): x86 only, confirm before building elsewhere).
 */
#define BUG(msg) do { \
printk(KERN_ERR "%s,kernel BUG at %s:%d!\n", msg, __FILE__, __LINE__); \
__asm__ __volatile__(".byte 0x0f,0x0b"); \
} while (0)
/*
 * Lock assertions; the argument x is ignored and tc_counter_lock is always
 * the lock that gets checked.  Presumably these are consumed by the list
 * helper macros (LIST_FIND, LIST_DELETE, ...) included afterwards.
 */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&tc_counter_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&tc_counter_lock)
#include
#include
#include
/* Traffic direction relative to the monitored interface; also indexes dir[]. */
enum tc_dir_enum
{
TC_DIR_TX, /* packet arrived from the monitored interface */
TC_DIR_RX, /* packet leaves through the monitored interface */
TC_DIR_MAX /* number of directions (array size) */
};
/* Protocol classes that are accounted separately; also indexes proto[]. */
enum tc_proto_enum
{
TC_PROTO_TCP,
TC_PROTO_UDP,
TC_PROTO_ICMP,
TC_PROTO_OTHER, /* every IP protocol that is not TCP, UDP or ICMP */
TC_PROTO_MAX /* number of protocol classes (array size) */
};
/* Protects tc_counter_hash and the membership of every bucket list. */
DECLARE_RWLOCK(tc_counter_lock);
static unsigned int tc_counter_htable_size = 8209;// bucket count; per the author, borrowed from the ELF symbol hash
static int tc_counter_max = 4099;// at most 4099 IP addresses are tracked (0 disables the limit check)
static atomic_t tc_counter_count = ATOMIC_INIT(0);// number of live tc_counter entries
static struct list_head *tc_counter_hash;// hash table: array of bucket list heads
static kmem_cache_t *tc_counter_cachep;// slab cache for struct tc_counter
static unsigned long tc_counter_timeout=15*60*HZ;// entry timer period: 15 minutes in jiffies
static char tcdevname[IFNAMSIZ] = { "eth0" };// zero-padded name of the monitored interface
static char * tc_dev;// optional module parameter overriding tcdevname
MODULE_PARM(tc_dev, "s");
/* Per-IP traffic accounting entry kept in tc_counter_hash. */
struct tc_counter
{
struct list_head list;// links the entry into its hash bucket
u_int32_t ip;// IP address; can also serve as host_id
struct
{
struct
{
u_int64_t pcnt, bcnt; /*Packet and byte counters */
}
dir[TC_DIR_MAX];// two directions: rx and tx
u_int32_t conn;// number of connections
}
proto[TC_PROTO_MAX];// four slots: tcp, udp, icmp, other
unsigned long last; /* jiffies value at the last update */
spinlock_t lock;// update lock
struct timer_list timeout;// expiry timer of this entry
rwlock_t conn_lock;// protects conn_list
struct list_head conn_list;// connections that belong to this IP
u_int64_t next_conn_id;// id for the next connection
atomic_t use;// reference count of this entry
};
/*
 * Snapshot of a struct tc_counter in the layout copied to user space;
 * the user program must declare an identical structure.
 */
struct tc_counter_user
{
u_int32_t ip;// IP address; can also serve as host_id
struct
{
struct
{
u_int64_t pcnt, bcnt; /*Packet and byte counters */
}
dir[TC_DIR_MAX];// two directions: rx and tx
u_int32_t conn;// number of connections
}
proto[TC_PROTO_MAX];// four slots: tcp, udp, icmp, other
unsigned long last; /* jiffies value at the last update */
u_int64_t next_conn_id;// id for the next connection
u_int32_t use;// reference count of the kernel entry at snapshot time
};
/* Identification values reported through SO_GET_TC_COUNTER_INFO. */
#define TC_COUNTER_MAGIC 0x0073777a
#define TC_COUNTER_VERSION 1
/* Reply of SO_GET_TC_COUNTER_INFO; lets a client verify it matches this module. */
struct tc_counter_info
{
u_int32_t magic; /* TC_COUNTER_MAGIC */
u_int32_t ver; /* TC_COUNTER_VERSION */
u_int32_t hz; /* kernel HZ, needed to interpret jiffies values */
u_int32_t tc_counter_user_size; /* sizeof(struct tc_counter_user) in the kernel */
u_int32_t ipc_counter_user_size; /* sizeof(struct ipc_counter_user) in the kernel */
};
/*
 * Map an IPv4 address (network byte order) to its bucket index in
 * tc_counter_hash.
 */
static inline u_int32_t
hash_tc_counter(u_int32_t ipaddr)
{
	u_int32_t host_order = ntohl(ipaddr);

	return host_order % tc_counter_htable_size;
}
/*
 * LIST_FIND predicate: non-zero when tc accounts for ipaddr.
 * The caller must hold tc_counter_lock.
 */
static inline int
tc_counter_cmp(const struct tc_counter * tc,u_int32_t ipaddr)
{
	int same_ip;

	MUST_BE_READ_LOCKED(&tc_counter_lock);
	same_ip = (tc->ip == ipaddr);
	return same_ip;
}
/*
 * Look up the entry for ipaddr in its hash bucket.
 * Returns the entry or NULL; takes no reference.  The caller must hold
 * tc_counter_lock.
 */
static struct tc_counter *
__tc_counter_find(u_int32_t ipaddr)
{
	struct list_head *bucket;

	MUST_BE_READ_LOCKED(&tc_counter_lock);
	bucket = &tc_counter_hash[hash_tc_counter(ipaddr)];
	return LIST_FIND(bucket,
			 tc_counter_cmp,
			 struct tc_counter *,
			 ipaddr);
}
/*
 * Find the entry for ipaddr and take a reference on it.
 * Returns NULL when no entry exists; a non-NULL result must be released
 * with tc_counter_put().
 */
static struct tc_counter *
tc_counter_find_get(u_int32_t ipaddr)
{
	struct tc_counter *found;

	READ_LOCK(&tc_counter_lock);
	found = __tc_counter_find(ipaddr);
	if (found != NULL)
		atomic_inc(&found->use);	/* reference for the caller */
	READ_UNLOCK(&tc_counter_lock);
	return found;
}
/*
 * Drop one reference on tc (NULL is accepted).  When the last reference
 * goes away the entry is returned to the slab cache and the global entry
 * count is decremented.
 */
static inline void
tc_counter_put(struct tc_counter * tc)
{
	if (!tc)
		return;
	if (!atomic_dec_and_test(&tc->use))
		return;

	/* Last reference gone: log, free, then account for the removal. */
	printk("free tc_counter:ip=%u.%u.%u.%u tc_counter_count=%u\n",NIPQUAD(tc->ip),atomic_read(&tc_counter_count));
	kmem_cache_free(tc_counter_cachep, tc);
	atomic_dec(&tc_counter_count);
}
/* LIST_FIND predicate: non-zero when connection counter cc carries id. */
static inline int
tc_counter_conn_cmp(const struct ipc_counter * cc,u_int64_t id)
{
	if (cc->id != id)
		return 0;
	return 1;
}
/*
 * Attach the connection counter cc to the per-IP entry tc.
 *
 * tc:       per-IP entry; the caller must hold a reference on it.
 * cc:       counter embedded in the conntrack entry.
 * tc_proto: protocol class whose connection count is incremented.
 *
 * Does nothing when cc already has a parent.  Connections are kept on
 * tc->conn_list in ascending id order; if next_conn_id is not larger than
 * the id at the tail, the whole list is renumbered from 0 first.
 *
 * The id is assigned and next_conn_id advanced while conn_lock is still
 * write-held.  Previously both happened after the lock was dropped, so two
 * CPUs attaching connections to the same entry could hand out the same id
 * and break the ordering.  An attached connection holds one reference on tc.
 */
static void tc_counter_add_new_conn(struct tc_counter * tc,struct ipc_counter * cc,enum tc_proto_enum tc_proto)
{
	struct list_head *head = &tc->conn_list;

	spin_lock(&cc->lock);
	if (cc->parent) {
		/* Already attached (possibly by a racing CPU). */
		spin_unlock(&cc->lock);
		return;
	}

	WRITE_LOCK(&tc->conn_lock);
	if (!list_empty(head)) {
		struct ipc_counter *max = list_entry(head->prev, struct ipc_counter, list);

		if (tc->next_conn_id <= max->id) {
			/* Renumber so that ids stay unique and ascending. */
			struct list_head *pos;
			u_int64_t i = 0;

			for (pos = head->next; pos != head; pos = pos->next)
				list_entry(pos, struct ipc_counter, list)->id = i++;
			tc->next_conn_id = i;
		}
	}
	cc->id = tc->next_conn_id++;
	cc->parent = tc;
	list_append(head, &cc->list);
	WRITE_UNLOCK(&tc->conn_lock);
	spin_unlock(&cc->lock);

	spin_lock(&tc->lock);
	tc->proto[tc_proto].conn++;
	spin_unlock(&tc->lock);

	/* Reference held on behalf of the attached connection. */
	atomic_inc(&tc->use);
	printk("add conn:ip=%u.%u.%u.%u id=%Lu use=%u\n",NIPQUAD(tc->ip),cc->id,atomic_read(&tc->use));
}
/*
 * Timer callback of a tc_counter (ul_tc is the entry cast to unsigned long).
 *
 * While anything besides the hash table references the entry, the timer is
 * simply re-armed.  Otherwise the entry is unhashed and the table's
 * reference is dropped, which frees it.
 *
 * The reference count is tested with tc_counter_lock write-held.
 * tc_counter_find_get() takes its reference under the read side of the same
 * lock, so nobody can obtain a new reference between the test and the
 * removal.  Previously the test ran outside the lock and an entry that had
 * just been looked up could still be unhashed.
 */
static void death_by_timeout(unsigned long ul_tc)
{
	struct tc_counter *tc = (void *)ul_tc;

	WRITE_LOCK(&tc_counter_lock);
	if (atomic_read(&tc->use) > 1) {
		/* Still referenced: try again one period later. */
		tc->timeout.expires = jiffies + tc_counter_timeout;
		add_timer(&tc->timeout);
		WRITE_UNLOCK(&tc_counter_lock);
		return;
	}

	/* Every attached connection holds a reference, so the list must be empty. */
	if (!list_empty(&tc->conn_list))
		BUG("death_by_timeout,if(!list_empty(&tc->conn_list))");
	LIST_DELETE(&tc_counter_hash[hash_tc_counter(tc->ip)], &tc->list);
	WRITE_UNLOCK(&tc_counter_lock);

	tc_counter_put(tc);
}
/*
 * Create and hash a tc_counter for ipaddr.
 *
 * Returns 0 when the entry limit is reached or the allocation fails, and 1
 * otherwise -- including the case where another CPU inserted an entry for
 * the same address first, in which case the fresh allocation is discarded.
 * The caller is expected to look the entry up again afterwards.
 */
static int
new_tc_counter(u_int32_t ipaddr)
{
	struct tc_counter *fresh;
	size_t bucket;

	if (tc_counter_max
	    && atomic_read(&tc_counter_count) >= tc_counter_max) {
		if (net_ratelimit())
			printk(KERN_WARNING "new_tc_counter: maximum limit of"
			       " %d entries exceeded\n", tc_counter_max);
		return 0;
	}

	bucket = hash_tc_counter(ipaddr);
	fresh = kmem_cache_alloc(tc_counter_cachep, GFP_ATOMIC);
	if (fresh == NULL) {
		if (net_ratelimit())
			printk(KERN_WARNING "new_tc_counter:out of memory\n");
		return 0;
	}

	/* Build the entry completely before it becomes visible in the table. */
	memset(fresh, 0, sizeof(struct tc_counter));
	INIT_LIST_HEAD(&fresh->list);
	fresh->ip = ipaddr;
	atomic_set(&fresh->use, 1);		/* reference owned by the hash table */
	spin_lock_init(&fresh->lock);
	init_timer(&fresh->timeout);
	fresh->timeout.data = (unsigned long)fresh;
	fresh->timeout.function = death_by_timeout;
	fresh->timeout.expires = jiffies + tc_counter_timeout;
	rwlock_init(&fresh->conn_lock);
	INIT_LIST_HEAD(&fresh->conn_list);

	WRITE_LOCK(&tc_counter_lock);
	if (!__tc_counter_find(ipaddr)) {
		printk("new tc_counter:ip=%u.%u.%u.%u\n",NIPQUAD(ipaddr));
		add_timer(&fresh->timeout);
		list_prepend(&tc_counter_hash[bucket],
			     &fresh->list);
		atomic_inc(&tc_counter_count);
		WRITE_UNLOCK(&tc_counter_lock);
		return 1;
	}
	/* Lost the race: an entry for this address already exists. */
	WRITE_UNLOCK(&tc_counter_lock);
	kmem_cache_free(tc_counter_cachep, fresh);
	return 1;
}
/*
 * Classify an IP protocol number into a tc_proto_enum value; anything
 * other than TCP, UDP and ICMP maps to TC_PROTO_OTHER.
 */
static int get_tc_proto(u_int8_t proto)
{
	if (proto == IPPROTO_TCP)
		return TC_PROTO_TCP;
	if (proto == IPPROTO_UDP)
		return TC_PROTO_UDP;
	if (proto == IPPROTO_ICMP)
		return TC_PROTO_ICMP;
	return TC_PROTO_OTHER;
}
/*
static void tc_counter_refresh(struct tc_counter *tc, unsigned long extra_jiffies)
{
IP_NF_ASSERT(tc->timeout.data == (unsigned long)tc);
WRITE_LOCK(&tc_counter_lock);
/ * Need del_timer for race avoidance (may already be dying). * /
if (del_timer(&tc->timeout)) {//定时器已启动
tc->timeout.expires = jiffies + extra_jiffies;
add_timer(&tc->timeout);//修改定时器
}
WRITE_UNLOCK(&ip_conntrack_lock);
}
*/
/*
 * Account one packet against the per-IP tc_counter and against the
 * per-connection counter embedded in its conntrack entry.
 *
 * skb:     packet to account; the IP header is read through skb->nh.iph.
 * in, out: input/output devices handed to the hook; either may be NULL.
 * msg:     name of the calling hook, used only in the diagnostic printk.
 *
 * Packets that neither arrive on nor leave through tcdevname are ignored.
 * Packets without a conntrack entry, or for which no tc_counter can be
 * created, are only reported through a rate-limited debug message.
 */
static void sum(struct sk_buff * skb,
const struct net_device *in,
const struct net_device *out,
const char * msg)
{
static const char nulldevname[IFNAMSIZ] = { 0 };
//static const char lodevname[IFNAMSIZ] = { "lo" };
struct iphdr *ip;
const char *indev, *outdev;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
unsigned long ret;
u_int32_t ipaddr;
enum tc_dir_enum tc_dir;
const char * msg2;
enum tc_proto_enum tc_proto;
// Work out the traffic direction relative to the configured internal interface tc_dev:
// a packet coming from that interface is TX, a packet sent to it is RX
ip=skb->nh.iph;
tc_proto=get_tc_proto(ip->protocol);
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* do { ... } while(0) is used as a breakable block: break = direction found */
do
{
int i;
// is indev the tc_dev interface?
// (names are compared one unsigned long at a time over IFNAMSIZ bytes;
// NOTE(review): this relies on both buffers being suitably aligned and zero-padded)
for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
ret |= ((const unsigned long *)indev)[i]
^ ((const unsigned long *)tcdevname)[i];
}
if(!ret){
ipaddr=ip->saddr;
tc_dir=TC_DIR_TX;
break;
}
// is outdev the tc_dev interface?
for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
ret |= ((const unsigned long *)outdev)[i]
^ ((const unsigned long *)tcdevname)[i];
}
if(!ret){
ipaddr=ip->daddr;
tc_dir=TC_DIR_RX;
break;
}
/*
// is in_dev the loopback device?
if(((const unsigned long *)indev)[0]^ ((const unsigned long *)lodevname)[0]){
ipaddr=htonl(INADDR_LOOPBACK);
tc_dir=TC_DIR_TX;
break;
}
// is out_dev the loopback device?
if(((const unsigned long *)outdev)[0]^ ((const unsigned long *)lodevname)[0]){
ipaddr=htonl(INADDR_LOOPBACK);
tc_dir=TC_DIR_RX;
break;
}
msg2="tc_dev missed";
goto err;
*/
/* neither device is tc_dev: nothing to account */
return;
}
while(0);
ct = ip_conntrack_get(skb, &ctinfo);
if(ct)// the packet belongs to a tracked connection
{
struct ipc_counter *cc=&ct->counter;
int conn_dir=CTINFO2DIR(ctinfo);
struct tc_counter * tc;
again:
/* lock order on this path: cc->lock first, then tc->lock */
spin_lock(&cc->lock);
tc=(struct tc_counter *)cc->parent;
if(tc){
/* fast path: connection already attached to its per-IP entry */
spin_lock(&tc->lock);
tc->proto[tc_proto].dir[tc_dir].pcnt++;
tc->proto[tc_proto].dir[tc_dir].bcnt+=ntohs(ip->tot_len);
tc->last=jiffies;
spin_unlock(&tc->lock);
cc->cnt[conn_dir].pcnt++;
cc->cnt[conn_dir].bcnt+=ntohs(ip->tot_len);
cc->last=jiffies;
spin_unlock(&cc->lock);
//tc_counter_refresh(tc,tc_counter_timeout);
return;
}
spin_unlock(&cc->lock);
/* slow path: find (or create, then find) the entry for this address */
do {
tc = tc_counter_find_get(ipaddr);
if (!tc
&& !new_tc_counter(ipaddr)){
msg2="new_tc_counter failed";
goto err;
}
} while (!tc);
tc_counter_add_new_conn(tc,cc,tc_proto);
/* drop the lookup reference; the attached connection holds its own */
tc_counter_put(tc);
/* retry: cc->parent is now set, so the fast path does the accounting */
goto again;
}
msg2="untracked";
err:
if (net_ratelimit())
printk(KERN_DEBUG "%s:got %s packet indev:%s->outdev:%s %p %u %u.%u.%u.%u -> %u.%u.%u.%u %u\n",
msg,
msg2,
indev,
outdev,
skb,
skb->nh.iph->protocol,
NIPQUAD(skb->nh.iph->saddr),
NIPQUAD(skb->nh.iph->daddr),
skb->nh.iph->protocol
);
}
/* FORWARD hook: accounts the packet and always lets it pass. */
static unsigned int
tct_forward_hook(unsigned int hook,
		 struct sk_buff **pskb,
		 const struct net_device *in,
		 const struct net_device *out,
		 int (*okfn)(struct sk_buff *))
{
	struct sk_buff *skb = *pskb;

	sum(skb, in, out, "tct_forward_hook");
	return NF_ACCEPT;
}
/* LOCAL_IN hook: accounts the packet and always lets it pass. */
static unsigned int
tct_local_in_hook(unsigned int hook,
		  struct sk_buff **pskb,
		  const struct net_device *in,
		  const struct net_device *out,
		  int (*okfn)(struct sk_buff *))
{
	struct sk_buff *skb = *pskb;

	sum(skb, in, out, "tct_local_in_hook");
	return NF_ACCEPT;
}
/*
 * LOCAL_OUT hook: accounts the packet and always lets it pass.
 * Locally generated packets may come from raw sockets, so packets too short
 * to carry a full IP header are skipped (with a rate-limited log message).
 */
static unsigned int
tct_local_out_hook(unsigned int hook,
		   struct sk_buff **pskb,
		   const struct net_device *in,
		   const struct net_device *out,
		   int (*okfn)(struct sk_buff *))
{
	struct sk_buff *skb = *pskb;
	int too_short = skb->len < sizeof(struct iphdr)
			|| skb->nh.iph->ihl * 4 < sizeof(struct iphdr);

	if (!too_short) {
		sum(skb, in, out, "tct_local_out_hook");
		return NF_ACCEPT;
	}

	if (net_ratelimit())
		printk("ipt_hook: happy cracking.\n");
	return NF_ACCEPT;
}
/*
 * Hook registrations: the accounting hooks sit on LOCAL_IN, FORWARD and
 * LOCAL_OUT of PF_INET, each at priority NF_IP_PRI_FILTER+10.
 * init() registers the entries by index, fini() unregisters all of them.
 */
static struct nf_hook_ops tct_ops[]
= { { { NULL, NULL }, tct_local_in_hook, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_FILTER+10 },
{ { NULL, NULL }, tct_forward_hook, PF_INET, NF_IP_FORWARD, NF_IP_PRI_FILTER+10 },
{ { NULL, NULL }, tct_local_out_hook, PF_INET, NF_IP_LOCAL_OUT, NF_IP_PRI_FILTER+10 }
};
/*
 * Callback installed in tc_counter_destroyed: detaches the counter of
 * conntrack entry conn from its per-IP tc_counter, if it has one.
 *
 * The connection is unlinked from tc->conn_list, the per-protocol
 * connection count is decremented and the reference the connection held on
 * the entry is dropped.
 *
 * tc->conn_list is a single list head, not a hash table, so it must not be
 * indexed; the old code passed &tc->conn_list[hash_tc_counter(tc->ip)],
 * i.e. an address far outside the structure.
 */
static void tc_counter_cleanup_conntrack(struct ip_conntrack *conn)
{
	struct ipc_counter *cc = &conn->counter;
	struct tc_counter *tc;

	spin_lock(&cc->lock);
	tc = (struct tc_counter *)cc->parent;
	if (tc) {
		WRITE_LOCK(&tc->conn_lock);
		LIST_DELETE(&tc->conn_list, &cc->list);
		WRITE_UNLOCK(&tc->conn_lock);

		spin_lock(&tc->lock);
		tc->proto[get_tc_proto(conn->tuplehash[0].tuple.dst.protonum)].conn--;
		spin_unlock(&tc->lock);

		/*
		 * The hash table's own reference keeps the count above zero
		 * here, so a plain decrement is used instead of
		 * tc_counter_put().
		 */
		atomic_dec(&tc->use);
		printk("delete conn:ip=%u.%u.%u.%u id=%Lu use=%u\n",
		       NIPQUAD(tc->ip),
		       conn->counter.id,
		       atomic_read(&tc->use)
		       );
	}
	spin_unlock(&cc->lock);
}
/* Printable protocol class names, indexed by enum tc_proto_enum. */
static const char * tc_proto_names[]=
{
[TC_PROTO_TCP]="tcp",
[TC_PROTO_UDP]="udp",
[TC_PROTO_ICMP]="icmp",
[TC_PROTO_OTHER]="other",
};
/* Printable direction names, indexed by enum tc_dir_enum. */
static const char * tc_dir_names[]=
{
[TC_DIR_TX]="tx",
[TC_DIR_RX]="rx",
};
static unsigned int
print_tc_counter(char *buffer, const struct tc_counter *tc)
{
unsigned int len,i,j;
len = sprintf(buffer, "%u.%u.%u.%u %u ",
NIPQUAD(tc->ip),
timer_pending(&tc->timeout)
? (tc->timeout.expires - jiffies)/HZ : 0);
//spin_lock(&tc->lock);
for(i=0;i {
len+=sprintf(buffer + len,"{ %s ",tc_proto_names[i]);
for(j=0;j {
len+=sprintf(buffer + len,"[ %s ",tc_dir_names[j]);
len += sprintf(buffer + len, "pcnt=%Lu ",
tc->proto[i].dir[j].pcnt);
len += sprintf(buffer + len, "bcnt=%Lu ] ",
tc->proto[i].dir[j].bcnt);
}
len += sprintf(buffer + len, "conn=%u } ",
tc->proto[i].conn);
}
len += sprintf(buffer + len, "last=%u ",
tc->last);
len += sprintf(buffer + len, "next_id=%Lu ",
tc->next_conn_id);
//spin_unlock(&tc->lock);
len += sprintf(buffer + len, "use=%u ",
atomic_read(&tc->use));
len += sprintf(buffer + len, "\n");
return len;
}
/*
 * LIST_FIND callback used by the /proc reader.
 *
 * Skips entries until *upto reaches offset, then appends the text of tc to
 * buffer at position *len.  Returns 1 (stop iterating) when the new text
 * would push *len beyond maxlen -- the text has already been written at
 * that point but is not counted -- and 0 otherwise.
 */
static inline int
tc_counter_iterate(const struct tc_counter *tc,
		   char *buffer, off_t offset, off_t *upto,
		   unsigned int *len, unsigned int maxlen)
{
	unsigned int written;

	MUST_BE_READ_LOCKED(&tc_counter_lock);

	/* Entry lies before the requested offset: count it and move on. */
	if ((*upto)++ < offset)
		return 0;

	written = print_tc_counter(buffer + *len, tc);
	if (*len + written <= maxlen) {
		*len += written;
		return 0;
	}
	return 1;
}
/*
 * /proc read handler: dumps the tc_counter entries as text, one per line,
 * starting with entry number offset and stopping once length bytes would be
 * exceeded.  Returns the number of bytes placed in buffer.
 */
static int
list_tc_counter(char *buffer, char **start, off_t offset, int length)
{
	unsigned int bucket = 0;
	unsigned int used = 0;
	off_t seen = 0;

	READ_LOCK(&tc_counter_lock);
	/* Walk every bucket until the iterator reports a full buffer. */
	while (bucket < tc_counter_htable_size) {
		if (LIST_FIND(&tc_counter_hash[bucket], tc_counter_iterate,
			      struct tc_counter *,
			      buffer, offset, &seen, &used, length))
			break;
		bucket++;
	}
	READ_UNLOCK(&tc_counter_lock);

	/* `start' hack - see fs/proc/generic.c line ~165 */
	*start = (char *)((unsigned int)seen - offset);
	return used;
}
/*
 * Detach conntrack entry ct from tc during module cleanup: clears the
 * counter's parent link and per-connection counters, decrements the
 * per-protocol connection count and drops the connection's reference.
 * Does nothing when the counter is not attached to tc.  The caller has
 * already unlinked the counter from tc->conn_list.
 */
static void cleanup_a_conn(struct tc_counter *tc,struct ip_conntrack * ct)
{
	struct ipc_counter *counter = &ct->counter;
	int proto_idx;

	spin_lock(&counter->lock);
	if ((struct tc_counter *)counter->parent != tc) {
		spin_unlock(&counter->lock);
		return;
	}

	counter->parent = NULL;
	memset(&counter->cnt, 0, sizeof(counter->cnt));

	proto_idx = get_tc_proto(ct->tuplehash[0].tuple.dst.protonum);
	spin_lock(&tc->lock);
	tc->proto[proto_idx].conn--;
	spin_unlock(&tc->lock);

	atomic_dec(&tc->use);
	spin_unlock(&counter->lock);
}
static void cleanup_a_tc_counter(struct tc_counter * tc)
{
struct list_head * head=&tc->conn_list;
int i;
MUST_BE_WRITE_LOCKED(&tc_counter_lock);
if(timer_pending(&tc->timeout))
del_timer(&tc->timeout);
WRITE_LOCK(&tc->conn_lock);
while(!list_empty(head))
{
struct ip_conntrack * ct=(struct ip_conntrack *)list_entry(head->next,struct ip_conntrack,counter.list);
list_del_init(head->next);
cleanup_a_conn(tc,ct);
}
WRITE_UNLOCK(&tc->conn_lock);
if(atomic_read(&tc->use)!=1)
BUG("cleanup_a_tc_counter,if(atomic_read(&tc->use)!=1)");
for(i=0;i
if(tc->proto[i].conn!=0)
BUG("cleanup_a_tc_counter,if(tc->proto[i].conn!=0)");
}
tc_counter_put(tc);
}
/*
 * Empty one hash bucket: every entry is unlinked and torn down.
 * The caller must hold tc_counter_lock for writing.
 */
static void cleanup_a_bucket(struct list_head * head)
{
	MUST_BE_WRITE_LOCKED(&tc_counter_lock);
	for (;;) {
		struct list_head *first;
		struct tc_counter *entry;

		if (list_empty(head))
			break;
		first = head->next;
		entry = list_entry(first, struct tc_counter, list);
		LIST_DELETE(head, first);
		cleanup_a_tc_counter(entry);
	}
}
/*
 * Remove every tc_counter from the hash table and verify that the global
 * entry count dropped to zero.
 */
static void cleanup_all_tc_counters(void)
{
	int bucket = 0;

	WRITE_LOCK(&tc_counter_lock);
	while (bucket < tc_counter_htable_size) {
		cleanup_a_bucket(&tc_counter_hash[bucket]);
		bucket++;
	}
	WRITE_UNLOCK(&tc_counter_lock);

	if (atomic_read(&tc_counter_count) != 0)
		BUG("cleanup_all_tc_counters,if(atomic_read(&tc_counter_count)!=0)");
}
/* getsockopt option numbers served by getinfo(); the user program must use the same values. */
#define SO_GET_TC_COUNTER_INFO 256 /* module identification (struct tc_counter_info) */
#define SO_GET_TC_COUNTER_COUNT 257 /* current number of tc_counter entries */
#define SO_GET_ALL_TC_COUNTERS 258 /* all entries as struct tc_counter_user records */
#define SO_GET_TC_COUNTER_BY_IP 259 /* the entry of one IP address */
#define SO_GET_ALL_CONNS_BY_IP 260 /* all connections of one IP address */
/*
 * SO_GET_ALL_TC_COUNTERS: copy every tc_counter to user space as an array
 * of struct tc_counter_user.
 *
 * user: destination buffer in user space.
 * len:  in: size of the buffer in bytes; out: number of records written
 *       (a record count, not a byte count).
 *
 * Returns 0 on success, -ENOMEM when the buffer cannot hold the current
 * number of entries, -EFAULT when a copy to user space fails.
 *
 * NOTE(review): copy_to_user() is called with tc_counter_lock read-held.
 * The user program in this article works around the resulting page-fault
 * problem by mlock()ing its buffer; a snapshot taken under the lock and
 * copied out afterwards would remove that requirement.
 */
static int get_all_tc_counters(void * user,int * len)
{
int i,ret;
struct tc_counter * tc;
struct tc_counter_user * tcu,tmpu;
READ_LOCK(&tc_counter_lock);
tcu=(struct tc_counter_user *) user;
/* reject the request when the end of the would-be array lies beyond the user buffer */
if( (char *)& tcu[ atomic_read( &tc_counter_count ) ] > &((char *)user)[*len] )
{ret=-ENOMEM;goto out;}
for (i = 0; i < tc_counter_htable_size; i++) {
struct list_head * head=&tc_counter_hash[i];
struct list_head * next=head->next;
while(head!=next)
{
/* debug trace: one line per copied entry */
printk("%d,tcu=%p\n",i,tcu);
tc=list_entry(next,struct tc_counter,list);
/* build the user-visible snapshot field by field (read without tc->lock) */
tmpu.ip=tc->ip;
memcpy(&(tmpu.proto),&(tc->proto),sizeof(tmpu.proto));
tmpu.last=tc->last;
tmpu.next_conn_id=tc->next_conn_id;
tmpu.use=atomic_read(&tc->use);
if(copy_to_user(tcu++,&tmpu,sizeof(struct tc_counter_user))!=0)
{ret=-EFAULT;goto out;}
next=next->next;
}
}
/* report the number of records, not bytes */
*len=atomic_read(&tc_counter_count);
ret=0;
out:
READ_UNLOCK(&tc_counter_lock);
return ret;
}
static int get_tc_counter_by_ip(void * user,int * len)
{
u_int32_t ip;
int ret;
struct tc_counter * tc;
struct tc_counter_user tmpu;
if(*len
ret= get_user(ip,(u_int32_t*)user);
if(ret)
return ret;
printk("ip=%u.%u.%u.%u\n",NIPQUAD(ip));
tc=tc_counter_find_get(ip);
if(tc)
{
tmpu.ip=tc->ip;
memcpy(&(tmpu.proto),&(tc->proto),sizeof(tmpu.proto));
tmpu.last=tc->last;
tmpu.next_conn_id=tc->next_conn_id;
tmpu.use=atomic_read(&tc->use);
if(copy_to_user(user,&tmpu,sizeof(struct tc_counter_user))!=0)
{
ret=-EFAULT;
}
else
{
*len=1;
ret=0;
}
tc_counter_put(tc);
return ret;
}
return -ENOENT;
}
static int get_all_conns_by_ip(void * user,int * len)
{
u_int32_t ip;
int ret;
struct tc_counter * tc;
ret= get_user(ip,(u_int32_t*)user);
if(ret)
return ret;
printk("ip=%u.%u.%u.%u\n",NIPQUAD(ip));
tc=tc_counter_find_get(ip);
if(tc)
{
struct ipc_counter * cc;
struct ipc_counter_user *ccu,tmpu;
struct ip_conntrack * ct;
int i,conn;
struct list_head * head, * next;
READ_LOCK(&tc->conn_lock);
ccu=(struct ipc_counter_user *)user;
conn=0;
for(i=0;i conn+=tc->proto[i].conn;
if((char*)&ccu[conn] > &((char *)user)[*len])
{ret= -ENOMEM;goto out;}
head=&tc->conn_list;
next=head->next;
while(head!=next)
{
cc=list_entry(next,struct ipc_counter,list);
ct=list_entry(cc,struct ip_conntrack,counter);
for(i=0;i memcpy(&(tmpu.tuple[i]),&(ct->tuplehash[i].tuple),sizeof(struct ip_conntrack_tuple));
memcpy(&tmpu.cnt,&cc->cnt,sizeof(tmpu.cnt));
tmpu.last=cc->last;
tmpu.id=cc->id;
if(copy_to_user(ccu++,&tmpu,sizeof(struct ipc_counter_user))!=0)
{
ret=-EFAULT;
goto out;
}
next=next->next;
}
*len=conn;
ret=0;
out:
READ_UNLOCK(&tc->conn_lock);
tc_counter_put(tc);
return ret;
}
return -ENOENT;
}
static int get_tc_counter_info(void * user,int * len)
{
struct tc_counter_info info;
if(*len!=sizeof(struct tc_counter_info)) return -EINVAL;
info.magic=TC_COUNTER_MAGIC;
info.ver=TC_COUNTER_VERSION;
info.hz=HZ;
info.tc_counter_user_size=sizeof(struct tc_counter_user);
info.ipc_counter_user_size=sizeof(struct ipc_counter_user);
if(copy_to_user(user,&info,sizeof(struct tc_counter_info)))
return -EFAULT;
return 0;
}
/*
 * getsockopt entry point registered through so_getinfo: dispatches optval
 * to the matching handler.  Unknown options yield -ENOENT.
 */
static int
getinfo(struct sock *sk, int optval, void *user, int *len)
{
	if (optval == SO_GET_TC_COUNTER_INFO) {
		printk("SO_GET_TC_COUNTER_INFO\n");
		return get_tc_counter_info(user,len);
	}

	if (optval == SO_GET_TC_COUNTER_COUNT) {
		printk("SO_GET_TC_COUNTER_COUNT\n");
		/* The reply is a single int holding the entry count. */
		if (*len != sizeof(int))
			return -EINVAL;
		if (put_user(atomic_read(&tc_counter_count),(int *)user))
			return -EFAULT;
		printk("tc_counter count=%d\n",atomic_read(&tc_counter_count));
		return 0;
	}

	if (optval == SO_GET_ALL_TC_COUNTERS) {
		printk("SO_GET_ALL_TC_COUNTERS\n");
		return get_all_tc_counters(user,len);
	}

	if (optval == SO_GET_TC_COUNTER_BY_IP) {
		printk("SO_GET_TC_COUNTER_BY_IP\n");
		return get_tc_counter_by_ip(user,len);
	}

	if (optval == SO_GET_ALL_CONNS_BY_IP) {
		printk("SO_GET_ALL_CONNS_BY_IP\n");
		return get_all_conns_by_ip(user,len);
	}

	return -ENOENT;
}
/*
 * Socket-option registration for PF_INET: no setsockopt handler; getinfo()
 * serves the getsockopt numbers from SO_GET_TC_COUNTER_INFO up to
 * SO_GET_ALL_CONNS_BY_IP (the upper bound is given as last+1, presumably
 * because the range end is exclusive).
 */
static struct nf_sockopt_ops so_getinfo
= { { NULL, NULL }, PF_INET,
0, 0, NULL, /* Setsockopts */
SO_GET_TC_COUNTER_INFO, SO_GET_ALL_CONNS_BY_IP+1, &getinfo,
0, NULL };
/*
 * Module initialisation.
 *
 * Applies the optional tc_dev parameter, allocates the hash table and the
 * slab cache, creates the /proc entry "tc_counter", installs the conntrack
 * destroy callback, registers the three netfilter hooks and the getsockopt
 * interface, and finally pins the conntrack module.
 *
 * Returns 0 on success or a negative errno; on failure everything set up so
 * far is undone in reverse order.
 *
 * The header of the device-name copy loop was lost in the published listing
 * and is restored here.
 */
static int __init init(void)
{
	int ret, i;

	if (tc_dev) {
		if (strlen(tc_dev) >= IFNAMSIZ) {
			printk("tc_dev:%s invalid!\n",tc_dev);
			return -EINVAL;
		}
		/*
		 * Keep tcdevname zero-padded: sum() compares interface names
		 * over the full IFNAMSIZ bytes.
		 */
		memset(tcdevname, 0, IFNAMSIZ);
		for (i = 0; i < IFNAMSIZ; i++) {
			if (tc_dev[i]) {
				tcdevname[i] = tc_dev[i];
			} else
				break;
		}
	}

	tc_counter_hash = vmalloc(sizeof(struct list_head)
				  * tc_counter_htable_size);
	if (!tc_counter_hash) {
		return -ENOMEM;
	}

	tc_counter_cachep = kmem_cache_create("tc_counter",
					      sizeof(struct tc_counter), 0,
					      SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!tc_counter_cachep) {
		ret = -ENOMEM;
		printk(KERN_ERR "Unable to create tc_couner slab cache\n");
		goto create_slab_failed;
	}

	/* Initialise every hash bucket as an empty list. */
	for (i = 0; i < tc_counter_htable_size; i++)
		INIT_LIST_HEAD(&tc_counter_hash[i]);

	proc_net_create("tc_counter", 0, list_tc_counter);

	if (tc_counter_destroyed != NULL)
		BUG("init,if(tc_counter_destroyed != NULL)");
	tc_counter_destroyed = &tc_counter_cleanup_conntrack;

	/* Register hooks */
	ret = nf_register_hook(&tct_ops[0]);
	if (ret < 0)
		goto cleanup;
	ret = nf_register_hook(&tct_ops[1]);
	if (ret < 0)
		goto cleanup_hook0;
	ret = nf_register_hook(&tct_ops[2]);
	if (ret < 0)
		goto cleanup_hook1;

	/* getsockopt interface for the user program */
	ret = nf_register_sockopt(&so_getinfo);
	if (ret != 0)
		goto cleanup_hook2;

	/* This module depends on conntrack: keep it loaded. */
	if (ip_conntrack_module)
		__MOD_INC_USE_COUNT(ip_conntrack_module);
	return ret;

cleanup_hook2:
	nf_unregister_hook(&tct_ops[2]);
cleanup_hook1:
	nf_unregister_hook(&tct_ops[1]);
cleanup_hook0:
	nf_unregister_hook(&tct_ops[0]);
cleanup:
	proc_net_remove("tc_counter");
	/* Hooks may already have created entries before the failure. */
	if (atomic_read(&tc_counter_count) > 0)
		cleanup_all_tc_counters();
	tc_counter_destroyed = NULL;
	kmem_cache_destroy(tc_counter_cachep);
create_slab_failed:
	vfree(tc_counter_hash);
	return ret;
}
/*
 * Module exit: unregisters the getsockopt interface and all hooks, removes
 * the /proc entry, frees every tc_counter, clears the conntrack destroy
 * callback, releases the cache and the hash table and unpins conntrack.
 */
static void __exit fini(void)
{
	const unsigned int nhooks = sizeof(tct_ops)/sizeof(struct nf_hook_ops);
	unsigned int hook = 0;

	nf_unregister_sockopt(&so_getinfo);
	while (hook < nhooks) {
		nf_unregister_hook(&tct_ops[hook]);
		hook++;
	}

	proc_net_remove("tc_counter");
	cleanup_all_tc_counters();
	tc_counter_destroyed = NULL;

	kmem_cache_destroy(tc_counter_cachep);
	vfree(tc_counter_hash);

	if (ip_conntrack_module)
		__MOD_DEC_USE_COUNT(ip_conntrack_module);
}
/* Module entry points: init() runs at load time, fini() at unload time. */
module_init(init);
module_exit(fini);
如下是用户程序tctable_user.c
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <netinet/in.h>
/*
 * Size in bytes of the reply buffer handed to getsockopt.  The expansion is
 * parenthesized so that expressions such as BUF_SIZE % n or x / BUF_SIZE
 * evaluate as intended.
 */
#define BUF_SIZE (1024*1024)
/*
 * Expands to the four bytes of addr, in memory order, as four printf
 * arguments for "%u.%u.%u.%u".  addr must be an lvalue; it is parenthesized
 * so that any lvalue expression can be passed safely.
 */
#define NIPQUAD(addr) \
	((unsigned char *)&(addr))[0], \
	((unsigned char *)&(addr))[1], \
	((unsigned char *)&(addr))[2], \
	((unsigned char *)&(addr))[3]
/*
 * The enums below are user-space copies of kernel-side definitions; their
 * values index arrays inside the records returned by the kernel module and
 * therefore must match it.
 */
/* Traffic direction; indexes dir[] in struct tc_counter_user. */
enum tc_dir_enum
{
TC_DIR_TX,
TC_DIR_RX,
TC_DIR_MAX /* number of directions (array size) */
};
/* Protocol class; indexes proto[] in struct tc_counter_user. */
enum tc_proto_enum
{
TC_PROTO_TCP,
TC_PROTO_UDP,
TC_PROTO_ICMP,
TC_PROTO_OTHER,
TC_PROTO_MAX /* number of protocol classes (array size) */
};
/* Conntrack direction; indexes tuple[] and cnt[] in struct ipc_counter_user. */
enum ip_conntrack_dir
{
IP_CT_DIR_ORIGINAL,
IP_CT_DIR_REPLY,
IP_CT_DIR_MAX /* number of directions (array size) */
};
/* Printable protocol class names, indexed by enum tc_proto_enum. */
const char * tc_proto_names[]=
{
[TC_PROTO_TCP]="tcp",
[TC_PROTO_UDP]="udp",
[TC_PROTO_ICMP]="icmp",
[TC_PROTO_OTHER]="other",
};
/* Printable direction names, indexed by enum tc_dir_enum. */
const char * tc_dir_names[]=
{
[TC_DIR_TX]="tx",
[TC_DIR_RX]="rx",
};
/*
 * Printable conntrack direction names, indexed by enum ip_conntrack_dir.
 * The reply direction used to be spelled "rely" in the program output.
 */
const char * ip_ct_dir_names[]=
{
	[IP_CT_DIR_ORIGINAL]="orig",
	[IP_CT_DIR_REPLY]="reply",
};
/*
 * Per-IP record as returned by the kernel module; must match the kernel's
 * struct tc_counter_user byte for byte (check_version() compares the sizes).
 */
struct tc_counter_user
{
u_int32_t ip;// IP address; can also serve as host_id
struct
{
struct
{
u_int64_t pcnt, bcnt; /*Packet and byte counters */
}
dir[TC_DIR_MAX];// two directions: rx and tx
u_int32_t conn;// number of connections
}
proto[TC_PROTO_MAX];// four slots: tcp, udp, icmp, other
unsigned long last; /* jiffies value at the last update */
u_int64_t next_conn_id;// id for the next connection
u_int32_t use;// reference count of the kernel entry
};
/*
 * Protocol-specific part of the source side of a tuple (user-space copy of
 * the kernel definition, needed to decode struct ipc_counter_user).
 */
union ip_conntrack_manip_proto
{
/* Add other protocols here. */
u_int16_t all;
struct {
u_int16_t port;
} tcp;
struct {
u_int16_t port;
} udp;
struct {
u_int16_t id;
} icmp;
};
/* The manipulable part of the tuple: source address plus protocol-specific part. */
struct ip_conntrack_manip
{
u_int32_t ip; /* source IP address; printed byte by byte in memory order */
union ip_conntrack_manip_proto u;
};
/*
 * This contains the information to distinguish a connection
 * (user-space copy of the kernel definition).
 */
struct ip_conntrack_tuple
{
struct ip_conntrack_manip src;
/* These are the parts of the tuple which are fixed. */
struct {
u_int32_t ip; /* destination IP address */
union {
/* Add other protocols here. */
u_int16_t all;
struct {
u_int16_t port;
} tcp;
struct {
u_int16_t port;
} udp;
struct {
u_int8_t type, code;
} icmp;
} u;
/* The protocol. */
u_int16_t protonum;
} dst;
};
/*
 * Per-connection record as returned by SO_GET_ALL_CONNS_BY_IP; must match
 * the kernel's struct ipc_counter_user (check_version() compares the sizes).
 */
struct ipc_counter_user
{
struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
struct
{
u_int64_t pcnt, bcnt; /*Packet and byte counters */
}
cnt[IP_CT_DIR_MAX];// one counter pair per direction
//unsigned long create;
unsigned long last; /* jiffies value at the last update */
u_int64_t id; /* connection id */
};
/* Values this program expects the kernel module to report. */
#define TC_COUNTER_MAGIC 0x0073777a
#define TC_COUNTER_VERSION 1
/* Reply of SO_GET_TC_COUNTER_INFO, evaluated by check_version(). */
struct tc_counter_info
{
u_int32_t magic;
u_int32_t ver;
u_int32_t hz; /* kernel HZ */
u_int32_t tc_counter_user_size; /* kernel-side sizeof(struct tc_counter_user) */
u_int32_t ipc_counter_user_size; /* kernel-side sizeof(struct ipc_counter_user) */
};
/*
 * Convert a dotted-decimal ASCII string (e.g. "218.22.21.228") into a
 * binary IPv4 address in network byte order.
 *
 * Up to four '.'-separated decimal fields are consumed; missing trailing
 * fields count as 0.  No validation is performed: non-digit characters and
 * fields larger than 255 are not rejected.
 */
u_int32_t in_aton(const char *str)
{
	unsigned long addr = 0;
	int field;

	for (field = 0; field < 4; field++) {
		unsigned int val = 0;

		addr <<= 8;
		if (*str == '\0')
			continue;	/* input exhausted: this field is 0 */

		for (; *str != '\0' && *str != '.'; str++)
			val = val * 10 + (*str - '0');
		addr |= val;	/* no overflow check on the field value */

		if (*str != '\0')	/* step over the '.' separator */
			str++;
	}
	return htonl(addr);	/* host order -> network order */
}
/*
 * Classify an IP protocol number into a tc_proto_enum value; anything
 * other than TCP, UDP and ICMP maps to TC_PROTO_OTHER.
 */
int get_tc_proto(u_int8_t proto)
{
	if (proto == IPPROTO_TCP)
		return TC_PROTO_TCP;
	if (proto == IPPROTO_UDP)
		return TC_PROTO_UDP;
	if (proto == IPPROTO_ICMP)
		return TC_PROTO_ICMP;
	return TC_PROTO_OTHER;
}
/*
 * Format a TCP tuple as "src:port->dst:port" into buffer.
 * Returns the number of characters written (sprintf result).
 */
unsigned int tcp_print_tuple(char *buffer,
			     const struct ip_conntrack_tuple *tuple)
{
	const unsigned short sport = ntohs(tuple->src.u.tcp.port);
	const unsigned short dport = ntohs(tuple->dst.u.tcp.port);
	int written;

	written = sprintf(buffer, "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu",
			  NIPQUAD((tuple->src.ip)),
			  sport,
			  NIPQUAD((tuple->dst.ip)),
			  dport);
	return written;
}
/*
 * Format a UDP tuple as "src:port->dst:port" into buffer.
 * Returns the number of characters written (sprintf result).
 */
unsigned int udp_print_tuple(char *buffer,
			     const struct ip_conntrack_tuple *tuple)
{
	const unsigned short sport = ntohs(tuple->src.u.udp.port);
	const unsigned short dport = ntohs(tuple->dst.u.udp.port);
	int written;

	written = sprintf(buffer, "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu",
			  NIPQUAD((tuple->src.ip)),
			  sport,
			  NIPQUAD((tuple->dst.ip)),
			  dport);
	return written;
}
/*
 * Format an ICMP tuple as "src->dst type=N" into buffer.
 * Returns the number of characters written (sprintf result).
 */
unsigned int icmp_print_tuple(char *buffer,
			      const struct ip_conntrack_tuple *tuple)
{
	const unsigned int icmp_type = tuple->dst.u.icmp.type;
	int written;

	written = sprintf(buffer, "%u.%u.%u.%u->%u.%u.%u.%u type=%u",
			  NIPQUAD((tuple->src.ip)),
			  NIPQUAD((tuple->dst.ip)),
			  icmp_type);
	return written;
}
/*
 * Format a tuple of any other protocol as "src->dst" into buffer.
 * Returns the number of characters written (sprintf result).
 */
unsigned int generic_print_tuple(char *buffer,
				 const struct ip_conntrack_tuple *tuple)
{
	int written;

	written = sprintf(buffer, "%u.%u.%u.%u->%u.%u.%u.%u",
			  NIPQUAD((tuple->src.ip)),
			  NIPQUAD((tuple->dst.ip)));
	return written;
}
/*
 * Format tuple into buf using the printer that matches its protocol number
 * (TCP, UDP, ICMP, or the generic address-only form).
 * Returns the number of characters written.
 */
unsigned int print_tuple(char * buf,const struct ip_conntrack_tuple *tuple)
{
	const unsigned int protonum = tuple->dst.protonum;

	if (protonum == IPPROTO_TCP)
		return tcp_print_tuple(buf,tuple);
	if (protonum == IPPROTO_UDP)
		return udp_print_tuple(buf,tuple);
	if (protonum == IPPROTO_ICMP)
		return icmp_print_tuple(buf,tuple);
	return generic_print_tuple(buf,tuple);
}
/* getsockopt option numbers; must match the values used by the kernel module. */
#define SO_GET_TC_COUNTER_INFO 256
#define SO_GET_TC_COUNTER_COUNT 257
#define SO_GET_ALL_TC_COUNTERS 258
#define SO_GET_TC_COUNTER_BY_IP 259
#define SO_GET_ALL_CONNS_BY_IP 260
/*
Mapping of option numbers to command-line switches:
#define SO_GET_TC_COUNTER_INFO 256 tctable -i
#define SO_GET_TC_COUNTER_COUNT 257 tctable -c
#define SO_GET_ALL_TC_COUNTERS 258 tctable -a
#define SO_GET_TC_COUNTER_BY_IP 259 tctable -d ip
#define SO_GET_ALL_CONNS_BY_IP 260 tctable -s ip
*/
/* Print the command-line help text to standard output. */
void print_usage()
{
	static const char *const help_lines[] = {
		"Usage: tctable -{icads} [IP]\n",
		"commands:\n",
		"i - show tc_counter info\n",
		"c - show tc_counter count\n",
		"a - show all tc_counters info\n",
		"d IP - show detail tc_counter info specified by ip\n",
		"s IP - show all ipc_counters info specified by ip\n",
	};
	unsigned int line;

	for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
		printf("%s", help_lines[line]);
}
/*
 * Query the kernel module's identification and compare it with the values
 * this program was built with.
 *
 * fd:   socket used for the getsockopt call.
 * show: non-zero to print the kernel and user values side by side.
 *
 * Returns 1 when magic, version and both record sizes match, 0 when the
 * query fails or anything differs.
 *
 * The option length is passed as socklen_t, as getsockopt requires, and the
 * sizeof values are converted to unsigned int to match their %u specifiers.
 */
int check_version(int fd,int show)
{
	struct tc_counter_info info;
	socklen_t len = sizeof(info);

	if (getsockopt(fd, 0, SO_GET_TC_COUNTER_INFO, &info, &len) == -1) {
		printf("getsockopt failed!\n");
		return 0;
	}

	if (show) {
		printf("magic:kernel=%x,user=%x\n", info.magic, TC_COUNTER_MAGIC);
		printf("version:kernel=%u,user=%u\n", info.ver, TC_COUNTER_VERSION);
		printf("kernel HZ=%u\n", info.hz);
		printf("tc_counter_user_size:kernel=%u,user=%u\n",
		       info.tc_counter_user_size,
		       (unsigned int)sizeof(struct tc_counter_user));
		printf("ipc_counter_user_size:kernel=%u,user=%u\n",
		       info.ipc_counter_user_size,
		       (unsigned int)sizeof(struct ipc_counter_user));
	}

	if (info.magic != TC_COUNTER_MAGIC
	    || info.ver != TC_COUNTER_VERSION
	    || info.tc_counter_user_size != sizeof(struct tc_counter_user)
	    || info.ipc_counter_user_size != sizeof(struct ipc_counter_user)) {
		printf("Version mismatch!\n");
		return 0;
	}
	return 1;
}
int main(int argc,char * argv[])
{
int fd,i,ret=0,len;
char * buf,* new_buf;
struct tc_counter_user *tcu;
struct ipc_counter_user *ccu;
u_int32_t ip=0;
if(argc==1||argv[1][0]!='-'||argv[1][2])
{
print_usage();
return -1;
}
fd=socket(AF_INET,SOCK_DGRAM,0);
if(fd==-1)
{
printf("socket failed!\n");
return errno;
}
if(!check_version(fd,argv[1][1]=='i')) {close(fd);return -1;}
if(argv[1][1]=='i') return 0;
/*
为何使用mmap?
当以类似于iptraf的方式监控tctable时(下一步实现),如果数据量变化很大,可以在buf后继续mmap扩大buf,而不会产生内存碎片.
malloc可能会因为频繁调用产生大量内存碎片。
为何使用mlock?
通常情况下,mmap后不会分配物理内存
在内核中读取数据时的代码如下
READ_LOCK();
copy_to_user();
READ_UNLOCK();
copy_to_user会导致缺页异常,由于READ_LOCK,缺页异常处理程序会认为是在中断中,不会分配页,从而失败
使用mlock,强行分配并锁定页很好地解决了这个问题,缺点是浪费一点内存.
*/
buf=(char *)mmap(NULL,BUF_SIZE,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,0,0);
if(buf==MAP_FAILED)
{
printf("mmap failed!\n");
close(fd);
return errno;
}
ret=mlock(buf,BUF_SIZE);
if(ret)
{
printf("mlock failed!");
munmap(buf,BUF_SIZE);
close(fd);
return errno;
}
/*
buf=(char *)malloc(BUF_SIZE);
if(buf==NULL)
{
printf("malloc failed!\n");
close(fd);
return -1;
}
*/
//for(i=0;i // buf[i]='0';
switch(argv[1][1])
{
case 'c':
len=4;
ret=getsockopt(fd,0,SO_GET_TC_COUNTER_COUNT,buf,&len);
if(ret==-1)
{
printf("getsockopt failed!\n");
break;
}
printf("tc_counter_count=%d\n",*(int *)buf);
ret=0;
break;
case 'a':
len=BUF_SIZE;
//printf("buf=%p,end=%p\n",buf,&buf[len]);
ret=getsockopt(fd,0,SO_GET_ALL_TC_COUNTERS,buf,&len);
if(ret==-1)
{
printf("getsockopt failed!\n");
break;
}
printf("len=%d\n",len);
tcu=(struct tc_counter_user *)buf;
for(i=0;i {
u_int32_t conn=0;
u_int64_t pcnt=0,bcnt=0;
int j;
for(j=0;j {
int k;
conn+=tcu->proto[j].conn;
for(k=0;k {
pcnt+=tcu->proto[j].dir[k].pcnt;
bcnt+=tcu->proto[j].dir[k].pcnt;
}
}
printf("%u.%u.%u.%u conn=%u pcnt=%Lu bcnt=%Lu last=%u next_id=%Lu use=%u\n",NIPQUAD(tcu->ip),conn,pcnt,bcnt,tcu->last,tcu->next_conn_id,tcu->use);
}
ret=0;
break;
case 'd':
if(argc!=3)
break;
ip=in_aton(argv[2]);
len=BUF_SIZE;
//printf("ip=%u.%u.%u.%u,buf=%p,end=%p\n",NIPQUAD(ip),buf,&buf[len]);
*(u_int32_t *)buf=ip;
ret=getsockopt(fd,0,SO_GET_TC_COUNTER_BY_IP,buf,&len);
if(ret==-1)
{
printf("getsockopt failed!\n");
break;
}
//printf("len=%d\n",len);
tcu=(struct tc_counter_user *)buf;
printf("%u.%u.%u.%u\n",NIPQUAD(tcu->ip));
for(i=0;i {
int j;
printf("%s conn=%u ",tc_proto_names[i],tcu->proto[i].conn);
for(j=0;j printf("%s bcnt=%Lu pcnt=%Lu ",tc_dir_names[j],tcu->proto[i].dir[j].pcnt,tcu->proto[i].dir[j].bcnt);
printf("\n");
}
ret=0;
break;
case 's':
if(argc!=3)
break;
ip=in_aton(argv[2]);
len=BUF_SIZE;
//printf("buf=%p,end=%p\n",buf,&buf[len]);
*(u_int32_t *)buf=ip;
ret=getsockopt(fd,0,SO_GET_ALL_CONNS_BY_IP,buf,&len);
if(ret==-1)
{
printf("getsockopt failed!\n");
break;
}
printf("len=%d\n",len);
ccu=(struct ipc_counter_user *)buf;
for(i=0;i {
int j;
char tmp[256];
printf("id=%Lu last=%u %s",ccu->id,ccu->last,tc_proto_names[get_tc_proto(ccu->tuple[0].dst.protonum)]);
for(j=0;j {
print_tuple(tmp,&ccu->tuple[j]);
printf(" %s %s pcnt=%Lu bcnt=%Lu",ip_ct_dir_names[j],tmp,ccu->cnt[j].pcnt, ccu->cnt[j].bcnt);
}
printf("\n");
}
ret=0;
break;
default:
print_usage();
ret=-1;
break;
}
munmap(buf,BUF_SIZE);
//free(buf);
close(fd);
return ret;
}
运行结果示例
[root@localhost netfilter]# ./tctable -i
magic:kernel=73777a,user=73777a
version:kernel=1,user=1
kernel HZ=100
tc_counter_user_size:kernel=164,user=164
ipc_counter_user_size:kernel=76,user=76
[root@localhost netfilter]# ./tctable -c
tc_counter_count=13
[root@localhost netfilter]# ./tctable -a
len=13
192.168.1.226 conn=1 pcnt=109 bcnt=109 last=9018027 next_id=101 use=2
192.168.1.227 conn=0 pcnt=1 bcnt=1 last=8968118 next_id=1 use=1
192.168.1.229 conn=0 pcnt=1 bcnt=1 last=8977638 next_id=1 use=1
192.168.1.230 conn=1 pcnt=32 bcnt=32 last=9019686 next_id=4 use=2
192.168.1.236 conn=0 pcnt=4 bcnt=4 last=8970537 next_id=4 use=1
192.168.1.238 conn=0 pcnt=32 bcnt=32 last=8999178 next_id=7 use=1
192.168.1.240 conn=0 pcnt=3 bcnt=3 last=9014858 next_id=3 use=1
192.168.1.241 conn=0 pcnt=1 bcnt=1 last=8969757 next_id=1 use=1
192.168.1.242 conn=0 pcnt=7 bcnt=7 last=9010599 next_id=4 use=1
192.168.1.246 conn=0 pcnt=5 bcnt=5 last=8975482 next_id=3 use=1
192.168.1.249 conn=0 pcnt=1 bcnt=1 last=9013102 next_id=1 use=1
192.168.1.250 conn=2 pcnt=11227 bcnt=11227 last=9020660 next_id=18 use=3
192.168.1.252 conn=0 pcnt=32 bcnt=32 last=9002545 next_id=12 use=1
[root@localhost netfilter]# ./tctable -d 192.168.1.250
192.168.1.250
tcp conn=2 tx bcnt=5842 pcnt=1073769 rx bcnt=5470 pcnt=1002070
udp conn=0 tx bcnt=21 pcnt=2614 rx bcnt=5 pcnt=816
icmp conn=0 tx bcnt=0 pcnt=0 rx bcnt=0 pcnt=0
other conn=0 tx bcnt=0 pcnt=0 rx bcnt=0 pcnt=0
[root@localhost netfilter]# ./tctable -s 192.168.1.250
len=2
id=0 last=9022270 tcp orig 192.168.1.250:60222->192.168.1.231:22 pcnt=1434 bcnt=100000 rely 192.168.1.231:22->192.168.1.250:60222 pcnt=1264 bcnt=104348
id=1 last=9019057 tcp orig 192.168.1.250:57752->192.168.1.231:139 pcnt=4406 bcnt=972948 rely 192.168.1.231:139->192.168.1.250:57752 pcnt=4202 bcnt=897518
[root@localhost netfilter]#
阅读(2037) | 评论(0) | 转发(0) |