|
文件: | 连接跟踪的协议之间的关系图.pdf |
大小: | 23KB |
下载: | 下载 |
|
本文参考了snriyt兄的文章http://blog.chinaunix.net/u3/102292/showart_2212096.html,在自己理解的基础上做了重新的描述和稍微的总结。
本文内容基于内核2.6.33.2
作者:bbo
一、重要协议分析
首先对nf_conntrack_tuple_hash的结构从上到下一层层进行解析。
/* Connections have two entries in the hash table: one for each way */
struct nf_conntrack_tuple_hash {
struct hlist_nulls_node hnnode;//就是用来将tuple链接起来的连接点
struct nf_conntrack_tuple tuple;
};
struct nf_conntrack_tuple是一个最重要结构之一,它的定义如下。
/* This contains the information to distinguish a connection. */
struct nf_conntrack_tuple {
struct nf_conntrack_man src;
/* These are the parts of the tuple which are fixed. */
struct {
union nf_inet_addr u3; //用来存放目的ip
union {
/* Add other protocols here. */
__be16 all;
struct {
__be16 port;
} tcp;
struct {
__be16 port;
} udp;
struct {
u_int8_t type, code;
} icmp;
struct {
__be16 port;
} dccp;
struct {
__be16 port;
} sctp;
struct {
__be16 key;
} gre;
} u;
/* The protocol. */
u_int8_t protonum;
/* The direction (for tuplehash) */
u_int8_t dir;
} dst;
};
struct nf_conntrack_tuple其实包括了源地址(源ip,源端口或其他)和目的地址(目的ip,目的端口或其他)的信息。其中,dst的信息在struct nf_conntrack_tuple 中已经做了定义。下面看一下struct nf_conntrack_man src的结构。
struct nf_conntrack_man {
union nf_inet_addr u3;
union nf_conntrack_man_proto u;
/* Layer 3 protocol */
u_int16_t l3num;
};
union nf_inet_addr 的结构为,
union nf_inet_addr {
__u32 all[4]; //推测:用于以统一方式整体访问/比较地址(IPv4/IPv6共用这16字节),待确认
__be32 ip;
__be32 ip6[4];
struct in_addr in;
struct in6_addr in6;
};
nf_conntrack_man_proto的结构如下,
/* The protocol-specific manipulable parts of the tuple: always in
network order! */
union nf_conntrack_man_proto {
/* Add other protocols here. */
__be16 all;
struct {
__be16 port;
} tcp;
struct {
__be16 port;
} udp;
struct {
__be16 id;
} icmp;
struct {
__be16 port;
} dccp;
struct {
__be16 port;
} sctp;
struct {
__be16 key; /* GRE key is 32bit, PPtP only uses 16bit */
} gre;
};
通过比较可以发现,src比dst少了两个重要元素:u_int8_t protonum (The protocol) 和 u_int8_t dir (用来标记数据包的连接跟踪方向)。
conntrack中一个重要结构,struct nf_conn 类似于2.4中的struct ip_conntrack。结构如下:
struct nf_conn {
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
plus 1 for any connection(s) we are `master' for */
struct nf_conntrack ct_general;
/* These are my tuples; original and reply */
//tuplehash[]中含有tuplehash[ORIGINAL]和tuplehash[REPLY]两个元素
struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
/* Have we seen traffic both ways yet? (bitset) */
unsigned long status;//位图,通常和枚举类型ip_conntrack_status进行位运算来判断连接状态
/* If we were expected by an expectation, this will be it */
struct nf_conn *master;
/* Timer function; drops refcnt when it goes off. */
struct timer_list timeout;
#if defined(CONFIG_NF_CONNTRACK_MARK)
u_int32_t mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
u_int32_t secmark;
#endif
/* Storage reserved for other modules: */
union nf_conntrack_proto proto;
/* Extensions */
struct nf_ct_ext *ext; //扩展功能(acct、helper、ecache等)的存储区
#ifdef CONFIG_NET_NS
struct net *ct_net; //标记此连接属于哪个net命名空间
#endif
};
//defined in net_namespace.h
struct net {
struct netns_ct ct;/*连接跟踪表,指向struct netns_ct结构*/
};
//defined in netns/conntrack.h
struct netns_ct {
atomic_t count;
unsigned int expect_count;
unsigned int htable_size;
struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash; //本net命名空间的连接跟踪哈希表指针
struct hlist_head *expect_hash;
struct hlist_nulls_head unconfirmed;
struct hlist_nulls_head dying;
struct ip_conntrack_stat *stat;
int sysctl_events;
unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
int hash_vmalloc;
int expect_vmalloc;
char *slabname;
};
好不容易画了个协议之间的
关系图,还不会设置在内容中显示,惭愧!用
附件上传吧。
二、连接跟踪的实现过程
//in nf_conntrack_l3proto_ipv4.c
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
{
.hook = ipv4_conntrack_in,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = ipv4_confirm,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
static unsigned int ipv4_conntrack_in(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
}
此函数起一个中转的作用ipv4_conntrack_in()-->nf_conntrack_in()
755 unsigned int
756 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
757 struct sk_buff *skb)
758 {
759 struct nf_conn *ct;
760 enum ip_conntrack_info ctinfo;
761 struct nf_conntrack_l3proto *l3proto;
762 struct nf_conntrack_l4proto *l4proto;
763 unsigned int dataoff;
764 u_int8_t protonum;
765 int set_reply = 0;
766 int ret;
767
768 /* Previously seen (loopback or untracked)? Ignore. */
769 if (skb->nfct) {
770 NF_CT_STAT_INC_ATOMIC(net, ignore);
771 return NF_ACCEPT;
772 }
773
774 /* rcu_read_lock()ed by nf_hook_slow */
/*根据pf从nf_ct_l3protos全局链表中查找三层协议*/
775 l3proto = __nf_ct_l3proto_find(pf);
/*假设是ipv4协议,则调用ipv4_get_l4proto函数,此函数主要功能是计算出dataoff,和查找出第四层的协议类型并保存在protonum中*/
776 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
777 &dataoff, &protonum);
778 if (ret <= 0) {
779 pr_debug("not prepared to track yet or error occured\n");
780 NF_CT_STAT_INC_ATOMIC(net, error);
781 NF_CT_STAT_INC_ATOMIC(net, invalid);
782 return -ret;
783 }
784
785 l4proto = __nf_ct_l4proto_find(pf, protonum);//在struct nf_conntrack_l4proto **proto_arry链表中查找注册的l4协议
786
787 /* It may be an special packet, error, unclean...
788 * inverse of the return code tells to the netfilter
789 * core what to do with the packet. */
790 if (l4proto->error != NULL) {
791 ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
792 if (ret <= 0) {
793 NF_CT_STAT_INC_ATOMIC(net, error);
794 NF_CT_STAT_INC_ATOMIC(net, invalid);
795 return -ret;
796 }
797 }
798 /*这是个非常重要的函数,主要完成的任务是根据skb获得其tuple,后跟据tuple查找(或初始化)其struct nf_conntrack_tuple_hash结构,找到数据包的连接跟踪信息ct,修改连接信息*/
799 ct =
resolve_normal_ct(net, skb, dataoff, pf, protonum,
800 l3proto, l4proto, &set_reply, &ctinfo);
801 if (!ct) {
802 /* Not valid part of a connection */
803 NF_CT_STAT_INC_ATOMIC(net, invalid);
804 return NF_ACCEPT;
805 }
806
807 if (IS_ERR(ct)) {
808 /* Too stressed to deal. */
809 NF_CT_STAT_INC_ATOMIC(net, drop);
810 return NF_DROP;
811 }
812
813 NF_CT_ASSERT(skb->nfct);
814
815 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); //在协议中,假设ip包协议类型为udp,则调用的函数是udp_packet,但是具体函数的实现功能,我暂时还未分析
816 if (ret <= 0) {
817 /* Invalid: inverse of the return code tells
818 * the netfilter core what to do */
819 pr_debug("nf_conntrack_in: Can't track with proto module\n");
820 nf_conntrack_put(skb->nfct);
821 skb->nfct = NULL;
822 NF_CT_STAT_INC_ATOMIC(net, invalid);
823 if (ret == -NF_DROP)
824 NF_CT_STAT_INC_ATOMIC(net, drop);
825 return -ret;
826 }
827
828 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
829 nf_conntrack_event_cache(IPCT_STATUS, ct);
830
831 return ret;
832 }
695
696 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
697 static inline struct nf_conn *
698 resolve_normal_ct(struct net *net,
699 struct sk_buff *skb,
700 unsigned int dataoff,
701 u_int16_t l3num,
702 u_int8_t protonum,
703 struct nf_conntrack_l3proto *l3proto,
704 struct nf_conntrack_l4proto *l4proto,
705 int *set_reply,
706 enum ip_conntrack_info *ctinfo)
707 {
708 struct nf_conntrack_tuple tuple;
709 struct nf_conntrack_tuple_hash *h;
710 struct nf_conn *ct;
711
712 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), //根据skb中数据报信息求出其tuple
713 dataoff, l3num, protonum, &tuple, l3proto,
714 l4proto)) {
715 pr_debug("resolve_normal_ct: Can't get tuple\n");
716 return NULL;
717 }
718
719 /* look for tuple match */
720 h = nf_conntrack_find_get(net, &tuple); //在全局的连接跟踪表中查找tuple对应的struct nf_conntrack_tuple_hash * 结构h
721 if (!h) {
722 h =
init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); //全局连接跟踪表里没有tuple对应的选项,则初始化连接信息
723 if (!h)
724 return NULL;
725 if (IS_ERR(h))
726 return (void *)h;
727 }
728 ct = nf_ct_tuplehash_to_ctrack(h);//找出tuple对应的结构为struct nf_conn 的连接信息
729
730 /* It exists; we have (non-exclusive) reference. */
731 /* #define NF_CT_DIRECTION(h) \
732 ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
733 defined in nf_conntrack_tuple.h
734 */
735 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
736 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
737 /* Please set reply bit if this packet OK */
738 *set_reply = 1;
739 } else {//对于PRE_ROUTING钩子点上的数据包来说,都是IP_CT_DIR_ORIG,所以会直接执行else分支
740 /* Once we've had two way comms, always ESTABLISHED. */
741 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {//IPS_SEEN_REPLY_BIT会在nf_conntrack_in()中检测到反向(REPLY)数据包时设置(见上文set_reply分支)
742 pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
743 *ctinfo = IP_CT_ESTABLISHED;
744 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {//在init_conntrack中,如果判断为expectation链接,则会设置IPS_EXPECTED_BIT位
745 pr_debug("nf_conntrack_in: related packet for %p\n",
746 ct);
747 *ctinfo = IP_CT_RELATED;
748 } else {//新的连接,则将*ctinfo设为IP_CT_NEW
749 pr_debug("nf_conntrack_in: new packet for %p\n", ct);
750 *ctinfo = IP_CT_NEW;
751 }
752 *set_reply = 0;
753 }
754 skb->nfct = &ct->ct_general;
755 skb->nfctinfo = *ctinfo;
756 return ct;
757 }
初始化连接信息的函数init_conntrack()的分析:
621 static struct nf_conntrack_tuple_hash *
622 init_conntrack(struct net *net,
623 const struct nf_conntrack_tuple *tuple,
624 struct nf_conntrack_l3proto *l3proto,
625 struct nf_conntrack_l4proto *l4proto,
626 struct sk_buff *skb,
627 unsigned int dataoff)
628 {
629 struct nf_conn *ct;
630 struct nf_conn_help *help;
631 struct nf_conntrack_tuple repl_tuple;
632 struct nf_conntrack_expect *exp;
633
634 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {//计算相反tuple
635 pr_debug("Can't invert tuple.\n");
636 return NULL;
637 }
638
639 ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);//为新的连接信息分配空间
640 if (IS_ERR(ct)) {
641 pr_debug("Can't allocate conntrack.\n");
642 return (struct nf_conntrack_tuple_hash *)ct;
643 }
644
645 if (!l4proto->new(ct, skb, dataoff)) {
646 nf_conntrack_free(ct);
647 pr_debug("init conntrack: can't track with proto module\n");
648 return NULL;
649 }
650
651 nf_ct_acct_ext_add(ct, GFP_ATOMIC);//添加acct, 这两个函数具体实现我还没有分析
652 nf_ct_ecache_ext_add(ct, GFP_ATOMIC);//添加扩展协议
653
654 spin_lock_bh(&nf_conntrack_lock);
655 exp = nf_ct_find_expectation(net, tuple);//检查该数据包是否为expectation连接,如果是,则会将相应位标记,并进行expectation相关处理
656 if (exp) {
657 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
658 ct, exp);
659 /* Welcome, Mr. Bond. We've been expecting you... */
660 __set_bit(IPS_EXPECTED_BIT, &ct->status);//设置EXPECTED位
661 ct->master = exp->master;
662 if (exp->helper) {
663 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
664 if (help)
665 rcu_assign_pointer(help->helper, exp->helper);
666 }
667
668 #ifdef CONFIG_NF_CONNTRACK_MARK
669 ct->mark = exp->master->mark;
670 #endif
671 #ifdef CONFIG_NF_CONNTRACK_SECMARK
672 ct->secmark = exp->master->secmark;
673 #endif
674 nf_conntrack_get(&ct->master->ct_general);
675 NF_CT_STAT_INC(net, expect_new);
676 } else {
677 __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
678 NF_CT_STAT_INC(net, new);
679 }
680
681 /* Overload tuple linked list to put us in unconfirmed list. */
682 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
683 &net->ct.unconfirmed); //将tuplehash[ORIGINAL]添加到net->ct.unconfirmed链表中
684
685 spin_unlock_bh(&nf_conntrack_lock);
686
687 if (exp) {
688 if (exp->expectfn)
689 exp->expectfn(ct, exp);
690 nf_ct_expect_put(exp);
691 }
692
693 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
694 }
现在分析到ipv4_confirm()函数了,如下:
89 static unsigned int ipv4_confirm(unsigned int hooknum,
90 struct sk_buff *skb,
91 const struct net_device *in,
92 const struct net_device *out,
93 int (*okfn)(struct sk_buff *))
94 {
95 struct nf_conn *ct;
96 enum ip_conntrack_info ctinfo;
97 const struct nf_conn_help *help;
98 const struct nf_conntrack_helper *helper;
99 unsigned int ret;
100
101 /* This is where we call the helper: as the packet goes out. */
102 ct = nf_ct_get(skb, &ctinfo);//根据skb中的参数获得连接跟踪协议的具体信息
103 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
104 goto out;
105 ......
此部分主要是介绍扩展功能的一些实现。由于本人暂时对扩展部分还未做分析,本文在这里略过此部分的讲解,见谅。
132 out:
133 /* We've seen it coming out the other side: confirm it */
134 return
nf_conntrack_confirm(skb);
135 }
56 /* Confirm a connection: returns NF_DROP if packet must be dropped. */
57 static inline int nf_conntrack_confirm(struct sk_buff *skb)
58 {
59 struct nf_conn *ct = (struct nf_conn *)skb->nfct;
60 int ret = NF_ACCEPT;
61
62 if (ct && ct != &nf_conntrack_untracked) {
63 if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) //连接尚未confirm,且未处于dying(即将销毁)状态
64 ret =
__nf_conntrack_confirm(skb);
65 if (likely(ret == NF_ACCEPT))
66 nf_ct_deliver_cached_events(ct);
67 }
68 return ret;
69 }
379 /* Confirm a connection given skb; places it in hash table */
380 int
381 __nf_conntrack_confirm(struct sk_buff *skb)
382 {
383 unsigned int hash, repl_hash;
384 struct nf_conntrack_tuple_hash *h;
385 struct nf_conn *ct;
386 struct nf_conn_help *help;
387 struct hlist_nulls_node *n;
388 enum ip_conntrack_info ctinfo;
389 struct net *net;
390
391 ct = nf_ct_get(skb, &ctinfo);//获得ct
392 net = nf_ct_net(ct);//获得net
393
394 /* ipt_REJECT uses nf_conntrack_attach to attach related
395 ICMP/TCP RST packets in other direction. Actual packet
396 which created connection will be IP_CT_NEW or for an
397 expected connection, IP_CT_RELATED. */
398 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) //这一块还没有完全明白
399 return NF_ACCEPT;
400
401 hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);//计算出tuple在连接跟踪表中的hash
402 repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);//计算出反向tuple在连接跟踪表中的hash值repl_hash
403
404 /* We're not in hash table, and we refuse to set up related
405 connections for unconfirmed conns. But packet copies and
406 REJECT will give spurious warnings here. */
407 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
408
409 /* No external references means noone else could have
410 confirmed us. */
411 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
412 pr_debug("Confirming conntrack %p\n", ct);
413
414 spin_lock_bh(&nf_conntrack_lock);
415
416 /* See if there's one in the list already, including reverse:
417 NAT could have grabbed it without realizing, since we're
418 not in the hash. If there is, we lost race. */
419 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)//查找是否有连接和反向连接
420 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
421 &h->tuple))
422 goto out;
423 hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
424 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
425 &h->tuple))
426 goto out;
427
428 /* Remove from unconfirmed list */
429 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);//将tuple从unconfirm的列表中删除
430
431 /* Timer relative to confirmation time, not original
432 setting time, otherwise we'd get timer wrap in
433 weird delay cases. */
434 ct->timeout.expires += jiffies;//设置时间
435 add_timer(&ct->timeout);
436 atomic_inc(&ct->ct_general.use);
437 set_bit(IPS_CONFIRMED_BIT, &ct->status);//将该连接设定为confirmed
438
439 /* Since the lookup is lockless, hash insertion must be done after
440 * starting the timer and setting the CONFIRMED bit. The RCU barriers
441 * guarantee that no other CPU can find the conntrack before the above
442 * stores are visible.
443 */
444 __nf_conntrack_hash_insert(ct, hash, repl_hash);//将hash和repl_hash对应的节点插入全局链表
445 NF_CT_STAT_INC(net, insert);
446 spin_unlock_bh(&nf_conntrack_lock);
447
448 help = nfct_help(ct);
449 if (help && help->helper)
450 nf_conntrack_event_cache(IPCT_HELPER, ct);
451
452 nf_conntrack_event_cache(master_ct(ct) ?
453 IPCT_RELATED : IPCT_NEW, ct);
454 return NF_ACCEPT;
455
456 out:
457 NF_CT_STAT_INC(net, insert_failed);
458 spin_unlock_bh(&nf_conntrack_lock);
459 return NF_DROP;
460 }
至此,两个主要的钩子函数ipv4_conntrack_in()和ipv4_confirm()就分析完了,其他两个钩子函数也是调用这两个函数,所以这里就不再进行分析了。由于本人现在还没有完全分析明白,所以没有对扩展模块做过多描述,望谅解!
由于本人能力有限,只是对源码做了点浅析,不能保证分析完全正确,仅供参考,也希望大家能多多指点!