Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1658822
  • 博文数量: 782
  • 博客积分: 2455
  • 博客等级: 大尉
  • 技术积分: 4140
  • 用 户 组: 普通用户
  • 注册时间: 2011-04-06 21:37
个人简介

Linux ,c/c++, web,前端,php,js

文章分类

全部博文(782)

文章存档

2015年(8)

2014年(28)

2013年(110)

2012年(307)

2011年(329)

分类:

2012-10-18 17:54:40

How conntrack works in Netfilter(Part 1 - Initialization)

  本博客Netfilter/IPtables系列文章均基于Linux2.6.30内核。
  本文档版权归hereitis所有,可以自由拷贝/转载,转载时请保持文档的完整性并且注明来源,禁止用于任何商业用途。
  hereitis.cu@gmail.com


  1. Initialization
    1. nf_conntrack_standalone
      1. Big picture (assume there are no additional network namespaces, only the single global one; in other words, CONFIG_NET_NS is not defined.)

      1. Some details
        1. Initialize some global hashtable variables(net/netfilter/nf_conntrack_core.c)
static int nf_conntrack_init_init_net(void)
{
    int max_factor = 8;
    int ret;

    /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
     * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
    if (!nf_conntrack_htable_size) {
        nf_conntrack_htable_size
            = (((num_physpages << PAGE_SHIFT) / 16384)
               / sizeof(struct hlist_head));  // Hashtable size is related to memory size and maximum size is 16384
        if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
            nf_conntrack_htable_size = 16384;
        if (nf_conntrack_htable_size < 32)
            nf_conntrack_htable_size = 32;

        /* Use a max. factor of four by default to get the same max as
         * with the old struct list_heads. When a table size is given
         * we use the old value of 8 to avoid reducing the max.
         * entries. */
        max_factor = 4;
    }
    nf_conntrack_max = max_factor * nf_conntrack_htable_size; // maximum tracking connections

    printk("nf_conntrack version %s (%u buckets, %d max)\n",
           NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
           nf_conntrack_max);

    nf_conntrack_cachep = kmem_cache_create("nf_conntrack",  //
                        sizeof(struct nf_conn),
                        0, SLAB_DESTROY_BY_RCU, NULL);
    if (!nf_conntrack_cachep) {
        printk(KERN_ERR "Unable to create nf_conn slab cache\n");
        ret = -ENOMEM;
        goto err_cache;
    }

    ret = nf_conntrack_proto_init();
    if (ret < 0)
        goto err_proto;

    ret = nf_conntrack_helper_init();
    if (ret < 0)
        goto err_helper;

    return 0;

err_helper:
    nf_conntrack_proto_fini();
err_proto:
    kmem_cache_destroy(nf_conntrack_cachep);
err_cache:
    return ret;
}
  1. nf_conntrack_proto_init(net/netfilter/nf_conntrack_proto.c)
/*
 * Calls nf_ct_l4proto_register_sysctl() for the generic L4 protocol and
 * then points every entry of nf_ct_l3protos[] (AF_MAX slots) at the
 * generic L3 protocol.
 *
 * Returns 0 on success, or the negative value reported by
 * nf_ct_l4proto_register_sysctl().
 */
int nf_conntrack_proto_init(void)
{
    unsigned int family = 0;
    int err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);

    if (err < 0)
        return err;

    /* Every address family starts out on the generic L3 handler. */
    while (family < AF_MAX) {
        rcu_assign_pointer(nf_ct_l3protos[family],
                   &nf_conntrack_l3proto_generic);
        family++;
    }
    return 0;
}
  1. nf_conntrack_helper_init(net/netfilter/nf_conntrack_helper.c)
/*
 * Allocates the helper hash table and registers the helper extension
 * type (helper_extend).
 *
 * Returns 0 on success, -ENOMEM when the hash table cannot be allocated,
 * or the negative value returned by nf_ct_extend_register(); in the
 * latter case the hash table is freed again before returning.
 */
int nf_conntrack_helper_init(void)
{
    int ret;

    nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
    nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
                          &nf_ct_helper_vmalloc, 0);
    if (!nf_ct_helper_hash)
        return -ENOMEM;

    ret = nf_ct_extend_register(&helper_extend);
    if (ret >= 0)
        return 0;

    /* Extension registration failed: give the table back. */
    nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
                 nf_ct_helper_hsize);
    return ret;
}
  1. nf_ct_extend_register(net/netfilter/nf_conntrack_extend.c)
/*
 * Installs @type in nf_ct_ext_types[] at index type->id, under
 * nf_ct_ext_type_mutex.
 *
 * Returns 0 on success or -EBUSY when that slot is already occupied.
 */
int nf_ct_extend_register(struct nf_ct_ext_type *type)
{
    int ret;

    mutex_lock(&nf_ct_ext_type_mutex);
    if (nf_ct_ext_types[type->id]) {
        ret = -EBUSY;
    } else {
        /* Set alloc_size before the type is published so that
         * nf_ct_ext_create() can allocate enough area even before
         * update_alloc_size() has run. */
        type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
                   + type->len;
        rcu_assign_pointer(nf_ct_ext_types[type->id], type);
        update_alloc_size(type);
        ret = 0;
    }
    mutex_unlock(&nf_ct_ext_type_mutex);

    return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_extend_register);
  1. nf_conntrack_expect_init(net/netfilter/nf_conntrack_expect.c)
int nf_conntrack_expect_init(struct net *net)
{
    int err = -ENOMEM;

    if (net_eq(net, &init_net)) {
        if (!nf_ct_expect_hsize) {
            nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
            if (!nf_ct_expect_hsize)
                nf_ct_expect_hsize = 1;
        }
        nf_ct_expect_max = nf_ct_expect_hsize * 4;
    }

    net->ct.expect_count = 0;
    net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                          &net->ct.expect_vmalloc, 0); // Global hash list for expectations
    if (net->ct.expect_hash == NULL)
        goto err1;

    if (net_eq(net, &init_net)) {
        nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                    sizeof(struct nf_conntrack_expect),
                    0, 0, NULL);
        if (!nf_ct_expect_cachep)
            goto err2;
    }

    err = exp_proc_init(net);
    if (err < 0)
        goto err3;

    return 0;

err3:
    if (net_eq(net, &init_net))
        kmem_cache_destroy(nf_ct_expect_cachep);
err2:
    nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                 nf_ct_expect_hsize);
err1:
    return err;
}
  1. How does IPv4 initialize the connection tracking module?
    1. Big picture

    1. More details
      1. nf_conntrack_l4proto_register(net/netfilter/nf_conntrack_proto.c)
int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
    int ret = 0;

    if (l4proto->l3proto >= PF_MAX)
        return -EBUSY;

    if ((l4proto->to_nlattr && !l4proto->nlattr_size)
        || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
        return -EINVAL;

    mutex_lock(&nf_ct_proto_mutex);
    if (!nf_ct_protos[l4proto->l3proto]) {
        /* l3proto may be loaded latter. */
        struct nf_conntrack_l4proto **proto_array;
        int i;

        proto_array = kmalloc(MAX_NF_CT_PROTO *
                      sizeof(struct nf_conntrack_l4proto *),
                      GFP_KERNEL);
        if (proto_array == NULL) {
            ret = -ENOMEM;
            goto out_unlock;
        }

        for (i = 0; i < MAX_NF_CT_PROTO; i++)
            proto_array[i] = &nf_conntrack_l4proto_generic;
        nf_ct_protos[l4proto->l3proto] = proto_array;
    } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
                    &nf_conntrack_l4proto_generic) {
        ret = -EBUSY;
        goto out_unlock;
    }

    ret = nf_ct_l4proto_register_sysctl(l4proto);
    if (ret < 0)
        goto out_unlock;

    l4proto->nla_size = 0;
    if (l4proto->nlattr_size)
        l4proto->nla_size += l4proto->nlattr_size();
    if (l4proto->nlattr_tuple_size)
        l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();

    rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
               l4proto);


out_unlock:
    mutex_unlock(&nf_ct_proto_mutex);
    return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
  1. nf_conntrack_l3proto_register(net/netfilter/nf_conntrack_proto.c; called from net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c)
/*
 * Installs @proto in nf_ct_l3protos[proto->l3proto], replacing the
 * generic L3 protocol, under nf_ct_proto_mutex.
 *
 * Returns:
 *   -EBUSY   l3proto is >= AF_MAX, or the slot no longer holds the
 *            generic protocol;
 *   -EINVAL  tuple_to_nlattr is set without nlattr_tuple_size;
 *   otherwise the value returned by nf_ct_l3proto_register_sysctl()
 *   (negative on failure).
 */
int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
{
    int ret;

    if (proto->l3proto >= AF_MAX)
        return -EBUSY;

    if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
        return -EINVAL;

    mutex_lock(&nf_ct_proto_mutex);
    if (nf_ct_l3protos[proto->l3proto] == &nf_conntrack_l3proto_generic) {
        ret = nf_ct_l3proto_register_sysctl(proto);
        if (ret >= 0) {
            /* Three times the tuple attribute size, when a size
             * callback is provided. */
            if (proto->nlattr_tuple_size)
                proto->nla_size = 3 * proto->nlattr_tuple_size();

            rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
                       proto);
        }
    } else {
        /* Some other L3 protocol already owns this slot. */
        ret = -EBUSY;
    }
    mutex_unlock(&nf_ct_proto_mutex);

    return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
  1. What we have after IPv4 connection tracking initialization




阅读(1217) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~