Chinaunix首页 | 论坛 | 博客
  • 博客访问: 8014
  • 博文数量: 5
  • 博客积分: 16
  • 博客等级: 民兵
  • 技术积分: 35
  • 用 户 组: 普通用户
  • 注册时间: 2010-03-30 21:46
文章分类
文章存档

2011年(5)

我的朋友
最近访客

分类:

2011-11-16 21:25:16

How to register hooks in Netfilter

  本博客Netfilter/IPtables系列文章均基于Linux2.6.30内核。
  本文档版权归hereitis所有,可以自由拷贝/转载,转载时请保持文档的完整性并且注明来源,禁止用于任何商业用途。
  hereitis.cu@gmail.com


  1. How to register a hook(Take filter table as an example).

    1. Call stack

  1. Source code
    1. Module init function iptable_filter_init
      1. net/ipv4/netfilter/iptable_filter.c
static int __init iptable_filter_init(void)
{
    int ret;

    if (forward < 0 || forward > NF_MAX_VERDICT) {
        printk("iptables forward must be 0 or 1\n");
        return -EINVAL;
    }

    /* Entry 1 is the FORWARD hook */
    initial_table.entries[1].target.verdict = -forward - 1;

    ret = register_pernet_subsys(&iptable_filter_net_ops);
    if (ret < 0)
        return ret;

    /* Register hooks */
    ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
    if (ret < 0)
        goto cleanup_table;

    return ret;

 cleanup_table:
    unregister_pernet_subsys(&iptable_filter_net_ops);
    return ret;
}
  1. Hook array
struct nf_hook_ops
{
    struct list_head list;

    /* User fills in from here down. */
    nf_hookfn *hook;
    struct module *owner;
    u_int8_t pf;
    unsigned int hooknum;
    /* Hooks are ordered in ascending priority. */
    int priority;
};

static struct nf_hook_ops ipt_ops[] __read_mostly = {
    {
        .hook        = ipt_local_in_hook,
        .owner        = THIS_MODULE,
        .pf        = PF_INET,
        .hooknum    = NF_INET_LOCAL_IN,
        .priority    = NF_IP_PRI_FILTER,
    },
    {
        .hook        = ipt_hook,
        .owner        = THIS_MODULE,
        .pf        = PF_INET,
        .hooknum    = NF_INET_FORWARD,
        .priority    = NF_IP_PRI_FILTER,
    },
    {
        .hook        = ipt_local_out_hook,
        .owner        = THIS_MODULE,
        .pf        = PF_INET,
        .hooknum    = NF_INET_LOCAL_OUT,
        .priority    = NF_IP_PRI_FILTER,
    },
};
  1. Hook number
enum nf_inet_hooks {
    NF_INET_PRE_ROUTING,
    NF_INET_LOCAL_IN,
    NF_INET_FORWARD,
    NF_INET_LOCAL_OUT,
    NF_INET_POST_ROUTING,
    NF_INET_NUMHOOKS
};
  1. Hook priority(Lowest number, highest priority)
enum nf_ip_hook_priorities {
    NF_IP_PRI_FIRST = INT_MIN,
    NF_IP_PRI_CONNTRACK_DEFRAG = -400,
    NF_IP_PRI_RAW = -300,
    NF_IP_PRI_SELINUX_FIRST = -225,
    NF_IP_PRI_CONNTRACK = -200,
    NF_IP_PRI_MANGLE = -150,
    NF_IP_PRI_NAT_DST = -100,
    NF_IP_PRI_FILTER = 0,
    NF_IP_PRI_SECURITY = 50,
    NF_IP_PRI_NAT_SRC = 100,
    NF_IP_PRI_SELINUX_LAST = 225,
    NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
    NF_IP_PRI_LAST = INT_MAX,
};
  1. nf_register_hooks
    1. net/netfilter/core.c
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
    unsigned int i;
    int err = 0;

    for (i = 0; i < n; i++) {
        err = nf_register_hook(®[i]);
        if (err)
            goto err;
    }
    return err;

err:
    if (i > 0)
        nf_unregister_hooks(reg, i);
    return err;
}
  1. Loop 3 times for filter table.
  1. nf_register_hook
    1. net/netfilter/core.c
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
static DEFINE_MUTEX(nf_hook_mutex);

int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *elem;
    int err;

    err = mutex_lock_interruptible(&nf_hook_mutex);
    if (err < 0)
        return err;
    list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (reg->priority < elem->priority)  // In ascending priority order under one hook point
            break;

    }
    list_add_rcu(®->list, elem->list.prev); // Insert nf_hook_ops to list according to its priority
    mutex_unlock(&nf_hook_mutex);
    return 0;
}
EXPORT_SYMBOL(nf_register_hook);
    1. Global list array nf_hooks for all registerred hooks
  1. Final Data structure(Assume this is the very first table registerred in Netfilter and only consider ipv4)
  1. When hooks are invoked(Take receiving a IP packet as an example)?
  1. Receiving a IP packet(net/ipv4/ip_input.c)
/*
 *     Main IP Receive routine.
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
    struct iphdr *iph;
    u32 len;

    /* When the interface is in promisc. mode, drop all the crap
     * that it receives, do not try to analyse it.
     */
    if (skb->pkt_type == PACKET_OTHERHOST)
        goto drop;

    IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);

    if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
        IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
        goto out;
    }

    if (!pskb_may_pull(skb, sizeof(struct iphdr)))
        goto inhdr_error;

    iph = ip_hdr(skb);

    /*
     *    RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
     *
     *    Is the datagram acceptable?
     *
     *    1.    Length at least the size of an ip header
     *    2.    Version of 4
     *    3.    Checksums correctly. [Speed optimisation for later, skip loopback checksums]
     *    4.    Doesn't have a bogus length
     */

    if (iph->ihl < 5 || iph->version != 4)
        goto inhdr_error;

    if (!pskb_may_pull(skb, iph->ihl*4))
        goto inhdr_error;

    iph = ip_hdr(skb);

    if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
        goto inhdr_error;

    len = ntohs(iph->tot_len);
    if (skb->len < len) {
        IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
        goto drop;
    } else if (len < (iph->ihl*4))
        goto inhdr_error;

    /* Our transport medium may have padded the buffer out. Now we know it
     * is IP we can trim to the true length of the frame.
     * Note this now means skb->len holds ntohs(iph->tot_len).
     */
    if (pskb_trim_rcsum(skb, len)) {
        IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
        goto drop;
    }

    /* Remove any debris in the socket control block */
    memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

    return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
               ip_rcv_finish);


inhdr_error:
    IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
    kfree_skb(skb);
out:
    return NET_RX_DROP;
}

  1. NF_HOOK
    1. NF_HOOK(include/linux/netfilter.h)
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
    NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
    1. NF_HOOK_THRESH(include/linux/netfilter.h)
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)           \
({int __ret;                                       \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
    __ret = (okfn)(skb);     // In this example, okfn is ip_rcv_finish                          \
__ret;})
    1. nf_hook_thresh(include/linux/netfilter.h)
/**
 *    nf_hook_thresh - call a netfilter hook
 *    
 *    Returns 1 if the hook has allowed the packet to pass.  The function
 *    okfn must be invoked by the caller in this case.  Any other return
 *    value indicates the packet has been consumed by the hook.
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
                 struct sk_buff *skb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *), int thresh,
                 int cond)
{
    if (!cond)
        return 1;
#ifndef CONFIG_NETFILTER_DEBUG
    if (list_empty(&nf_hooks[pf][hook]))
        return 1;
#endif
    return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}
  1. nf_hook_slow(net/netfilter/core.c)
/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
         struct net_device *indev,
         struct net_device *outdev,
         int (*okfn)(struct sk_buff *),
         int hook_thresh)
{
    struct list_head *elem;
    unsigned int verdict;
    int ret = 0;

    /* We may already have this, but read-locks nest anyway */
    rcu_read_lock();

    elem = &nf_hooks[pf][hook];
next_hook:
    verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
                 outdev, &elem, okfn, hook_thresh);
    if (verdict == NF_ACCEPT || verdict == NF_STOP) {
        ret = 1; // This means okfn will be invoked, that is ip_rcv_finish in this example
    } else if (verdict == NF_DROP) {
        kfree_skb(skb);
        ret = -EPERM;
    } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
        if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                  verdict >> NF_VERDICT_BITS))
            goto next_hook;
    }
    rcu_read_unlock();
    return ret;
}
EXPORT_SYMBOL(nf_hook_slow);

  1. nf_iterate(net/netfilter/core.c)
unsigned int nf_iterate(struct list_head *head,
            struct sk_buff *skb,
            unsigned int hook,
            const struct net_device *indev,
            const struct net_device *outdev,
            struct list_head **i,
            int (*okfn)(struct sk_buff *),
            int hook_thresh)
{
    unsigned int verdict;

    /*
     * The caller must not block between calls to this
     * function because of risk of continuing from deleted element.
     */
    list_for_each_continue_rcu(*i, head) {
        struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

        if (hook_thresh > elem->priority)
            continue;

        /* Optimization: we don't need to hold module
           reference here, since function can't sleep. --RR */
        verdict = elem->hook(hook, skb, indev, outdev, okfn);  // Here registerred hook is invoked
        if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
            if (unlikely((verdict & NF_VERDICT_MASK)
                            > NF_MAX_VERDICT)) {
                NFDEBUG("Evil return from %p(%u).\n",
                    elem->hook, hook);
                continue;
            }
#endif
            if (verdict != NF_REPEAT)
                return verdict;
            *i = (*i)->prev; // Repeat this hook
        }
    }
    return NF_ACCEPT;
}
  1. Dig it more
    1. Networking Namespaces
    2. Macros in Linux2.6.30

阅读(643) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~