How to register hooks in Netfilter
本博客Netfilter/IPtables系列文章均基于Linux2.6.30内核。
本文档版权归hereitis所有,可以自由拷贝/转载,转载时请保持文档的完整性并且注明来源,禁止用于任何商业用途。
hereitis.cu@gmail.comHow to register a hook(Take filter table as an example).
- Call stack
- Source code
- Module init function iptable_filter_init
- net/ipv4/netfilter/iptable_filter.c
static int __init iptable_filter_init(void)
{
int ret;
if (forward < 0 || forward > NF_MAX_VERDICT) {
printk("iptables forward must be 0 or 1\n");
return -EINVAL;
}
/* Entry 1 is the FORWARD hook */
initial_table.entries[1].target.verdict = -forward - 1;
ret = register_pernet_subsys(&iptable_filter_net_ops);
if (ret < 0)
return ret;
/* Register hooks */
ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
if (ret < 0)
goto cleanup_table;
return ret;
cleanup_table:
unregister_pernet_subsys(&iptable_filter_net_ops);
return ret;
}
- Hook array
struct nf_hook_ops
{
struct list_head list;
/* User fills in from here down. */
nf_hookfn *hook;
struct module *owner;
u_int8_t pf;
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
int priority;
};
static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
.hook = ipt_local_in_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_FILTER,
},
{
.hook = ipt_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_FORWARD,
.priority = NF_IP_PRI_FILTER,
},
{
.hook = ipt_local_out_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_FILTER,
},
};
enum nf_inet_hooks {
NF_INET_PRE_ROUTING,
NF_INET_LOCAL_IN,
NF_INET_FORWARD,
NF_INET_LOCAL_OUT,
NF_INET_POST_ROUTING,
NF_INET_NUMHOOKS
};
- Hook priority(Lowest number, highest priority)
enum nf_ip_hook_priorities {
NF_IP_PRI_FIRST = INT_MIN,
NF_IP_PRI_CONNTRACK_DEFRAG = -400,
NF_IP_PRI_RAW = -300,
NF_IP_PRI_SELINUX_FIRST = -225,
NF_IP_PRI_CONNTRACK = -200,
NF_IP_PRI_MANGLE = -150,
NF_IP_PRI_NAT_DST = -100,
NF_IP_PRI_FILTER = 0,
NF_IP_PRI_SECURITY = 50,
NF_IP_PRI_NAT_SRC = 100,
NF_IP_PRI_SELINUX_LAST = 225,
NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
NF_IP_PRI_LAST = INT_MAX,
};
- nf_register_hooks
- net/netfilter/core.c
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = nf_register_hook(®[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
nf_unregister_hooks(reg, i);
return err;
}
- Loop 3 times for filter table.
- nf_register_hook
- net/netfilter/core.c
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
{
struct nf_hook_ops *elem;
int err;
err = mutex_lock_interruptible(&nf_hook_mutex);
if (err < 0)
return err;
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
if (reg->priority < elem->priority) // In ascending priority order under one hook point
break;
}
list_add_rcu(®->list, elem->list.prev); // Insert nf_hook_ops to list according to its priority
mutex_unlock(&nf_hook_mutex);
return 0;
}
EXPORT_SYMBOL(nf_register_hook);
- Global list array nf_hooks for all registerred hooks
- Final Data structure(Assume this is the very first table registerred in Netfilter and only consider ipv4)
- When hooks are invoked(Take receiving a IP packet as an example)?
- Receiving a IP packet(net/ipv4/ip_input.c)
/*
* Main IP Receive routine.
*/
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct iphdr *iph;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto out;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
iph = ip_hdr(skb);
/*
* RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
*
* Is the datagram acceptable?
*
* 1. Length at least the size of an ip header
* 2. Version of 4
* 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
* 4. Doesn't have a bogus length
*/
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto inhdr_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
/* Our transport medium may have padded the buffer out. Now we know it
* is IP we can trim to the true length of the frame.
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
if (pskb_trim_rcsum(skb, len)) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
/* Remove any debris in the socket control block */
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
inhdr_error:
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
kfree_skb(skb);
out:
return NET_RX_DROP;
}
- NF_HOOK
- NF_HOOK(include/linux/netfilter.h)
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
- NF_HOOK_THRESH(include/linux/netfilter.h)
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
({int __ret; \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
__ret = (okfn)(skb); // In this example, okfn is ip_rcv_finish \
__ret;})
- nf_hook_thresh(include/linux/netfilter.h)
/**
* nf_hook_thresh - call a netfilter hook
*
* Returns 1 if the hook has allowed the packet to pass. The function
* okfn must be invoked by the caller in this case. Any other return
* value indicates the packet has been consumed by the hook.
*/
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *), int thresh,
int cond)
{
if (!cond)
return 1;
#ifndef CONFIG_NETFILTER_DEBUG
if (list_empty(&nf_hooks[pf][hook]))
return 1;
#endif
return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}
- nf_hook_slow(net/netfilter/core.c)
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
struct list_head *elem;
unsigned int verdict;
int ret = 0;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
elem = &nf_hooks[pf][hook];
next_hook:
verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
outdev, &elem, okfn, hook_thresh);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1; // This means okfn will be invoked, that is ip_rcv_finish in this example
} else if (verdict == NF_DROP) {
kfree_skb(skb);
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS))
goto next_hook;
}
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(nf_hook_slow);
- nf_iterate(net/netfilter/core.c)
unsigned int nf_iterate(struct list_head *head,
struct sk_buff *skb,
unsigned int hook,
const struct net_device *indev,
const struct net_device *outdev,
struct list_head **i,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
unsigned int verdict;
/*
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
list_for_each_continue_rcu(*i, head) {
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
if (hook_thresh > elem->priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
verdict = elem->hook(hook, skb, indev, outdev, okfn); // Here registerred hook is invoked
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
elem->hook, hook);
continue;
}
#endif
if (verdict != NF_REPEAT)
return verdict;
*i = (*i)->prev; // Repeat this hook
}
}
return NF_ACCEPT;
}
- Dig it more
- Networking Namespaces
- Macros in Linux2.6.30
阅读(2829) | 评论(0) | 转发(4) |