一、网络结构
当PC有两个网卡,W1是192.168.1.10=====连接的路由器IP:192.168.1.1===PC2:168.1.1.10
W2是10.0.0.30====连接的路由器:10.0.0.1=====PC3
PC想要发送数据时,在路由表中查找目的地址168.1.1.10对应的表项,得到网关192.168.1.1,因此数据从网卡W1(192.168.1.10)发出
二、UDP包数据流程
入口:首先是socket的文件对应的file_operations
-
/*
-
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
-
* in the operation structures but are done directly via the socketcall() multiplexor.
-
*/
-
-
static const struct file_operations socket_file_ops = {
-
.owner = THIS_MODULE,
-
.llseek = no_llseek,
-
.aio_read = sock_aio_read, // read entry point
-
.aio_write = sock_aio_write, // write entry point (the send path traced below starts here)
-
.poll = sock_poll,
-
.unlocked_ioctl = sock_ioctl,
-
#ifdef CONFIG_COMPAT
-
.compat_ioctl = compat_sock_ioctl,
-
#endif
-
.mmap = sock_mmap,
-
.open = sock_no_open, /* special open code to disallow open via /proc */
-
.release = sock_close,
-
.fasync = sock_fasync,
-
.sendpage = sock_sendpage,
-
.splice_write = generic_splice_sendpage,
-
.splice_read = sock_splice_read,
-
};
sock_aio_write:
-
/*
 * Write entry point installed in socket_file_ops.aio_write.
 * Rejects a non-zero file position with -ESPIPE, obtains a sock_iocb
 * through alloc_sock_iocb() (-ENOMEM if that fails) and delegates the
 * actual work to do_sock_write().
 */
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
-
unsigned long nr_segs, loff_t pos)
-
{
-
struct sock_iocb siocb, *x;
-
-
if (pos != 0)
-
return -ESPIPE;
-
-
x = alloc_sock_iocb(iocb, &siocb);
-
if (!x)
-
return -ENOMEM;
-
-
return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); // hand off to do_sock_write
-
}
do_sock_write:
-
/*
 * Builds the msghdr for a plain write() on a socket and sends it.
 * The total size is the sum of all iovec lengths; no destination name
 * and no control data are attached. MSG_DONTWAIT is set for O_NONBLOCK
 * files and MSG_EOR is added for SOCK_SEQPACKET sockets.
 * Returns whatever __sock_sendmsg() returns.
 */
static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
-
struct file *file, const struct iovec *iov,
-
unsigned long nr_segs)
-
{
-
struct socket *sock = file->private_data;
-
size_t size = 0;
-
int i;
-
-
for (i = 0; i < nr_segs; i++)
-
size += iov[i].iov_len;
-
-
msg->msg_name = NULL;
-
msg->msg_namelen = 0;
-
msg->msg_control = NULL;
-
msg->msg_controllen = 0;
-
msg->msg_iov = (struct iovec *)iov;
-
msg->msg_iovlen = nr_segs;
-
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
-
if (sock->type == SOCK_SEQPACKET)
-
msg->msg_flags |= MSG_EOR;
-
-
return __sock_sendmsg(iocb, sock, msg, size); // continue in __sock_sendmsg
-
}
__sock_sendmsg:
-
/*
 * Records the socket, message and size in the sock_iocb attached to the
 * kiocb, runs the security_socket_sendmsg() check and, if that check
 * returns 0, dispatches to the socket's sendmsg operation.
 * Returns the security hook's error, or the result of ops->sendmsg.
 */
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-
struct msghdr *msg, size_t size)
-
{
-
struct sock_iocb *si = kiocb_to_siocb(iocb);
-
int err;
-
-
si->sock = sock;
-
si->scm = NULL;
-
si->msg = msg;
-
si->size = size;
-
-
err = security_socket_sendmsg(sock, msg, size);
-
if (err)
-
return err;
-
-
return sock->ops->sendmsg(iocb, sock, msg, size); // call through the ops->sendmsg function pointer; NOTE(review): for an AF_INET UDP socket this is presumably inet_sendmsg, which forwards to udp_sendmsg - confirm in af_inet.c
-
}
到这里是SCI和协议无关层的部分。
协议栈入口:udp_sendmsg(对于UDP套接字,上面的sock->ops->sendmsg对应inet_sendmsg,它再通过sk->sk_prot->sendmsg调用到udp_sendmsg)
-
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-
size_t len)
-
{
-
struct inet_sock *inet = inet_sk(sk);
-
struct udp_sock *up = udp_sk(sk);
-
int ulen = len;
-
struct ipcm_cookie ipc;
-
struct rtable *rt = NULL;
-
int free = 0;
-
int connected = 0;
-
__be32 daddr, faddr, saddr;
-
__be16 dport;
-
u8 tos;
-
int err, is_udplite = IS_UDPLITE(sk);
-
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
-
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
-
-
if (len > 0xFFFF)
-
return -EMSGSIZE;
-
-
.........................
-
-
security_sk_classify_flow(sk, &fl);
-
err = ip_route_output_flow(net, &rt, &fl, sk, 1); //udp处理的函数(选择路由)
-
if (err) {
-
if (err == -ENETUNREACH)
-
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
-
goto out;
-
}
-
-
err = -EACCES;
-
if ((rt->rt_flags & RTCF_BROADCAST) &&
-
!sock_flag(sk, SOCK_BROADCAST))
-
goto out;
-
if (connected)
-
sk_dst_set(sk, dst_clone(&rt->u.dst));
-
}
-
-
..........................
-
-
do_append_data:
-
up->len += ulen;
-
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
-
err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
-
sizeof(struct udphdr), &ipc, &rt,
-
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
-
if (err)
-
udp_flush_pending_frames(sk);
-
else if (!corkreq)
-
err = udp_push_pending_frames(sk); //用于数据传输,该函数为数据包添加UDP包头,校验数据,将数据发送出去。
-
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
-
up->pending = 0;
-
release_sock(sk);
-
-
out:
-
ip_rt_put(rt);
-
if (free)
-
kfree(ipc.opt);
-
if (!err)
-
return len;
-
/*
-
* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
-
* ENOBUFS might not be good (it's not tunable per se), but otherwise
-
* we don't have a good statistic (IpOutDiscards but it can be too many
-
* things). We could add another new stat but at least for now that
-
* seems like overkill.
-
*/
-
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-
UDP_INC_STATS_USER(sock_net(sk),
-
UDP_MIB_SNDBUFERRORS, is_udplite);
-
}
-
return err;
-
-
do_confirm:
-
dst_confirm(&rt->u.dst);
-
if (!(msg->msg_flags&MSG_PROBE) || len)
-
goto back_from_confirm;
-
err = 0;
-
goto out;
-
}
udp_push_pending_frames:
-
/*
-
* Push out all pending data as one UDP datagram. Socket is locked.
-
*/
-
static int udp_push_pending_frames(struct sock *sk)
-
{
-
struct udp_sock *up = udp_sk(sk);
-
struct inet_sock *inet = inet_sk(sk);
-
struct flowi *fl = &inet->cork.fl;
-
struct sk_buff *skb;
-
struct udphdr *uh;
-
int err = 0;
-
int is_udplite = IS_UDPLITE(sk);
-
__wsum csum = 0;
-
-
/* Grab the skbuff where UDP header space exists. */
-
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
-
goto out;
-
-
/*
-
* Create a UDP header
-
*/
-
uh = udp_hdr(skb);
-
uh->source = fl->fl_ip_sport;
-
uh->dest = fl->fl_ip_dport;
-
uh->len = htons(up->len);
-
uh->check = 0;
-
-
/* Pick the checksum strategy; the "disabled" and "hardware" cases skip
 * the software pseudo-header checksum and jump straight to send. */
if (is_udplite) /* UDP-Lite */
-
csum = udplite_csum_outgoing(sk, skb);
-
-
else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
-
-
skb->ip_summed = CHECKSUM_NONE;
-
goto send;
-
-
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
-
-
udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
-
goto send;
-
-
} else /* `normal' UDP */
-
csum = udp_csum_outgoing(sk, skb);
-
-
/* add protocol-dependent pseudo-header */
-
uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
-
sk->sk_protocol, csum );
-
/* a computed checksum of 0 is replaced by CSUM_MANGLED_0 */
if (uh->check == 0)
-
uh->check = CSUM_MANGLED_0;
-
-
send:
-
err = ip_push_pending_frames(sk); // hand the datagram to the IP layer (entry point of the IP processing)
-
out:
-
/* reset the pending-datagram state on every exit path, including the empty-queue case */
up->len = 0;
-
up->pending = 0;
-
if (!err)
-
UDP_INC_STATS_USER(sock_net(sk),
-
UDP_MIB_OUTDATAGRAMS, is_udplite);
-
return err;
-
}
三、IP协议关系
该图是《Understanding Linux Network Internals》的原图
* ip包接收函数调用流程(kernel-2.6.27)
ip_rcv()
->ip_rcv_finish()
->ip_route_input()
->dst_input()
->ip_local_deliver() //本地数据包,调用传输层协议处理函数处理
->ip_forward() // 非本地包,转发
->ip_mr_input() //多播数据包转发
ip_push_pending_frames:
-
/*
-
* Combined all pending IP fragments on the socket as one IP datagram
-
* and push them out.
-
*/
-
int ip_push_pending_frames(struct sock *sk)
-
{
-
..........
-
-
-
/* Netfilter gets whole the not fragmented skb. */
-
err = ip_local_out(skb); //这里调用了ip_local_out
-
if (err) {
-
if (err > 0)
-
err = inet->recverr ? net_xmit_errno(err) : 0;
-
if (err)
-
goto error;
-
}
-
-
out:
-
ip_cork_release(inet);
-
return err;
-
-
error:
-
IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
-
goto out;
-
}
ip_local_out:
-
/*
 * Runs __ip_local_out() and, only when it returns 1, continues with
 * dst_output(). Any other return value of __ip_local_out() is passed
 * back to the caller unchanged.
 */
int ip_local_out(struct sk_buff *skb)
-
{
-
int err;
-
-
err = __ip_local_out(skb); // first run __ip_local_out
-
if (likely(err == 1))
-
err = dst_output(skb); // then pass the skb on to dst_output
-
-
return err;
-
}
dst_output:
-
/* Invokes the output callback stored in the skb's dst entry and returns its result. */
static inline int dst_output(struct sk_buff *skb)
-
{
-
return skb->dst->output(skb); // call the output callback of the skb's dst entry; NOTE(review): the original note names ip_finish_output here - for IPv4 routes the callback is presumably ip_output, which then leads to ip_finish_output; confirm
-
}
ip_finish_output:
-
/*
 * On CONFIG_NETFILTER + CONFIG_XFRM builds, an skb whose dst carries an
 * xfrm entry is flagged IPSKB_REROUTED and sent through dst_output()
 * again. Otherwise a packet longer than the dst MTU that is not GSO is
 * fragmented (each fragment finishing in ip_finish_output2), and every
 * other packet goes directly to ip_finish_output2().
 */
static int ip_finish_output(struct sk_buff *skb)
-
{
-
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
-
/* Policy lookup after SNAT yielded a new policy */
-
if (skb->dst->xfrm != NULL) {
-
IPCB(skb)->flags |= IPSKB_REROUTED;
-
return dst_output(skb);
-
}
-
#endif
-
if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
-
return ip_fragment(skb, ip_finish_output2);
-
else
-
return ip_finish_output2(skb); // continue in ip_finish_output2
-
}
ip_finish_output2:
-
/*
 * Last IP-layer step before the neighbour subsystem.
 * Bumps the multicast/broadcast output counters, ensures the skb has
 * enough headroom for the link-layer header (reallocating when needed),
 * then transmits through the cached hardware header (dst->hh) or, failing
 * that, through the neighbour entry's output method.
 * Returns -ENOMEM if the headroom reallocation fails and -EINVAL (after
 * freeing the skb) when neither a header cache nor a neighbour exists.
 */
static inline int ip_finish_output2(struct sk_buff *skb)
-
{
-
struct dst_entry *dst = skb->dst;
-
struct rtable *rt = (struct rtable *)dst;
-
struct net_device *dev = dst->dev;
-
unsigned int hh_len = LL_RESERVED_SPACE(dev);
-
-
if (rt->rt_type == RTN_MULTICAST)
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
-
else if (rt->rt_type == RTN_BROADCAST)
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
-
-
/* Be paranoid, rather than too clever. */
-
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-
struct sk_buff *skb2;
-
-
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-
if (skb2 == NULL) {
-
kfree_skb(skb);
-
return -ENOMEM;
-
}
-
/* keep the new skb charged to the same socket before dropping the old one */
if (skb->sk)
-
skb_set_owner_w(skb2, skb->sk);
-
kfree_skb(skb);
-
skb = skb2;
-
}
-
-
if (dst->hh) // a hardware-header cache entry exists: send through it
-
return neigh_hh_output(dst->hh, skb);
-
else if (dst->neighbour) // no header cache: fall back to the neighbour entry's output method
-
return dst->neighbour->output(skb); // NOTE(review): for ARP neighbours this is presumably arp_generic_ops.output, i.e. neigh_resolve_output - confirm
-
-
if (net_ratelimit())
-
printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
-
kfree_skb(skb);
-
return -EINVAL;
-
}
arp_generic_ops:
-
/*
 * neigh_ops table for family AF_INET: wires the ARP solicit and
 * error-report callbacks and the output handlers used by the
 * neighbour code.
 */
static struct neigh_ops arp_generic_ops = {
-
.family = AF_INET,
-
.solicit = arp_solicit,
-
.error_report = arp_error_report,
-
.output = neigh_resolve_output, // output handler: neigh_resolve_output (listed next)
-
.connected_output = neigh_connected_output,
-
.hh_output = dev_queue_xmit,
-
.queue_xmit = dev_queue_xmit,
-
};
neigh_resolve_output:
-
/* Slow and careful. */
-
-
int neigh_resolve_output(struct sk_buff *skb)
-
{
-
struct dst_entry *dst = skb->dst;
-
struct neighbour *neigh;
-
int rc = 0;
-
-
/* without a dst or a neighbour entry the packet cannot be sent: log and drop */
if (!dst || !(neigh = dst->neighbour))
-
goto discard;
-
-
__skb_pull(skb, skb_network_offset(skb));
-
-
/* NOTE(review): a zero return from neigh_event_send() presumably means the
 * neighbour is usable right now; otherwise rc stays 0 and nothing is sent here - confirm */
if (!neigh_event_send(neigh, skb)) {
-
int err;
-
struct net_device *dev = neigh->dev;
-
/* device can cache headers and none is cached yet: initialise dst->hh under the write lock */
if (dev->header_ops->cache && !dst->hh) {
-
write_lock_bh(&neigh->lock);
-
if (!dst->hh)
-
neigh_hh_init(neigh, dst, dst->ops->protocol);
-
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-
neigh->ha, NULL, skb->len);
-
write_unlock_bh(&neigh->lock);
-
} else {
-
/* only neigh->ha is read here, so the read lock is taken */
read_lock_bh(&neigh->lock);
-
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-
neigh->ha, NULL, skb->len);
-
read_unlock_bh(&neigh->lock);
-
}
-
if (err >= 0)
-
rc = neigh->ops->queue_xmit(skb); // header built: transmit through the ops' queue_xmit (dev_queue_xmit in arp_generic_ops)
-
else
-
goto out_kfree_skb;
-
}
-
out:
-
return rc;
-
discard:
-
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
-
dst, dst ? dst->neighbour : NULL);
-
out_kfree_skb:
-
rc = -EINVAL;
-
kfree_skb(skb);
-
goto out;
-
}
到此协议栈就结束了。
设备无关接口:dev_queue_xmit:
-
-
int dev_queue_xmit(struct sk_buff *skb)
-
{
-
struct net_device *dev = skb->dev;
-
struct netdev_queue *txq;
-
struct Qdisc *q;
-
int rc = -ENOMEM;
-
-
.............
-
-
if (dev->flags & IFF_UP) {
-
int cpu = smp_processor_id(); /* ok because BHs are off */
-
-
if (txq->xmit_lock_owner != cpu) {
-
-
HARD_TX_LOCK(dev, txq, cpu);
-
-
if (!netif_tx_queue_stopped(txq)) {
-
rc = 0;
-
if (!dev_hard_start_xmit(skb, dev, txq)) { //这里调用dev_hard_start_xmit
-
HARD_TX_UNLOCK(dev, txq);
-
goto out;
-
}
-
}
-
HARD_TX_UNLOCK(dev, txq);
-
if (net_ratelimit())
-
printk(KERN_CRIT "Virtual device %s asks to "
-
"queue packet!\n", dev->name);
-
} else {
-
/* Recursion is detected! It is possible,
-
* unfortunately */
-
if (net_ratelimit())
-
printk(KERN_CRIT "Dead loop on virtual device "
-
"%s, fix it urgently!\n", dev->name);
-
}
-
}
-
-
rc = -ENETDOWN;
-
rcu_read_unlock_bh();
-
-
out_kfree_skb:
-
kfree_skb(skb);
-
return rc;
-
out:
-
rcu_read_unlock_bh();
-
return rc;
-
}
dev_hard_start_xmit:
-
/*
 * Passes an skb to the driver through ndo_start_xmit().
 * For a single skb (skb->next == NULL): dev_queue_xmit_nit() is called
 * when ptype_all is non-empty, and the skb is segmented first if the
 * device needs GSO for it. If segmentation produced a chain on
 * skb->next, the gso loop sends the segments one by one, re-linking the
 * current segment and returning early when the driver reports an error
 * or the tx queue stops. The original skb is freed once the chain is
 * fully sent (or when segmentation fails), and 0 is returned.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-
struct netdev_queue *txq)
-
{
-
const struct net_device_ops *ops = dev->netdev_ops; // the driver's net_device_ops table
-
int rc;
-
-
if (likely(!skb->next)) {
-
if (!list_empty(&ptype_all))
-
dev_queue_xmit_nit(skb, dev);
-
-
if (netif_needs_gso(dev, skb)) {
-
if (unlikely(dev_gso_segment(skb)))
-
goto out_kfree_skb;
-
if (skb->next)
-
goto gso;
-
}
-
-
rc = ops->ndo_start_xmit(skb, dev); // call the driver's transmit routine through the ndo_start_xmit pointer
-
/*
-
* TODO: if skb_orphan() was called by
-
* dev->hard_start_xmit() (for example, the unmodified
-
* igb driver does that; bnx2 doesn't), then
-
* skb_tx_software_timestamp() will be unable to send
-
* back the time stamp.
-
*
-
* How can this be prevented? Always create another
-
* reference to the socket before calling
-
* dev->hard_start_xmit()? Prevent that skb_orphan()
-
* does anything in dev->hard_start_xmit() by clearing
-
* the skb destructor before the call and restoring it
-
* afterwards, then doing the skb_orphan() ourselves?
-
*/
-
return rc;
-
}
-
-
gso:
-
do {
-
struct sk_buff *nskb = skb->next;
-
-
/* unlink the next segment from the chain before handing it to the driver */
skb->next = nskb->next;
-
nskb->next = NULL;
-
rc = ops->ndo_start_xmit(nskb, dev);
-
if (unlikely(rc)) {
-
/* driver refused the segment: put it back at the head of the chain */
nskb->next = skb->next;
-
skb->next = nskb;
-
return rc;
-
}
-
if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
-
return NETDEV_TX_BUSY;
-
} while (skb->next);
-
-
skb->destructor = DEV_GSO_CB(skb)->destructor;
-
-
out_kfree_skb:
-
kfree_skb(skb);
-
return 0;
-
}
用户程序通过recvmsg接收网络包,
驱动则使用中断:①接收:从硬件中读取数据放到skb结构中,再调用netif_rx();
netif_rx会触发NET_RX_SOFTIRQ软中断,由设备无关接口层的net_rx_action来处理;该软中断处理函数在net_dev_init中注册(dev.c):
-
/*
-
* This is called single threaded during boot, so no need
-
* to take the rtnl semaphore.
-
*/
-
static int __init net_dev_init(void)
-
{
-
int i, rc = -ENOMEM;
-
-
BUG_ON(!dev_boot_phase);
-
-
if (dev_proc_init())
-
goto out;
-
-
if (netdev_kobject_init())
-
goto out;
-
-
/* start with empty packet-handler lists (ptype_all and every ptype_base bucket) */
INIT_LIST_HEAD(&ptype_all);
-
for (i = 0; i < PTYPE_HASH_SIZE; i++)
-
INIT_LIST_HEAD(&ptype_base[i]);
-
-
if (register_pernet_subsys(&netdev_net_ops))
-
goto out;
-
-
/*
-
* Initialise the packet receive queues.
-
*/
-
-
for_each_possible_cpu(i) {
-
struct softnet_data *queue;
-
-
queue = &per_cpu(softnet_data, i);
-
skb_queue_head_init(&queue->input_pkt_queue);
-
queue->completion_queue = NULL;
-
INIT_LIST_HEAD(&queue->poll_list);
-
-
/* per-CPU backlog is polled by process_backlog */
queue->backlog.poll = process_backlog;
-
queue->backlog.weight = weight_p;
-
queue->backlog.gro_list = NULL;
-
queue->backlog.gro_count = 0;
-
}
-
-
dev_boot_phase = 0;
-
-
/* The loopback device is special if any other network devices
-
* is present in a network namespace the loopback device must
-
* be present. Since we now dynamically allocate and free the
-
* loopback device ensure this invariant is maintained by
-
* keeping the loopback device as the first device on the
-
* list of network devices. Ensuring the loopback devices
-
* is the first device that appears and the last network device
-
* that disappears.
-
*/
-
if (register_pernet_device(&loopback_net_ops))
-
goto out;
-
-
if (register_pernet_device(&default_device_ops))
-
goto out;
-
-
open_softirq(NET_TX_SOFTIRQ, net_tx_action); // register net_tx_action as the transmit softirq handler
-
open_softirq(NET_RX_SOFTIRQ, net_rx_action); // register net_rx_action as the receive softirq handler (the one the receive path relies on)
-
-
hotcpu_notifier(dev_cpu_callback, 0);
-
dst_init();
-
dev_mcast_init();
-
rc = 0;
-
out:
-
return rc;
-
}
netif_receive_skb:
-
/*
 * Receive-side dispatch of one skb to the registered packet handlers.
 * After the vlan, netpoll and bonding checks, the skb is offered to every
 * ptype_all handler bound to a matching device, run through the
 * ingress (CONFIG_NET_CLS_ACT), bridge and macvlan hooks, and then
 * offered to the ptype_base handlers whose type equals skb->protocol.
 * Earlier matches are served through deliver_skb(); the last match is
 * called directly. If no handler matched, the skb is freed and
 * NET_RX_DROP is returned.
 */
int netif_receive_skb(struct sk_buff *skb)
-
{
-
struct packet_type *ptype, *pt_prev;
-
struct net_device *orig_dev;
-
struct net_device *null_or_orig;
-
int ret = NET_RX_DROP;
-
__be16 type;
-
-
if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
-
return NET_RX_SUCCESS;
-
-
/* if we've gotten here through NAPI, check netpoll */
-
if (netpoll_receive_skb(skb))
-
return NET_RX_DROP;
-
-
if (!skb->tstamp.tv64)
-
net_timestamp(skb);
-
-
if (!skb->iif)
-
skb->iif = skb->dev->ifindex;
-
-
null_or_orig = NULL;
-
orig_dev = skb->dev;
-
if (orig_dev->master) {
-
if (skb_bond_should_drop(skb))
-
null_or_orig = orig_dev; /* deliver only exact match */
-
else
-
skb->dev = orig_dev->master;
-
}
-
-
__get_cpu_var(netdev_rx_stat).total++;
-
-
skb_reset_network_header(skb);
-
skb_reset_transport_header(skb);
-
skb->mac_len = skb->network_header - skb->mac_header;
-
-
pt_prev = NULL;
-
-
rcu_read_lock();
-
-
#ifdef CONFIG_NET_CLS_ACT
-
if (skb->tc_verd & TC_NCLS) {
-
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
-
goto ncls;
-
}
-
#endif
-
-
list_for_each_entry_rcu(ptype, &ptype_all, list) {
-
if (ptype->dev == null_or_orig || ptype->dev == skb->dev || // match on device only: ptype_all handlers are not filtered by protocol type
-
ptype->dev == orig_dev) {
-
if (pt_prev)
-
ret = deliver_skb(skb, pt_prev, orig_dev); // deliver to the previously matched handler
-
pt_prev = ptype;
-
}
-
}
-
-
#ifdef CONFIG_NET_CLS_ACT
-
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
-
if (!skb)
-
goto out;
-
ncls:
-
#endif
-
-
skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-
if (!skb)
-
goto out;
-
skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-
if (!skb)
-
goto out;
-
-
skb_orphan(skb);
-
-
/* protocol-specific handlers: hash bucket chosen by skb->protocol, then exact type and device match */
type = skb->protocol;
-
list_for_each_entry_rcu(ptype,
-
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-
if (ptype->type == type &&
-
(ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-
ptype->dev == orig_dev)) {
-
if (pt_prev)
-
ret = deliver_skb(skb, pt_prev, orig_dev);
-
pt_prev = ptype;
-
}
-
}
-
-
/* the last matching handler is called directly, without deliver_skb()'s extra reference */
if (pt_prev) {
-
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-
} else {
-
kfree_skb(skb);
-
/* Jamal, now you will not able to escape explaining
-
* me how you were going to use this. :-)
-
*/
-
ret = NET_RX_DROP;
-
}
-
-
out:
-
rcu_read_unlock();
-
return ret;
-
}
deliver_skb:
-
/*
 * Takes an extra reference on the skb (skb->users) and passes it to the
 * given packet_type's handler; returns the handler's result.
 */
static inline int deliver_skb(struct sk_buff *skb,
-
struct packet_type *pt_prev,
-
struct net_device *orig_dev)
-
{
-
atomic_inc(&skb->users);
-
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); // invoke the packet_type's handler function
-
}
如果是ip包,会交给网络层处理,ip协议栈的入口函数(ip_rcv):
-
/*
-
* Main IP Receive routine.
-
*/
-
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
-
{
-
struct iphdr *iph;
-
u32 len;
-
-
/* When the interface is in promisc. mode, drop all the crap
-
* that it receives, do not try to analyse it.
-
*/
-
if (skb->pkt_type == PACKET_OTHERHOST)
-
goto drop;
-
-
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
-
-
/* skb_share_check() returned NULL: count a discard and leave without freeing anything here */
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
-
goto out;
-
}
-
-
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-
goto inhdr_error;
-
-
iph = ip_hdr(skb);
-
-
/*
-
* RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
-
*
-
* Is the datagram acceptable?
-
*
-
* 1. Length at least the size of an ip header
-
* 2. Version of 4
-
* 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
-
* 4. Doesn't have a bogus length
-
*/
-
-
if (iph->ihl < 5 || iph->version != 4)
-
goto inhdr_error;
-
-
/* make sure the full header, options included (ihl * 4 bytes), is available */
if (!pskb_may_pull(skb, iph->ihl*4))
-
goto inhdr_error;
-
-
/* re-fetch the header pointer after the second pskb_may_pull() - presumably because the pull may move the data; confirm */
iph = ip_hdr(skb);
-
-
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
-
goto inhdr_error;
-
-
len = ntohs(iph->tot_len);
-
if (skb->len < len) {
-
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
-
goto drop;
-
} else if (len < (iph->ihl*4))
-
goto inhdr_error;
-
-
/* Our transport medium may have padded the buffer out. Now we know it
-
* is IP we can trim to the true length of the frame.
-
* Note this now means skb->len holds ntohs(iph->tot_len).
-
*/
-
if (pskb_trim_rcsum(skb, len)) {
-
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
-
goto drop;
-
}
-
-
/* Remove any debris in the socket control block */
-
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-
-
/* header accepted: continue in ip_rcv_finish through the NF_INET_PRE_ROUTING netfilter hook */
return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
-
ip_rcv_finish);
-
-
/* error exits fall through: count the header error, free the skb, report NET_RX_DROP */
inhdr_error:
-
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
-
drop:
-
kfree_skb(skb);
-
out:
-
return NET_RX_DROP;
-
}
阅读(687) | 评论(0) | 转发(0) |