SOCK_RAW给了用户更大的主动性,可以自己构造L4甚至L3的头,直接和内核进行交互,略过上层的协议栈,如ping命令:
-
socket(PF_INET, SOCK_RAW, IPPROTO_ICMP) = 3
看下面这个例子:
-
/* Raw ICMP socket descriptor: assigned in main(), used by send_icmp() and recv_icmp(). */
int sockfd;
-
/* Destination address: main() fills it from argv[1]; send_icmp() passes it to sendto(). */
struct sockaddr_in addr;
-
/* Outgoing packet buffer: send_icmp() builds the ICMP message in place here. */
char sendbuf[2048];
-
/* Buffer handed to recvfrom() in recv_icmp(). */
char recvbuf[2048];
-
/* ICMP payload size in bytes; send_icmp() adds the 8-byte ICMP header on top of it. */
int datalen = 56;
-
-
/*
 * Internet checksum (RFC 1071 style): one's-complement of the
 * one's-complement sum of the buffer taken as 16-bit words.
 *
 * data: start of the buffer (read as native-endian 16-bit words).
 * len:  length of the buffer in BYTES; values <= 0 yield 0xffff.
 *
 * When len is odd the final byte is summed as if it were followed by a
 * zero pad byte; the byte after the buffer is never read.
 *
 * Returns the checksum in the same byte order as the words were read, so
 * it can be stored straight back into the packet.
 */
unsigned short my_cksum(unsigned short *data, int len) {
    unsigned long sum = 0;          /* unsigned: no signed-overflow UB */
    int words = len > 0 ? len / 2 : 0;
    int i;

    for (i = 0; i < words; i++) {
        sum += data[i];
        /* Fold early so the accumulator cannot wrap on very long buffers. */
        if (sum & 0x80000000UL)
            sum = (sum & 0xffff) + (sum >> 16);
    }

    if (len > 0 && (len & 1)) {
        /* Odd trailing byte: place it in the first byte of a zeroed word. */
        unsigned short tail = 0;
        *(unsigned char *)&tail = *(const unsigned char *)(data + words);
        sum += tail;
    }

    /* Fold the carries back into the low 16 bits. */
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);

    return (unsigned short)~sum;
}
-
void send_icmp() {
-
struct icmp* icmp = (struct icmp*)sendbuf;
-
int len = 8+datalen;//长度为L4的头加上负载
-
icmp->icmp_type = ICMP_ECHO;
-
icmp->icmp_code = 0;
-
icmp->icmp_cksum = my_cksum((unsigned short*)icmp, len); //校验值,如果校验值错误的话,不影响发送,但是不会有响应报文
-
int retval = sendto(sockfd, sendbuf, len, 0, (struct sockaddr*)&addr,sizeof(addr));
-
printf("send %d\n",retval);
-
}
-
void recv_icmp() {
-
for(;;) {
-
int len = recvfrom(sockfd, recvbuf, sizeof(recvbuf), 0, 0, 0);
-
printf("recv len=%d\n",len); //接收到的数据为L3+L4+负载
-
}
-
}
-
-
int main(int argc, char **argv)
-
{
-
-
int ret;
-
sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
-
-
memset(&addr, 0, sizeof(addr));
-
addr.sin_family = AF_INET;
-
ret=inet_pton(AF_INET, argv[1], &addr.sin_addr); //初始化目的地址
-
-
send_icmp();
-
recv_icmp();
-
return 0;
-
}
运行一次输出结果如下:
-
root@pavel:/home/pavel# ./3 192.168.1.1
-
send 64
-
recv len=84
tcpdump抓的数据如下:
-
21:42:22.501278 IP 192.168.1.105 > 192.168.1.1: ICMP echo request, id 0, seq 0, length 64
-
0x0000: 0023 cd5b ead6 8056 f2db 2f7b 0800 4500
-
0x0010: 0054 18a5 4000 4001 9e49 c0a8 0169 c0a8
-
0x0020: 0101 0800 f7ff 0000 0000 0000 0000 0000
-
0x0030: 0000 0000 0000 0000 0000 0000 0000 0000
-
0x0040: 0000 0000 0000 0000 0000 0000 0000 0000
-
0x0050: 0000 0000 0000 0000 0000 0000 0000 0000
-
0x0060: 0000
-
21:42:22.731070 IP 192.168.1.1 > 192.168.1.105: ICMP echo reply, id 0, seq 0, length 64
-
0x0000: 8056 f2db 2f7b 0023 cd5b ead6 0800 4500
-
0x0010: 0054 8460 4000 4001 328e c0a8 0101 c0a8
-
0x0020: 0169 0000 ffff 0000 0000 0000 0000 0000
-
0x0030: 0000 0000 0000 0000 0000 0000 0000 0000
-
0x0040: 0000 0000 0000 0000 0000 0000 0000 0000
-
0x0050: 0000 0000 0000 0000 0000 0000 0000 0000
-
0x0060: 0000
可以看到上述代码基本完成了类似ping的功能
SOCK_RAW除了可以让用户构造L4的头,甚至L3的头,还可以用来监听接收报文。同样是刚才的那个程序,如果单独运行的话,只发送一次,接收打印一次,然后就等待;如果此时再开另外一个窗口运行:ping 192.168.1.1的话,刚才的程序同样可以接收到响应这个ping命令的报文。
这个主要是在函数ip_local_deliver_finish中实现的,该函数在调用真正的L4层注册的协议前调用了函数raw_local_deliver:
-
int raw_local_deliver(struct sk_buff *skb, int protocol)
-
{
-
int hash;
-
struct sock *raw_sk;
-
-
hash = protocol & (RAW_HTABLE_SIZE - 1);
-
raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
-
-
/* If there maybe a raw socket we must check - if not we
-
* don't care less
-
*/
-
if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
-
raw_sk = NULL;
-
-
return raw_sk != NULL;
-
-
}
-
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
-
{
-
struct sock *sk;
-
struct hlist_head *head;
-
int delivered = 0;
-
struct net *net;
-
-
read_lock(&raw_v4_hashinfo.lock);
-
head = &raw_v4_hashinfo.ht[hash];
-
if (hlist_empty(head))
-
goto out;
-
-
net = dev_net(skb->dev);
-
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
-
iph->saddr, iph->daddr,
-
skb->dev->ifindex);
-
-
while (sk) {
-
delivered = 1;
-
if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
-
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
-
-
/* Not releasing hash */
-
if (clone)
-
raw_rcv(sk, clone);
-
}
-
sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, //循环调用,直到遍历完,因此如果多开几个测试程序,然后运行ping命令的话
-
iph->saddr, iph->daddr, //所有的测试程序都能接收到reply报文
-
skb->dev->ifindex);
-
}
-
out:
-
read_unlock(&raw_v4_hashinfo.lock);
-
return delivered;
-
}
符合条件的每一个sock都会调用raw_rcv:
-
int raw_rcv(struct sock *sk, struct sk_buff *skb)
-
{
-
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
-
atomic_inc(&sk->sk_drops);
-
kfree_skb(skb);
-
return NET_RX_DROP;
-
}
-
nf_reset(skb);
-
-
skb_push(skb, skb->data - skb_network_header(skb));//从这里可以看到接收到的报文是包含L3的头的,但是不包含L2的头
-
//要获取L2的头的话,需要使用PF_PACKET
-
raw_rcv_skb(sk, skb);
-
return 0;
-
}
系统中raw相关的信息可以通过proc查到,下面这个是开了两个测试程序以及一个ping命令后的结果:
-
pavel@pavel:~$ cat /proc/net/raw
-
sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops
-
1: 00000000:0001 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 200182 2 0000000000000000 0
-
1: 00000000:0001 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 190496 2 0000000000000000 0
-
1: 00000000:0001 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 180978 2 0000000000000000 0
SOCK_RAW同样可以从驱动层直接获取报文,因此可以用于网络包的分析,如tcpdump命令,不过需要和PF_PACKET一起:
-
socket(PF_PACKET, SOCK_RAW, 768) = 3 //ETH_P_ALL
先看一下tcpdump相关的内容,看tcpdump是如何实现的,试着写一个类似的程序:
PF_PACKET的初始化如下:
-
/*
 * Module init for PF_PACKET: register the proto, then the socket family,
 * the per-net operations and the netdevice notifier.
 *
 * Returns the proto_register() result; the later registrations run only
 * when it is 0.
 */
static int __init packet_init(void)
{
	int rc;

	rc = proto_register(&packet_proto, 0);
	if (rc != 0)
		return rc;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);

	return rc;
}
相应的数据结构:
-
/*
 * Protocol descriptor for PF_PACKET sockets: registered by packet_init()
 * via proto_register() and passed to sk_alloc() in packet_create().
 */
static struct proto packet_proto = {
-
.name = "PACKET",
-
.owner = THIS_MODULE,
-
/* packet_create() treats the allocated sock as a struct packet_sock (pkt_sk). */
.obj_size = sizeof(struct packet_sock),
-
};
-
/*
 * Address-family entry registered by packet_init() via sock_register();
 * names packet_create() as the creation callback for PF_PACKET.
 */
static const struct net_proto_family packet_family_ops = {
-
.family = PF_PACKET,
-
.create = packet_create,
-
.owner = THIS_MODULE,
-
};
-
/*
 * Per-network-namespace hooks registered by packet_init() via
 * register_pernet_subsys(). The init/exit callbacks are defined outside
 * this excerpt.
 */
static struct pernet_operations packet_net_ops = {
-
.init = packet_net_init,
-
.exit = packet_net_exit,
-
};
-
/*
 * Socket operation table that packet_create() installs in sock->ops for
 * every PF_PACKET socket type except SOCK_PACKET (which gets
 * packet_ops_spkt instead).
 * NOTE(review): the sock_no_* entries are presumably the generic
 * "operation not supported" stubs; they are defined outside this excerpt.
 */
static const struct proto_ops packet_ops = {
-
.family = PF_PACKET,
-
.owner = THIS_MODULE,
-
.release = packet_release,
-
.bind = packet_bind,
-
.connect = sock_no_connect,
-
.socketpair = sock_no_socketpair,
-
.accept = sock_no_accept,
-
.getname = packet_getname,
-
.poll = packet_poll,
-
.ioctl = packet_ioctl,
-
.listen = sock_no_listen,
-
.shutdown = sock_no_shutdown,
-
.setsockopt = packet_setsockopt,
-
.getsockopt = packet_getsockopt,
-
.sendmsg = packet_sendmsg,
-
.recvmsg = packet_recvmsg,
-
.mmap = packet_mmap,
-
.sendpage = sock_no_sendpage,
-
};
socket系统调用分配struct socket数据结构,赋值分量:
-
sock->type = type; //SOCK_RAW
然后调用packet_create,初始化相应的分量:
-
static int packet_create(struct net *net, struct socket *sock, int protocol,
-
int kern)
-
{
-
struct sock *sk;
-
struct packet_sock *po;
-
__be16 proto = (__force __be16)protocol; /* weird, but documented */ //ETH_P_ALL
-
int err;
-
-
if (!capable(CAP_NET_RAW))
-
return -EPERM;
-
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
-
sock->type != SOCK_PACKET)
-
return -ESOCKTNOSUPPORT;
-
-
sock->state = SS_UNCONNECTED;
-
-
err = -ENOBUFS;
-
sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
-
if (sk == NULL)
-
goto out;
-
-
sock->ops = &packet_ops;
-
if (sock->type == SOCK_PACKET) //SOCK_RAW
-
sock->ops = &packet_ops_spkt;
-
-
sock_init_data(sock, sk);
-
-
po = pkt_sk(sk);
-
sk->sk_family = PF_PACKET;
-
po->num = proto;
-
-
sk->sk_destruct = packet_sock_destruct;
-
sk_refcnt_debug_inc(sk);
-
-
/*
-
* Attach a protocol block
-
*/
-
-
spin_lock_init(&po->bind_lock);
-
mutex_init(&po->pg_vec_lock);
-
-
po->prot_hook.func = packet_rcv;
-
-
if (sock->type == SOCK_PACKET)
-
po->prot_hook.func = packet_rcv_spkt;
-
-
po->prot_hook.af_packet_priv = sk;
-
-
if (proto) {
-
po->prot_hook.type = proto;
-
register_prot_hook(sk); //注册当前packet_type到ptype_all链表中,这样接收的时候可以触发相应的接收函数
-
}
-
-
spin_lock_bh(&net->packet.sklist_lock);
-
sk_add_node_rcu(sk, &net->packet.sklist);
-
sock_prot_inuse_add(net, &packet_proto, 1);
-
spin_unlock_bh(&net->packet.sklist_lock);
-
-
return 0;
-
out:
-
return err;
-
}
利用上述的socket类型,很容易获取到一个完整的数据包:
-
int main(int argc, char **argv) {
-
int sock, n;
-
char buffer[LENGTH];
-
unsigned char *iphead, *ethhead;
-
-
if ( (sock=socket(PF_PACKET, SOCK_RAW,htons(ETH_P_ALL)))<0) {
-
perror("socket");
-
return -1;
-
}
-
-
while (1) {
-
int i;
-
n = recvfrom(sock,buffer,LENGTH,0,NULL,NULL);
-
printf("%d bytes read \n",n);
-
for(i=0;i<n;i++)
-
printf("%x ",buffer[i]);
-
printf("\n");
-
}
-
}
其中一次的接收数据如下:
-
98 bytes read //64+L3+L2
-
ffffff80 56 fffffff2 ffffffdb 2f 7b 0 23 ffffffcd 5b ffffffea ffffffd6 8 0 45 0 0 54 78 ffffffea 40 0 40 1 3e 4 ffffffc0 ffffffa8 1 1 ffffffc0 ffffffa8 1 69 0 0 3b 33 24 30 0 1 40 38 52 53 0 0 0 0 4d 3d 2 0 0 0 0 0 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f 30 31 32 33 34 35 36 37
对应的tcpdump输出如下:
-
16:48:00.177672 IP 192.168.1.1 > 192.168.1.105: ICMP echo reply, id 9264, seq 1, length 64
-
0x0000: 8056 f2db 2f7b 0023 cd5b ead6 0800 4500
-
0x0010: 0054 78ea 4000 4001 3e04 c0a8 0101 c0a8
-
0x0020: 0169 0000 3b33 2430 0001 4038 5253 0000
-
0x0030: 0000 4d3d 0200 0000 0000 1011 1213 1415
-
0x0040: 1617 1819 1a1b 1c1d 1e1f 2021 2223 2425
-
0x0050: 2627 2829 2a2b 2c2d 2e2f 3031 3233 3435
-
0x0060: 3637
可以看到两者完全对应。
我们知道在函数__netif_receive_skb中会根据收到的包的类型,调用不同的L3注册的函数,其中涉及ptype_all和ptype_base两个数据结构。
默认情况下如下:
-
Type Device Function
-
0800 ip_rcv
-
0011 llc_rcv [llc]
-
0004 llc_rcv [llc]
-
0806 arp_rcv
-
86dd ipv6_rcv
运行测试程序如下:
-
Type Device Function
-
ALL packet_rcv //增加了这一项,意味着所有的包都会多一步对这个函数的调用,由register_prot_hook函数完成注册
-
0800 ip_rcv
-
0011 llc_rcv [llc]
-
0004 llc_rcv [llc]
-
0806 arp_rcv
-
86dd ipv6_rcv
tcpdump运行后的输出如下:
-
Type Device Function
-
ALL eth0 tpacket_rcv //tcpdump增加了PACKET_RX_RING的设置
-
0800 ip_rcv
-
0011 llc_rcv [llc]
-
0004 llc_rcv [llc]
-
0806 arp_rcv
-
/*
 * Receive hook that packet_create() installs in po->prot_hook.func for
 * PF_PACKET sockets other than SOCK_PACKET.
 *
 * Runs the socket filter on the skb, trims it to the filter's snap length
 * and appends it to the socket's receive queue. Every path returns 0;
 * packets that are not queued are released at the drop labels.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
-
struct packet_type *pt, struct net_device *orig_dev)
-
{
-
struct sock *sk;
-
struct sockaddr_ll *sll;
-
struct packet_sock *po;
-
/* Remember the original data pointer and length so they can be restored later. */
u8 *skb_head = skb->data;
-
int skb_len = skb->len;
-
unsigned int snaplen, res;
-
-
if (skb->pkt_type == PACKET_LOOPBACK)
-
goto drop;
-
-
sk = pt->af_packet_priv; // assigned in packet_create()
-
po = pkt_sk(sk);
-
-
/* Ignore packets whose device belongs to a different net namespace than the socket. */
if (!net_eq(dev_net(dev), sock_net(sk)))
-
goto drop;
-
-
skb->dev = dev;
-
-
if (dev->header_ops) {
-
/* The device has an explicit notion of ll header,
-
* exported to higher levels.
-
*
-
* Otherwise, the device hides details of its frame
-
* structure, so that corresponding packet head is
-
* never delivered to user.
-
*/
-
if (sk->sk_type != SOCK_DGRAM) // the L2 header was already pulled in the driver, so push it back here so that the user receives data including the L2 header
-
skb_push(skb, skb->data - skb_mac_header(skb));
-
else if (skb->pkt_type == PACKET_OUTGOING) {
-
/* Special case: outgoing packets have ll header at head */
-
skb_pull(skb, skb_network_offset(skb));
-
}
-
}
-
-
snaplen = skb->len;
-
-
/* A zero filter result drops the packet; otherwise it caps how many bytes are kept. */
res = run_filter(skb, sk, snaplen);
-
if (!res)
-
goto drop_n_restore;
-
if (snaplen > res)
-
snaplen = res;
-
-
/* Receive buffer would be exceeded: drop and account for it. */
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-
(unsigned)sk->sk_rcvbuf)
-
goto drop_n_acct;
-
-
/*
 * A shared skb is not queued directly: continue with a clone, restore
 * the original's data/len if they were changed above, and release this
 * reference to the original.
 */
if (skb_shared(skb)) {
-
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
if (nskb == NULL)
-
goto drop_n_acct;
-
-
if (skb_head != skb->data) {
-
skb->data = skb_head;
-
skb->len = skb_len;
-
}
-
kfree_skb(skb);
-
skb = nskb;
-
}
-
-
BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
-
sizeof(skb->cb));
-
-
/* Fill in the link-layer address stored in the skb control block. */
sll = &PACKET_SKB_CB(skb)->sa.ll;
-
sll->sll_family = AF_PACKET;
-
sll->sll_hatype = dev->type;
-
sll->sll_protocol = skb->protocol;
-
sll->sll_pkttype = skb->pkt_type;
-
if (unlikely(po->origdev))
-
sll->sll_ifindex = orig_dev->ifindex;
-
else
-
sll->sll_ifindex = dev->ifindex;
-
-
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
-
-
/* Record the untrimmed length before cutting the skb down to snaplen. */
PACKET_SKB_CB(skb)->origlen = skb->len;
-
-
if (pskb_trim(skb, snaplen))
-
goto drop_n_acct;
-
-
skb_set_owner_r(skb, sk);
-
skb->dev = NULL;
-
skb_dst_drop(skb);
-
-
/* drop conntrack reference */
-
nf_reset(skb);
-
-
spin_lock(&sk->sk_receive_queue.lock);
-
po->stats.tp_packets++;
-
skb->dropcount = atomic_read(&sk->sk_drops);
-
__skb_queue_tail(&sk->sk_receive_queue, skb); // unlike other socket types, the L3 and L4 headers are not processed; the raw data is passed straight to user space
-
spin_unlock(&sk->sk_receive_queue.lock);
-
sk->sk_data_ready(sk, skb->len);
-
return 0;
-
-
/* Count the drop in both the packet-socket stats and the sock drop counter. */
drop_n_acct:
-
spin_lock(&sk->sk_receive_queue.lock);
-
po->stats.tp_drops++;
-
atomic_inc(&sk->sk_drops);
-
spin_unlock(&sk->sk_receive_queue.lock);
-
-
/* Put a shared skb's data/len back to the values saved on entry. */
drop_n_restore:
-
if (skb_head != skb->data && skb_shared(skb)) {
-
skb->data = skb_head;
-
skb->len = skb_len;
-
}
-
drop:
-
consume_skb(skb);
-
return 0;
-
}
总结:
1)tcpdump的原理是利用PF_PACKET类型的socket,在ptype_all链表中增加处理函数,使得所有进来的包都能捕捉到
2)PF_PACKET对应的收包函数不处理L3和L4的头,因此用户可以得到完全原始的数据,包括L2、L3以及L4的头
混杂模式,默认情况下网卡只是接收发给自己或者组播、广播的报文,对于发给其他MAC地址的报文,硬件上就直接丢弃了。为了更好的监听网络信息,需要获取这些报文,这个可以通过把网卡设成混杂模式实现。
一般运行tcpdump后会打印:device eth* entered promiscuous mode
用户态程序通过ioctl可以实现:
-
struct ifreq ethreq;
-
strncpy(ethreq.ifr_name,"eth1",IFNAMSIZ);
-
if (ioctl(sock,SIOCGIFFLAGS,ðreq)==-1) {
-
perror("ioctl");
-
close(sock);
-
return -1;
-
}
-
ethreq.ifr_flags |= IFF_PROMISC;
-
if (ioctl(sock,SIOCSIFFLAGS,ðreq)==-1) {
-
perror("ioctl");
-
close(sock);
-
return -1;
-
}
内核的调用流程如下:
-
[ 144.165484] [<ffffffff815e0af7>] __dev_set_rx_mode+0x57/0xa0
-
[ 144.165486] [<ffffffff815e0b66>] dev_set_rx_mode+0x26/0x40
-
[ 144.165489] [<ffffffff815e0d27>] dev_set_promiscuity+0x37/0x50
-
[ 144.165491] [<ffffffff815e0f01>] __dev_change_flags+0xd1/0x170
-
[ 144.165494] [<ffffffff815e104d>] dev_change_flags+0x1d/0x60
-
[ 144.165499] [<ffffffff81644463>] devinet_ioctl+0x693/0x790
-
[ 144.165502] [<ffffffff8164523d>] inet_ioctl+0x6d/0xa0
-
[ 144.165504] [<ffffffff8169c6f1>] packet_ioctl+0xc1/0x150
-
[ 144.165508] [<ffffffff815c4ff5>] sock_do_ioctl+0x25/0x50
-
[ 144.165510] [<ffffffff815c5438>] sock_ioctl+0x1c8/0x280
-
[ 144.165513] [<ffffffff811b75c5>] do_vfs_ioctl+0x2e5/0x4d0
__dev_set_rx_mode会调用驱动的ndo_set_rx_mode或者ndo_set_multicast_list函数,r8169驱动注册的钩子函数如下:
-
/*
 * r8169 receive-mode hook: derive the accept bits and the 64-bit multicast
 * hash filter from dev->flags and the device's multicast list, then write
 * them to the MAR0 and RxConfig registers under tp->lock.
 */
static void rtl_set_rx_mode(struct net_device *dev)
-
{
-
struct rtl8169_private *tp = netdev_priv(dev);
-
void __iomem *ioaddr = tp->mmio_addr;
-
unsigned long flags;
-
u32 mc_filter[2]; /* Multicast hash filter */
-
int rx_mode;
-
u32 tmp = 0;
-
-
if (dev->flags & IFF_PROMISC) {
-
/* Unconditionally log net taps. */
-
netif_notice(tp, link, dev, "Promiscuous mode enabled\n");
-
rx_mode =
-
AcceptBroadcast | AcceptMulticast | AcceptMyPhys |
-
AcceptAllPhys; // promiscuous mode: accept all packets
-
mc_filter[1] = mc_filter[0] = 0xffffffff;
-
} else if ((netdev_mc_count(dev) > multicast_filter_limit) ||
-
(dev->flags & IFF_ALLMULTI)) {
-
/* Too many to filter perfectly -- accept all multicasts. */
-
rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
-
mc_filter[1] = mc_filter[0] = 0xffffffff;
-
} else {
-
struct netdev_hw_addr *ha;
-
-
rx_mode = AcceptBroadcast | AcceptMyPhys;
-
mc_filter[1] = mc_filter[0] = 0;
-
/* Set one hash-filter bit per multicast address: the top 6 CRC bits pick the bit. */
netdev_for_each_mc_addr(ha, dev) {
-
int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
-
mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
-
rx_mode |= AcceptMulticast;
-
}
-
}
-
-
spin_lock_irqsave(&tp->lock, flags);
-
-
/* Keep the non-accept bits of RxConfig and replace only the accept bits. */
tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
-
-
/* On MAC versions after VER_06 the two filter words are swapped and byte-swapped. */
if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
-
u32 data = mc_filter[0];
-
-
mc_filter[0] = swab32(mc_filter[1]);
-
mc_filter[1] = swab32(data);
-
}
-
RTL_W32(MAR0 + 4, mc_filter[1]);
-
RTL_W32(MAR0 + 0, mc_filter[0]);
-
-
RTL_W32(RxConfig, tmp);
-
-
spin_unlock_irqrestore(&tp->lock, flags);
-
}
其中变量定义如下:
-
/* Accept bits that rtl_set_rx_mode() ORs together into rx_mode and writes to RxConfig. */
AcceptErr = 0x20,
-
AcceptRunt = 0x10,
-
AcceptBroadcast = 0x08,
-
AcceptMulticast = 0x04,
-
AcceptMyPhys = 0x02,
-
/* Set by rtl_set_rx_mode() only when IFF_PROMISC is on. */
AcceptAllPhys = 0x01,
对照芯片手册,可以比较清楚地知道上述各位的含义:就是写相关的硬件寄存器,让其接收所有的报文。
看一下发送的数据包是如何捕捉到的:
-
/*
 * Transmit path excerpt (the rest of the body is elided with "..." below):
 * shows where an outgoing skb is offered to the ptype_all hooks.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-
struct netdev_queue *txq)
-
{
-
const struct net_device_ops *ops = dev->netdev_ops;
-
int rc = NETDEV_TX_OK;
-
unsigned int skb_len;
-
-
if (likely(!skb->next)) {
-
u32 features;
-
-
/*
-
* If device doesn't need skb->dst, release it right now while
-
* its hot in this cpu cache
-
*/
-
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
-
skb_dst_drop(skb);
-
-
if (!list_empty(&ptype_all))
-
dev_queue_xmit_nit(skb, dev); // call the registered hook functions
-
...
-
}
-
/*
 * Offer an outgoing skb to every hook on ptype_all that is bound to this
 * device (or to no device) and does not belong to the sending socket.
 *
 * The skb is cloned once on the first match and the clone is marked
 * PACKET_OUTGOING. Delivery to each hook is deferred by one iteration:
 * earlier matches go through deliver_skb(), the last match is called
 * directly after the loop.
 */
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
-
{
-
struct packet_type *ptype;
-
struct sk_buff *skb2 = NULL;
-
struct packet_type *pt_prev = NULL;
-
-
rcu_read_lock();
-
list_for_each_entry_rcu(ptype, &ptype_all, list) {
-
/* Never send packets back to the socket
-
* they originated from - MvS (miquels@drinkel.ow.org)
-
*/
-
if ((ptype->dev == dev || !ptype->dev) &&
-
(ptype->af_packet_priv == NULL ||
-
(struct sock *)ptype->af_packet_priv != skb->sk)) {
-
/* A clone already exists: hand it to the previous match and remember this one. */
if (pt_prev) {
-
deliver_skb(skb2, pt_prev, skb->dev);
-
pt_prev = ptype;
-
continue;
-
}
-
-
/* First match: create the clone that the hooks will receive. */
skb2 = skb_clone(skb, GFP_ATOMIC);
-
if (!skb2)
-
break;
-
-
net_timestamp_set(skb2);
-
-
/* skb->nh should be correctly
-
set by sender, so that the second statement is
-
just protection against buggy protocols.
-
*/
-
skb_reset_mac_header(skb2);
-
-
if (skb_network_header(skb2) < skb2->data ||
-
skb2->network_header > skb2->tail) {
-
if (net_ratelimit())
-
printk(KERN_CRIT "protocol %04x is "
-
"buggy, dev %s\n",
-
ntohs(skb2->protocol),
-
dev->name);
-
skb_reset_network_header(skb2);
-
}
-
skb2->transport_header = skb2->network_header;
-
skb2->pkt_type = PACKET_OUTGOING;
-
pt_prev = ptype;
-
}
-
}
-
/* Deliver to the last matching hook, e.g. packet_rcv for a PF_PACKET socket. */
if (pt_prev)
-
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
-
rcu_read_unlock();
-
}
发送的流程调用的是和接收流程一样的函数。
阅读(10149) | 评论(0) | 转发(0) |