SOCK_PACKET的实现是用dev_add_pack()来的,它用的处理包的函数是packet_rcv(), tcpdump 设置网卡为混杂模式,socket(,SOCK_PACKET,)能够收到所有的包。
而netfilter HOOK 第一个挂载点在NF_INET_PRE_ROUTING, 在(linux-2.6.33.2/net/ipv4/ip_input.c) ip_rcv函数最后(真正的处理函数在ip_rcv_finish())。我们看看这个ip_rcv()函数:
/*
* Main IP Receive routine.
*/
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct iphdr *iph;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;//所有非本机数据都丢弃。
IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto out;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
iph = ip_hdr(skb);
/*
* RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
*
* Is the datagram acceptable?
*
* 1. Length at least the size of an ip header
* 2. Version of 4
* 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
* 4. Doesn't have a bogus length
*/
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto inhdr_error;
len = ntohs(iph->tot_len);
if (skb->len < len) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
/* Our transport medium may have padded the buffer out. Now we know it
* is IP we can trim to the true length of the frame.
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
if (pskb_trim_rcsum(skb, len)) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
/* Remove any debris in the socket control block */
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
inhdr_error:
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
kfree_skb(skb);
out:
return NET_RX_DROP;
}
网卡驱动接收到数据包,其中一个函数eth_type_trans是关键,该函数代码如下:
unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
struct ethhdr *eth;
unsigned char *rawp;
skb->mac.raw=skb->data;
skb_pull(skb,dev->hard_header_len);
eth= skb->mac.ethernet;
if(*eth->h_dest&1)
{
if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
skb->pkt_type=PACKET_BROADCAST;
else
skb->pkt_type=PACKET_MULTICAST;
}
/*
* This ALLMULTI check should be redundant by 1.4
* so don't forget to remove it.
*
* Seems, you forgot to remove it. All silly devices
* seems to set IFF_PROMISC.
*/
else if(1 /*dev->flags&IFF_PROMISC*/)
{
if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
skb->pkt_type=PACKET_OTHERHOST;
}
if (ntohs(eth->h_proto) >= 1536)
return eth->h_proto;
rawp = skb->data;
/*
* This is a magic hack to spot IPX packets. Older Novell breaks
* the protocol design and runs IPX over 802.3 without an 802.2 LLC
* layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
* won't work for fault tolerant netware but does for the rest.
*/
if (*(unsigned short *)rawp == 0xFFFF)
return htons(ETH_P_802_3);
/*
* Real 802.2 LLC
*/
return htons(ETH_P_802_2);
}
所以要想接收所有经过把网卡的数据,须得使用SOCK_PACKET,如今使用socket(AF_PACKET, SOCK_RAW/SOCK_DGRAM, int protocol), 这个函数创建的socket用来在网卡驱动层(OSI L2)上接收和发送数据。
而AF_PACKET是通过dev_add_packet(内核协议注册函数)来实现的:
在net/packet/af_packet.c里面的packet_create函数,这个就是通过packet_proto_init加入的回调
函数,假设定义了CONFIG_SOCK_PACKET,代码整理如下,这个函数是在用户创建链路层socket的时候被调用的:
static int packet_create(struct socket *sock, int protocol)
{
struct sock *sk;
int err;
if (!capable(CAP_NET_RAW))
return -EPERM;
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
&& sock->type != SOCK_PACKET
)
return -ESOCKTNOSUPPORT;
//只有socket(AF_PACKET, [SOCK_DGRAM, SOCK_RAW],
//或者socket(AF_INET, SOCK_PACKET ,才能调用成功
sock->state = SS_UNCONNECTED;
MOD_INC_USE_COUNT;
err = -ENOBUFS;
sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
if (sk == NULL)
goto out;
sk->reuse = 1;
sock->ops = &packet_ops;
if (sock->type == SOCK_PACKET)
sock->ops = &packet_ops_spkt;
//如果是old_style的SOCK_PACKET,就使用packet_ops_spkt
//如果是AF_PACKET,就使用packet_ops作为对应的socket的
//回调函数
sock_init_data(sock,sk);
sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt),
GFP_KERNEL);
//protinfo是一个union
if (sk->protinfo.af_packet == NULL)
goto out_free;
memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
sk->zapped=0;
//这个zapped属性表示一个TCP的socket收到了RST
sk->family = PF_PACKET;
sk->num = protocol;
sk->protinfo.af_packet->prot_hook.func = packet_rcv;
if (sock->type == SOCK_PACKET)
sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
sk->protinfo.af_packet->prot_hook.data = (void *)sk;
if (protocol) {
sk->protinfo.af_packet->prot_hook.type = protocol;
dev_add_pack(&sk->protinfo.af_packet->prot_hook);
//注意到了没有,如果protocol非零的话也可以dev_add_pack
//的,不过当然不能达到phrack55-12的目的,因为这时候你的
//数据已经在用户地址空间了,内核的数据也是改不了的
sk->protinfo.af_packet->running = 1;
}
sklist_insert_socket(&packet_sklist, sk);
//这个函数显然应该实现非常简单,在net/core/sock.c里面.
//packet_sklist是用来给每个socket通知interface状态变化
//的消息的,包括UP/DOWN/MULTICAST_LIST_CHANGE
//这个回调函数的实现是我们说过的register_netdev_notifier
return(0);
out_free:
sk_free(sk);
out:
MOD_DEC_USE_COUNT;
return err;
}
只有在创建了packet
socket以后应用程序才能接收链路层的数据包.而只有你设置了一个非零的protocol以后才能dev_add_pack,你的socket才能接
收数据的.现在看来,dev_add_pack确实是实现底层数据改写的一个重要的函数.所以下面我们将注意dev_add_pack设置的回调函数
func的使用.
7 们已经知道了,如果使用socket(AF_SOCKET, ..)产生一个PACKET SOCKET的话,dev_add_pack加入的函数是packet_rcv,下面是这个在net/packet/af_packet.c里面的函数:
static int packet_rcv(struct sk_buff *skb, struct device *dev,
struct packet_type *pt)
{
struct sock *sk;
struct sockaddr_ll *sll = (struct sockaddr_ll*)skb->cb;
sk = (struct sock *) pt->data;
//我们在packet_create中令data = sk了,remember?
if (skb->pkt_type == PACKET_LOOPBACK) {
kfree_skb(skb);
return 0;
}
skb->dev = dev;
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
sll->sll_ifindex = dev->ifindex;
sll->sll_halen = 0;
if (dev->hard_header_parse)
sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
if (dev->hard_header)
if (sk->type != SOCK_DGRAM)
skb_push(skb, skb->data - skb->mac.raw);
else if (skb->pkt_type == PACKET_OUTGOING)
skb_pull(skb, skb->nh.raw - skb->data);
if (sock_queue_rcv_skb(sk,skb)<0)
{
kfree_skb(skb);
return 0;
}
return(0);
}
在这个函数里面有个地方我很疑惑,那就是pkt_type属性,不知道这个属性是什么地方确定的,从网卡驱动
->netif_rx->net_bh中间都没有,而且甚至在net_bh函数中间就有if(skb->pkt_type==...)
这样的语句,说明在驱动中间就设置了,但是没有找到,我faint了.这个属性以后还会大量使用,还是很重要的.
这里还有几个函数要说明:
skb_pull在include/linux/skbuff.h中间:
extern __inline__ char *__skb_pull(struct sk_buff *skb,
unsigned int len)
{
skb->len-=len;
return skb->data+=len;
}
extern __inline__ unsigned char * skb_pull(struct sk_buff *skb,
unsigned int len)
{
if (len > skb->len)
return NULL;
return __skb_pull(skb,len);
}
不过是把头部的数据空出来,相应调整数据头部data的地址和长度.
同样skb_push在include/linux/skbuff.h中间:
extern __inline__ unsigned char *__skb_push(struct sk_buff *skb,
unsigned int len)
{
skb->data-=len;
skb->len+=len;
return skb->data;
}
extern __inline__ unsigned char *skb_push(struct sk_buff *skb,
unsigned int len)
{
skb->data-=len;
skb->len+=len;
if(skb->datahead)
{
__label__ here;
skb_under_panic(skb, len, &&here);
here: ;
}
return skb->data;
}
这个调整使数据长度加长,和skb_pull相反,不过skb_push显然更加安全一点.
在上面的程序中间,如果设备有一个明确的link_level_header,就考虑要不要调整数据长度和地址,如果sk->type不是
SOCK_DGRAM的话,说明程序对整个数据包包括ll地址都感兴趣.这样需要加长数据段使得数据包含ll头部.不然如果数据是向外走的,则需要把数据
裁减到只包含从网络层数据包头开始的地方.所以
是从nh.raw剪掉data,这就是差值.(nh=network header)
经过了这些处理以后,现在的skb已经是可以提交的了,这样就调用sock_queue_rcv_skb函数将这个skb加入到相应socket的接收缓冲区中去. 这个
函数以后再说把.
阅读(5206) | 评论(0) | 转发(0) |