Linux ,c/c++, web,前端,php,js
分类:
2012-07-30 10:18:38
原文地址:ping源码分析 作者:rosettasky
第一回分析源码,欢迎拍砖。
程序流程一览图
感觉有点简单吧,ping的核心就是上面的图了,而其它都是对选项的处理,不仅仅是ping源码,其它开源代码一般都是一堆选项,如果一开始就被那些对分析程序核心没有帮助的选项所困扰,那么分析一个程序需要的时间和效率就可想而知。当然ping的很多选项都是值得分析的。
这个ping源码控制发包的时间间隔,是通过main_loop()里比较复杂的时间计算来实现的,因为它还要支持诸如-l这样的选项;如果纯粹是控制时间,那么注册个SIGALRM信号处理函数,再加个1秒的定时器alarm(1),或者直接sleep(1),就可以很轻松地实现。-w选项则是通过定时器完成的:setup()中用setitimer()设置超时,到期后产生SIGALRM信号;SIGALRM信号处理函数也是在main()->setup()中设置的,具体请看setup函数分析。
1,准备工作,获得ping源码并学会编译
会编译了看代码其实就是后续的活,网上找的方法,挺好用……
[root@xxx study_2]# type ping
ping is /bin/ping
[root@xxx study_2]# rpm -qf /bin/ping
iputils-20020927-46.el5
从而得知ping属于iputils包,去下载iputils源码包即可,其它linux命令的源码获取方法类似。
[root@xxx iputils]# make
Please, set correct KERNEL_INCLUDE
make: *** [check-kernel] 错误 1
反正是一堆错误,改了这个来了那个,看了INSTALL说是被定制过的linux头文件可能会不支持,反正只要能编出ping就行,改了下Makefile,把有关KERNEL_INCLUDE的都屏蔽了,编译成功。
2,源码分析
只分析实现最简单的不带选项的ping程序。也花了一个星期的业余时间。
112 main(int argc, char **argv)
113 {
114 struct hostent *hp;
115 int ch, hold, packlen;
116 int socket_errno;
117 u_char *packet;
118 char *target, hnamebuf[MAXHOSTNAMELEN];
119 char rspace[3 + 4 * NROUTES + 1]; /* record route space */
120 #ifdef DO_IPSEC
121 char *policy_string = NULL;
122 #endif
123
124 icmp_sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); //创建icmp套接字
125 socket_errno = errno;
126
127 uid = getuid();//设置进程有效用户ID为实际用户uid,访问权限方面的知识。
128 setuid(uid);
129
130 source.sin_family = AF_INET;
131
132 preload = 1;//这个参数默认为1,可由-l选项控制,一次发多个包而不管对方是否回应,其本质还是通过计算时间来实现,具体可看main_loop().没兴趣不用管。
133 while ((ch = getopt(argc, argv, COMMON_OPTSTR "bRT:P:")) != EOF) {
134 switch(ch) {//getopt得好好学会。
213 default:
214 usage();
215 }
216 }
217 argc -= optind;//optind是getopt维护的全局变量,可参考GETOPT(3)
218 argv += optind;
219
//这里对选项的处理,不管;不带选项时options为0。
220 printf("options :%d\n", options );
221 if (argc == 0)
222 usage();
223 if (argc > 1) {
224 if (options & F_RROUTE)
225 usage();
226 else if (options & F_TIMESTAMP) {
227 if (ts_type != IPOPT_TS_PRESPEC)
228 usage();
229 if (argc > 5)
230 usage();
231 } else {
232 if (argc > 10)
233 usage();
234 options |= F_SOURCEROUTE;
235 }
236 }
//处理传给ping的参数
237 while (argc > 0) {
//我只传一个目标IP,所以argc=1,这个循环只做一回,把IP地址给了target
238 target = *argv;
239 printf("target:%s\n", target);
240
241 bzero((char *)&whereto, sizeof(whereto));
242 whereto.sin_family = AF_INET;
243 if (inet_aton(target, &whereto.sin_addr) == 1) {//如果是IP地址
244 hostname = target;
245 if (argc == 1)
246 options |= F_NUMERIC;
247 } else {//如果是其它域名
248 hp = gethostbyname(target);
249 if (!hp) {//如果解析域名不成功,返回
250 fprintf(stderr, "ping: unknown host %s\n", target);
251 exit(2);
252 }
253 memcpy(&whereto.sin_addr, hp->h_addr, 4);
254 strncpy(hnamebuf, hp->h_name, sizeof(hnamebuf) - 1);
255 hnamebuf[sizeof(hnamebuf) - 1] = 0;
256 hostname = hnamebuf;
257 }
258 if (argc > 1)
259 route[nroute++] = whereto.sin_addr.s_addr;
260 argc--;
261 argv++;
262 }
//这部分只是对目标IP的一个connect尝试,或者判断是否是广播地址,connect失败退出,
//如果成功再获取本机struct sockaddr结构到source,
264 if (source.sin_addr.s_addr == 0) {
265 int alen;
266 struct sockaddr_in dst = whereto;
267 int probe_fd = socket(AF_INET, SOCK_DGRAM, 0);
268
269 if (probe_fd < 0) {
270 perror("socket");
271 exit(2);
……
……
……
322 alen = sizeof(source);
323 if (getsockname(probe_fd, (struct sockaddr*)&source, &alen) == -1) {
324 perror("getsockname");
325 exit(2);
326 }
327 source.sin_port = 0;
328 close(probe_fd);
329 } while (0);
330
//如果目标IP为0.0.0.0,则把目标IP设为上面得到的本机源地址source(通常是127.0.0.1),即ping本机。
331 if (whereto.sin_addr.s_addr == 0)
332 whereto.sin_addr.s_addr = source.sin_addr.s_addr;
333
//如果icmp_sock套接字创建失败,返回。
334 if (icmp_sock < 0) {
335 errno = socket_errno;
336 perror("ping: icmp open socket");
337 exit(2);
338 }
339
//device是加-I选项选择出口接口,这回没加-I选项。
338 if (device) {
……
……
349 }
……
//这部分都是对一些选项处理, 省略。
……
//,
446
447 /* Estimate memory eaten by single packet. It is rough estimate.
448 * Actually, for small datalen's it depends on kernel side a lot. */
449 hold = datalen + 8;
450 hold += ((hold+511)/512)*(optlen + 20 + 16 + 64 + 160);
451 sock_setbufs(icmp_sock, hold);//设置此套接字接收和发送缓冲区大小。
515 printf("PING %s (%s) ", hostname, inet_ntoa(whereto.sin_addr));
516 if (device || (options&F_STRICTSOURCE))
517 printf("from %s %s: ", inet_ntoa(source.sin_addr), device ?: "");
518 printf("%d(%d) bytes of data.\n", datalen, datalen+8+optlen+20);
519
520 setup(icmp_sock);
521
522 main_loop(icmp_sock, packet, packlen);
//到这里main就结束了。现在来分析setup和main_loop。
//setup()函数
……
478 set_signal(SIGINT, sigexit);
479 set_signal(SIGALRM, sigexit);
480 set_signal(SIGQUIT, sigstatus);
481
482 gettimeofday(&start_time, NULL);
483
484 if (deadline) {
485 struct itimerval it;
486
487 it.it_interval.tv_sec = 0;
488 it.it_interval.tv_usec = 0;
489 it.it_value.tv_sec = deadline;
490 it.it_value.tv_usec = 0;
491 setitimer(ITIMER_REAL, &it, NULL);
492 }
……
set_signal(SIGALRM, sigexit);
static void sigexit(int signo)
{
exiting = 1;
}
主要是注册了几个信号处理函数,并为-w选项设置定时器:如果超时,会收到SIGALRM信号,sigexit()把exiting置为1,而在main_loop()的主循环中一开始就对这个变量做了判断,如果exiting为真则跳出循环,程序退出。
for (;;) {
/* Check exit conditions. */
if (exiting)
break;
……
……
}
//main_loop()函数,关键东西都在这里。
504 void main_loop(int icmp_sock, __u8 *packet, int packlen)
505 {
506 char addrbuf[128];
507 char ans_data[4096];
508 struct iovec iov;
509 struct msghdr msg;
510 struct cmsghdr *c;
511 int cc;
512 int next;
513 int polling;
514
515 iov.iov_base = (char *)packet;
516
517 for (;;) {
518 /* Check exit conditions. */
519 if (exiting)
520 break;
521 if (npackets && nreceived + nerrors >= npackets)
522 break;
523 if (deadline && nerrors)
524 break;
525 /* Check for and do special actions. */
526 if (status_snapshot)
527 status();
528
529 /* Send probes scheduled to this time. */
530 do {
531 next = pinger();//发送ICMP包
532 next = schedule_exit(next);
533 } while (next <= 0);
534
535 /* "next" is time to send next probe, if positive.
536 * If next<=0 send now or as soon as possible. */
537
538 /* Technical part. Looks wicked. Could be dropped,
539 * if everyone used the newest kernel. :-)
540 * Its purpose is:
541 * 1. Provide intervals less than resolution of scheduler.
542 * Solution: spinning.
543 * 2. Avoid use of poll(), when recvmsg() can provide
544 * timed waiting (SO_RCVTIMEO). */
545 polling = 0;
546 if ((options & (F_ADAPTIVE|F_FLOOD_POLL)) || next<SCHINT(interval)) {
547 int recv_expected = in_flight();
548
549 /* If we are here, recvmsg() is unable to wait for
550 * required timeout. */
551 if (1000*next <= 1000000/(int)HZ) {
552 /* Very short timeout... So, if we wait for
553 * something, we sleep for MININTERVAL.
554 * Otherwise, spin! */
555 if (recv_expected) {
556 next = MININTERVAL;
557 } else {
558 next = 0;
559 /* When spinning, no reasons to poll.
560 * Use nonblocking recvmsg() instead. */
561 polling = MSG_DONTWAIT;
562 /* But yield yet. */
563 sched_yield();
564 }
565 }
566
567 if (!polling &&
568 ((options & (F_ADAPTIVE|F_FLOOD_POLL)) || interval)) {
569 struct pollfd pset;
570 pset.fd = icmp_sock;
571 pset.events = POLLIN|POLLERR;
572 pset.revents = 0;
573 if (poll(&pset, 1, next) < 1 ||
574 !(pset.revents&(POLLIN|POLLERR)))
575 continue;
576 polling = MSG_DONTWAIT;
577 }
578 }
579
580 for (;;) {//接收ICMP_ECHOREPLY回应包
581 struct timeval *recv_timep = NULL;
582 struct timeval recv_time;
583 int not_ours = 0; /* Raw socket can receive messages
584 * destined to other running pings. */
585
586 iov.iov_len = packlen;
587 msg.msg_name = addrbuf;
588 msg.msg_namelen = sizeof(addrbuf);
589 msg.msg_iov = &iov;
590 msg.msg_iovlen = 1;
591 msg.msg_control = ans_data;
592 msg.msg_controllen = sizeof(ans_data);
593
594 cc = recvmsg(icmp_sock, &msg, polling);//接收
595 polling = MSG_DONTWAIT;
596
597 if (cc < 0) {
598 if (errno == EAGAIN || errno == EINTR)
599 break;
600 if (!receive_error_msg()) {
601 if (errno) {
602 perror("ping: recvmsg");
603 break;
604 }
605 not_ours = 1;
606 }
607 } else {
608
609 #ifdef SO_TIMESTAMP
610 for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
611 if (c->cmsg_level != SOL_SOCKET ||
612 c->cmsg_type != SO_TIMESTAMP)
613 continue;
614 if (c->cmsg_len < CMSG_LEN(sizeof(struct timeval)))
615 continue;
616 recv_timep = (struct timeval*)CMSG_DATA(c);
617 }
618 #endif
619
620 if ((options&F_LATENCY) || recv_timep == NULL) {
621 if ((options&F_LATENCY) ||
622 ioctl(icmp_sock, SIOCGSTAMP, &recv_time))
623 gettimeofday(&recv_time, NULL);
624 recv_timep = &recv_time;
625 }
626
627 not_ours = parse_reply(&msg, cc, addrbuf, recv_timep);//处理回应包
628 }
629
630 /* See? ... someone runs another ping on this host. */
631 if (not_ours)
632 install_filter();
633
634 /* If nothing is in flight, "break" returns us to pinger. */
635 if (in_flight() == 0)
636 break;
637
638 /* Otherwise, try to recvmsg() again. recvmsg()
639 * is nonblocking after the first iteration, so that
640 * if nothing is queued, it will receive EAGAIN
641 * and return to pinger. */
642 }
643 }
644 finish();//收尾
645 }
//pinger()
int pinger(void)
{
static int oom_count;
static int tokens;
int i;
/* Have we already sent enough? If we have, return an arbitrary positive value. */
if (exiting || (npackets && ntransmitted >= npackets && !deadline))
return 1000;
/* Check that packets < rate*time + preload */
if (cur_time.tv_sec == 0) {//这部分对时间的控制
gettimeofday(&cur_time, NULL);
tokens = interval*(preload-1);//preload就是通过-l选项送进来的,默认为1。
} else {
long ntokens;
struct timeval tv;
gettimeofday(&tv, NULL);
ntokens = (tv.tv_sec - cur_time.tv_sec)*1000 +
(tv.tv_usec-cur_time.tv_usec)/1000;
if (!interval) {
/* Case of unlimited flood is special;
* if we see no reply, they are limited to 100pps */
if (ntokens < MININTERVAL && in_flight() >= preload)
return MININTERVAL-ntokens;
}
ntokens += tokens;
if (ntokens > interval*preload)
ntokens = interval*preload;
if (ntokens < interval)
return interval - ntokens;
cur_time = tv;
tokens = ntokens - interval;
}
resend:
i = send_probe();//发送数据包的核心。
if (i == 0) {//发送数据包成功。
oom_count = 0;
advance_ntransmitted();
if (!(options & F_QUIET) && (options & F_FLOOD)) {
/* Very silly, but without this output with
* high preload or pipe size is very confusing. */
if ((preload < screen_width && pipesize < screen_width) ||
in_flight() < screen_width)
write(STDOUT_FILENO, ".", 1);
}
return interval - tokens;
}
//发送失败,错误处理。
/* And handle various errors... */
if (i > 0) {
/* Apparently, it is some fatal bug. */
abort();
} else if (errno == ENOBUFS || errno == ENOMEM) {
……
……
}
return SCHINT(interval);
}
//send_probe()
int send_probe()
{
struct icmphdr *icp;//icmp报文头部结构。
int cc;
int i;
icp = (struct icmphdr *)outpack;
icp->type = ICMP_ECHO; //request请求类型。
icp->code = 0;
icp->checksum = 0;
icp->un.echo.sequence = htons(ntransmitted+1);//数据包序列。可以把htons去掉抓包对比。
icp->un.echo.id = ident; /* ID */ //进程ID。
CLR((ntransmitted+1) % mx_dup_ck);
if (timing) {
if (options&F_LATENCY) {
static volatile int fake_fucked_egcs = sizeof(struct timeval);
struct timeval tmp_tv;
gettimeofday(&tmp_tv, NULL);//获得当前时间发送给对方,为计算来回时间差RTT
/* egcs is crap or glibc is crap, but memcpy
does not copy anything, if len is constant! */
memcpy(icp+1, &tmp_tv, fake_fucked_egcs);
} else {
memset(icp+1, 0, sizeof(struct timeval));
}
}
cc = datalen + 8; /* skips ICMP portion */
/* compute ICMP checksum here */
icp->checksum = in_cksum((u_short *)icp, cc, 0);
//数据校验,不分析了,《计算机组成原理》貌似讲过原理,可以参考TCPv1,TCPv2或者其它书籍。
if (timing && !(options&F_LATENCY)) {
static volatile int fake_fucked_egcs = sizeof(struct timeval);
struct timeval tmp_tv;
gettimeofday(&tmp_tv, NULL);
/* egcs is crap or glibc is crap, but memcpy
does not copy anything, if len is constant! */
memcpy(icp+1, &tmp_tv, fake_fucked_egcs);
icp->checksum = in_cksum((u_short *)(icp+1), fake_fucked_egcs, ~icp->checksum);
}
do {
static struct iovec iov = {outpack, 0};
static struct msghdr m = { &whereto, sizeof(whereto),
&iov, 1, &cmsg, 0, 0 };
m.msg_controllen = cmsg_len;
iov.iov_len = cc;//可选数据为56字节再加8字节ICMP报头=64字节。所以整个icmp包就是64字节,最终产生64+20字节IPv4首部=84字节的IP数据包。
i = sendmsg(icmp_sock, &m, confirm);//终于发出去了。
confirm = 0;
} while (0);
return (cc == i ? 0 : i);
}
//parse_reply(),最后一部分对回应包的处理,这也是核心,需要对icmp包和ip数据包很好了解。
parse_reply(struct msghdr *msg, int cc, void *addr, struct timeval *tv)
{
struct sockaddr_in *from = addr;
__u8 *buf = msg->msg_iov->iov_base;
struct icmphdr *icp;
struct iphdr *ip;
int hlen;
int csfailed;
/* Check the IP header */
ip = (struct iphdr *)buf;
hlen = ip->ihl*4;//这个为什么*4可看TCPv1,它其实就是IP首部以字节为单位的长度。
if (cc < hlen + 8 || ip->ihl < 5) {
if (options & F_VERBOSE)
fprintf(stderr, "ping: packet too short (%d bytes) from %s\n", cc,
pr_addr(from->sin_addr.s_addr));
return 1;
}
/* Now the ICMP part */
cc -= hlen;
icp = (struct icmphdr *)(buf + hlen);//指针往后移ip首部长度就是ICMP头部了。
csfailed = in_cksum((u_short *)icp, cc, 0);//数据校验。
if (icp->type == ICMP_ECHOREPLY) {//如果是回应报
if (icp->un.echo.id != ident)//判断是否是自己发的请求包。如果不是返回。
return 1; /* 'Twas not our ECHO */
if (gather_statistics((__u8*)(icp+1), cc,
ntohs(icp->un.echo.sequence),
ip->ttl, 0, tv, pr_addr(from->sin_addr.s_addr)))
//计算来回时间差,并打印。
return 0;//返回到main_loop,继续循环做时间计算-->发包-->收包。
} else {//如果不是,做其它处理。。。。
return 0;
}
4,总结。
本次ping源码分析知识点:
源码获得方法、
Linux网络编程、
Linux系统编程信号使用、
Linux时间相关系统调用、
getopt对选项处理过程(可以看下功能更强大的getopt_long)
ICMP报文组成理解、
本地和网络字节序理解、
抓包分析能力
附常用抓包命令:
tcpdump抓包常用命令:
tcpdump -ni any icmp
tcpdump -ni any host 192.168.147.1
tcpdump -ni any port \(500 or 4500\)
tcpdump -ni any arp -s0 -w arp.cap
当然Windows下的wireshark也是非常不错的抓包分析工具。