关于Linux下原始套接字发送分片包的问题?
如题,在Linux环境下,使用原始套接字发送数据包,
如果是普通的小于1500字节的TCP,UDP,ICMP数据包都没有问题,
但是如果数据包大于1500字节,程序就会对IP包进行分片,这时问题就来了:
IP分片的代码都是同一个函数,但是TCP,UDP协议只能够发出第一个分片包,余下的分片包就发不出来了,
而ICMP则没有这个问题,都能够发出来,并且可以收到对方的响应
分片包都是使用sendto发送,返回值都是对的,没有错误发生。
这个问题在Linux 2.4.20和2.6.35.6下都一样,防火墙(firewall)都是关闭的。
可是相同的代码在windows2003下运行就没有问题,
真是奇怪,大家有遇到过类似问题,知道是什么原因吗?
我查了一下Linux 2.6.35.6内核的代码(linux+v2.6.35.6/net/ipv4/raw.c),
并没有发现内核对分片包做什么特殊处理啊?大家知道什么原因吗?多谢指教,谢谢
/*
 * raw_sendmsg - send one message on a raw IPv4 socket.
 *
 * NOTE: the decimal number at the start of each line below (450..612)
 * appears to be the line number of the pasted listing, not part of the code.
 *
 * Steps visible in this function:
 *  - reject len > 0xFFFF (-EMSGSIZE) and MSG_OOB (-EOPNOTSUPP);
 *  - take the destination from msg->msg_name when one is supplied,
 *    otherwise from inet->inet_daddr (only if sk_state is TCP_ESTABLISHED,
 *    else -EDESTADDRREQ);
 *  - process control messages through ip_cmsg_send();
 *  - look up an output route with ip_route_output_flow();
 *  - send: raw_send_hdrinc() when inet->hdrincl is set, otherwise
 *    ip_append_data() followed by ip_push_pending_frames().
 *
 * Returns len when err ends up >= 0, otherwise the negative errno in err.
 */
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
450 size_t len)
451{
452 struct inet_sock *inet = inet_sk(sk);
453 struct ipcm_cookie ipc;
454 struct rtable *rt = NULL;
455 int free = 0;
456 __be32 daddr;
457 __be32 saddr;
458 u8 tos;
459 int err;
460
/* Pattern used throughout: err is preloaded, then a failed check jumps
 * to a label that returns it. */
461 err = -EMSGSIZE;
462 if (len > 0xFFFF)
463 goto out;
464
465 /*
466 * Check the flags.
467 */
468
469 err = -EOPNOTSUPP;
470 if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */
471 goto out; /* compatibility */
472
473 /*
474 * Get and verify the address.
475 */
476
477 if (msg->msg_namelen) {
478 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
479 err = -EINVAL;
480 if (msg->msg_namelen < sizeof(*usin))
481 goto out;
/* A sin_family of 0 is tolerated (a one-time message is logged);
 * any other non-AF_INET family fails with -EAFNOSUPPORT. */
482 if (usin->sin_family != AF_INET) {
483 static int complained;
484 if (!complained++)
485 printk(KERN_INFO "%s forgot to set AF_INET in "
486 "raw sendmsg. Fix it!\n",
487 current->comm);
488 err = -EAFNOSUPPORT;
489 if (usin->sin_family)
490 goto out;
491 }
492 daddr = usin->sin_addr.s_addr;
493 /* ANK: I did not forget to get protocol from port field.
494 * I just do not know, who uses this weirdness.
495 * IP_HDRINCL is much more convenient.
496 */
497 } else {
/* No address given: fall back to the socket's stored destination. */
498 err = -EDESTADDRREQ;
499 if (sk->sk_state != TCP_ESTABLISHED)
500 goto out;
501 daddr = inet->inet_daddr;
502 }
503
/* Defaults for the control-message cookie; ip_cmsg_send() may override. */
504 ipc.addr = inet->inet_saddr;
505 ipc.opt = NULL;
506 ipc.shtx.flags = 0;
507 ipc.oif = sk->sk_bound_dev_if;
508
509 if (msg->msg_controllen) {
510 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
511 if (err)
512 goto out;
/* ipc.opt was NULL before the call, so a non-NULL value was set by
 * ip_cmsg_send(); flag it so it is kfree()d at the done: label. */
513 if (ipc.opt)
514 free = 1;
515 }
516
/* Up to here ipc.addr carried the source address; from now on it
 * carries the destination. */
517 saddr = ipc.addr;
518 ipc.addr = daddr;
519
520 if (!ipc.opt)
521 ipc.opt = inet->opt;
522
523 if (ipc.opt) {
524 err = -EINVAL;
525 /* Linux does not mangle headers on raw sockets,
526 * so that IP options + IP_HDRINCL is non-sense.
527 */
528 if (inet->hdrincl)
529 goto done;
/* With srr set (presumably a source-route option - confirm), the
 * route lookup below targets ipc.opt->faddr instead of daddr. */
530 if (ipc.opt->srr) {
531 if (!daddr)
532 goto done;
533 daddr = ipc.opt->faddr;
534 }
535 }
536 tos = RT_CONN_FLAGS(sk);
537 if (msg->msg_flags & MSG_DONTROUTE)
538 tos |= RTO_ONLINK;
539
/* Multicast destination: fill in unset oif/saddr from the socket's
 * mc_index/mc_addr. */
540 if (ipv4_is_multicast(daddr)) {
541 if (!ipc.oif)
542 ipc.oif = inet->mc_index;
543 if (!saddr)
544 saddr = inet->mc_addr;
545 }
546
/* Build the flow key and resolve the output route into rt. With
 * hdrincl the flow protocol is IPPROTO_RAW, otherwise sk->sk_protocol. */
547 {
548 struct flowi fl = { .oif = ipc.oif,
549 .mark = sk->sk_mark,
550 .nl_u = { .ip4_u =
551 { .daddr = daddr,
552 .saddr = saddr,
553 .tos = tos } },
554 .proto = inet->hdrincl ? IPPROTO_RAW :
555 sk->sk_protocol,
556 };
557 if (!inet->hdrincl) {
558 err = raw_probe_proto_opt(&fl, msg);
559 if (err)
560 goto done;
561 }
562
563 security_sk_classify_flow(sk, &fl);
564 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
565 }
566 if (err)
567 goto done;
568
/* A broadcast route is refused unless SOCK_BROADCAST is set on the socket. */
569 err = -EACCES;
570 if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
571 goto done;
572
573 if (msg->msg_flags & MSG_CONFIRM)
574 goto do_confirm;
575back_from_confirm:
576
/* hdrincl: the user buffer is passed to raw_send_hdrinc() as one unit.
 * Otherwise the payload goes through ip_append_data() under the socket
 * lock and is pushed out unless MSG_MORE is set. */
577 if (inet->hdrincl)
578 err = raw_send_hdrinc(sk, msg->msg_iov, len,
579 rt, msg->msg_flags);
580
581 else {
582 if (!ipc.addr)
583 ipc.addr = rt->rt_dst;
584 lock_sock(sk);
585 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
586 &ipc, &rt, msg->msg_flags);
587 if (err)
588 ip_flush_pending_frames(sk);
589 else if (!(msg->msg_flags & MSG_MORE)) {
590 err = ip_push_pending_frames(sk);
/* -ENOBUFS is reported to the caller only when inet->recverr is set. */
591 if (err == -ENOBUFS && !inet->recverr)
592 err = 0;
593 }
594 release_sock(sk);
595 }
/* done: releases what was acquired after the early checks (options from
 * ip_cmsg_send() and the route); out: only computes the return value. */
596done:
597 if (free)
598 kfree(ipc.opt);
599 ip_rt_put(rt);
600
601out:
602 if (err < 0)
603 return err;
604 return len;
605
/* MSG_CONFIRM path: confirm the dst entry, then resume sending unless this
 * is a zero-length MSG_PROBE, in which case nothing is sent and the
 * function returns len (0). */
606do_confirm:
607 dst_confirm(&rt->u.dst);
608 if (!(msg->msg_flags & MSG_PROBE) || len)
609 goto back_from_confirm;
610 err = 0;
611 goto done;
612}
/*
 * raw_send_hdrinc - transmit a buffer that already starts with an IP header.
 *
 * NOTE: the decimal number at the start of each line below (317..400)
 * appears to be the line number of the pasted listing, not part of the code.
 *
 * @sk:     sending socket
 * @from:   source passed to memcpy_fromiovecend()
 * @length: number of bytes to copy and send, IP header included
 * @rt:     route whose device and dst are used for output
 * @flags:  message flags; MSG_PROBE and MSG_DONTWAIT are examined here
 *
 * Behaviour visible in this function:
 *  - a length greater than rt->u.dst.dev->mtu is rejected with -EMSGSIZE
 *    (after ip_local_error()); the buffer is not split here;
 *  - MSG_PROBE returns 0 without allocating or sending anything;
 *  - the whole buffer is copied into a single skb; if the header length
 *    from iph->ihl is at least sizeof(struct iphdr), saddr (when zero),
 *    tot_len, id (when zero) and the header checksum are filled in;
 *  - the skb is handed to NF_HOOK(..., NF_INET_LOCAL_OUT, ..., dst_output).
 *
 * Returns 0 on success or a negative errno; -ENOBUFS is turned into 0
 * unless inet->recverr is set.
 */
static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
317 struct rtable *rt,
318 unsigned int flags)
319{
320 struct inet_sock *inet = inet_sk(sk);
321 struct net *net = sock_net(sk);
322 struct iphdr *iph;
323 struct sk_buff *skb;
324 unsigned int iphlen;
325 int err;
326
327 if (length > rt->u.dst.dev->mtu) {
328 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
329 rt->u.dst.dev->mtu);
330 return -EMSGSIZE;
331 }
332 if (flags&MSG_PROBE)
333 goto out;
334
/* NOTE(review): the extra 15 bytes look like alignment slack - confirm.
 * On allocation failure err has been set by sock_alloc_send_skb(). */
335 skb = sock_alloc_send_skb(sk,
336 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
337 flags & MSG_DONTWAIT, &err);
338 if (skb == NULL)
339 goto error;
340 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
341
342 skb->priority = sk->sk_priority;
343 skb->mark = sk->sk_mark;
344 skb_dst_set(skb, dst_clone(&rt->u.dst));
345
/* The network header starts at the current data pointer; iph therefore
 * points at the first byte that will be copied from the caller. */
346 skb_reset_network_header(skb);
347 iph = ip_hdr(skb);
348 skb_put(skb, length);
349
350 skb->ip_summed = CHECKSUM_NONE;
351
/* The transport header is set to the same offset as the network header
 * and is not advanced anywhere else in this function. */
352 skb->transport_header = skb->network_header;
353 err = -EFAULT;
354 if (memcpy_fromiovecend((void *)iph, from, 0, length))
355 goto error_free;
356
/* ihl counts 32-bit words; convert to bytes. */
357 iphlen = iph->ihl * 4;
358
359 /*
360 * We don't want to modify the ip header, but we do need to
361 * be sure that it won't cause problems later along the network
362 * stack. Specifically we want to make sure that iph->ihl is a
363 * sane value. If ihl points beyond the length of the buffer passed
364 * in, reject the frame as invalid
365 */
366 err = -EINVAL;
367 if (iphlen > length)
368 goto error_free;
369
/* Only touch header fields when the claimed header is at least a full
 * struct iphdr. tot_len and check are always overwritten; saddr and id
 * are filled in only when the caller left them zero. */
370 if (iphlen >= sizeof(*iph)) {
371 if (!iph->saddr)
372 iph->saddr = rt->rt_src;
373 iph->check = 0;
374 iph->tot_len = htons(length);
375 if (!iph->id)
376 ip_select_ident(iph, &rt->u.dst, NULL);
377
378 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
379 }
/* NOTE(review): transport_header was set equal to network_header above,
 * so this icmphdr cast overlays the start of the IP header rather than
 * the bytes following it - confirm whether that is intended. */
380 if (iph->protocol == IPPROTO_ICMP)
381 icmp_out_count(net, ((struct icmphdr *)
382 skb_transport_header(skb))->type);
383
384 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
385 rt->u.dst.dev, dst_output);
386 if (err > 0)
387 err = net_xmit_errno(err);
/* Jumps to error, not error_free: the skb is not freed here once it has
 * been passed to NF_HOOK (presumably the output path owns it by then). */
388 if (err)
389 goto error;
390out:
391 return 0;
392
393error_free:
394 kfree_skb(skb);
395error:
396 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
397 if (err == -ENOBUFS && !inet->recverr)
398 err = 0;
399 return err;
400}
阅读(1997) | 评论(0) | 转发(0) |