Chinaunix首页 | 论坛 | 博客
  • 博客访问: 43090
  • 博文数量: 5
  • 博客积分: 388
  • 博客等级: 一等列兵
  • 技术积分: 60
  • 用 户 组: 普通用户
  • 注册时间: 2006-01-09 10:08
文章分类
文章存档

2011年(1)

2010年(2)

2007年(2)

我的朋友

分类:

2010-02-05 23:15:34

这个是第二版本PATCH.

和第一个PATCH的区别是不需要添加TCP_ATOMIC SOCKET OPTION选项。这样,用户程序不需要改动。
TCP_ATOMIC选项控制的是单个SOCKET本身的原子性,也就是说,可以有选择地让你建立的某一个连接使用原子写,而其他连接不受影响。这样做的缺点是需要添加一个SOCKET OPTION,对程序不透明,必须由程序自己调用setsockopt来控制。

这个补丁用一个内核变量tcp_atomic来控制全局的原子性。该变量在/proc/sys/kernel下被导出,可以读写。默认值是0,即非原子写。

当你用
echo 512 > /proc/sys/kernel/tcp_atomic
后,内核中所有TCP SOCKET的写操作都变为原子写,原子写的最大块尺寸是512字节。对于多段(iovlen > 1)的数据,或者长度超过该尺寸的数据块,仍然按非原子方式写。

该补丁的适用内核版本仍然是2.6.27.7-9。 和上一个一样。

---

--- old/net/ipv4/tcp.c  2008-12-05 09:48:57.000000000 +0800
+++ new/net/ipv4/tcp.c  2010-02-05 21:50:21.000000000 +0800
@@ -811,6 +811,7 @@
        return tmp;
 }

+int tcp_atomic;
 int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
                size_t size)
 {
@@ -822,6 +823,7 @@
        int mss_now, size_goal;
        int err, copied;
        long timeo;
+       int atomic;     /* is atomic write? johnye. Feb 2, 2010 */

        lock_sock(sk);
        TCP_CHECK_TIMER(sk);
@@ -849,6 +851,11 @@
        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                goto do_error;

+
+        /* for multi-seg data or too big chunk, no atomic. johnye. */
+       atomic = tcp_atomic;
+        if(iovlen > 1 || iov->iov_len > atomic) atomic = 0;
+
        while (--iovlen >= 0) {
                int seglen = iov->iov_len;
                unsigned char __user *from = iov->iov_base;
@@ -889,14 +896,28 @@
                        if (copy > seglen)
                                copy = seglen;

+                       /* if atomic write. johnye */
+                       if (atomic)
+                               copy = seglen;
+
                        /* Where to copy to? */
                        if (skb_tailroom(skb) > 0) {
                                /* We have some space in skb head. Superb! */
-                               if (copy > skb_tailroom(skb))
+                               /* consider atomic write, johnye */
+                               if (copy > skb_tailroom(skb)) {
+                                       if(atomic)
+                                           goto skb_page_start;        /* q mark yet, johnye */
+
                                        copy = skb_tailroom(skb);
+                               }
                                if ((err = skb_add_data(skb, from, copy)) != 0)
                                        goto do_fault;
-                       } else {
+
+                               goto skb_page_done;
+                       //} else {
+                       }
+                       skb_page_start:
+                       {
                                int merge = 0;
                                int i = skb_shinfo(skb)->nr_frags;
                                struct page *page = TCP_PAGE(sk);
@@ -925,8 +946,17 @@
                                } else
                                        off = 0;

-                               if (copy > PAGE_SIZE - off)
-                                       copy = PAGE_SIZE - off;
+                               /* consider atomic write, johnye */
+                               if (copy > PAGE_SIZE - off) {
+                                       if (atomic && page) {
+                                                put_page(page);
+                                                TCP_PAGE(sk) = page = NULL;
+                                                off = 0;
+                                               merge = 0;
+                                       } else {
+                                               copy = PAGE_SIZE - off;
+                                       }
+                               }

                                if (!sk_wmem_schedule(sk, copy))
                                        goto wait_for_memory;
@@ -968,6 +998,7 @@

                                TCP_OFF(sk) = off + copy;
                        }
+                       skb_page_done:

                        if (!copied)
                                TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
@@ -2019,6 +2050,7 @@
        lock_sock(sk);

        switch (optname) {
+
        case TCP_MAXSEG:
                /* Values greater than interface MTU won't take effect. However
                 * at the point when this call is done we typically don't yet
@@ -2276,6 +2308,7 @@
                return -EINVAL;

        switch (optname) {
+
        case TCP_MAXSEG:
                val = tp->mss_cache;
                if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
--- old/kernel/sysctl.c 2008-12-05 09:49:00.000000000 +0800
+++ new/kernel/sysctl.c 2010-02-05 21:49:04.000000000 +0800
@@ -190,6 +190,9 @@
 extern int prove_locking;
 extern int lock_stat;

+extern int tcp_atomic;
+
 /* The default sysctl tables: */

 static struct ctl_table root_table[] = {
@@ -894,6 +897,26 @@
                .proc_handler   = &proc_dointvec,
        },
 #endif
+
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "tcp_atomic",
+               .data           = &tcp_atomic,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+
        { .ctl_name = 0 }
 };

阅读(1941) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~