这个是第二版本PATCH.
和第一个PATCH的区别是不需要添加TCP_ATOMIC SOCKET OPTION选项。这样,用户程序不需要改动。
TCP_ATOMIC选项是控制单个SOCKET本身的原子性的。也就是说,可以有选择地让你建立的某一个连接是原子写,而其他的连接不受影响。这样做的缺点是需要添加一个SOCKET OPTION,对应用程序不透明,程序必须自己调用setsockopt来控制。
这一个补丁是用一个内核变量tcp_atomic控制全局的原子性。该变量在/proc/sys/kernel下被导出,可以读写。默认是0---非原子写。
当你用
echo 512 > /proc/sys/kernel/tcp_atomic
后,内核中所有TCP SOCKET的写操作都变为原子写,单次原子写的最大块尺寸是512字节;超过该尺寸或使用多段iovec的写操作仍按非原子方式处理。
该补丁的适用内核版本仍然是2.6.27.7-9。 和上一个一样。
---
--- old/net/ipv4/tcp.c 2008-12-05 09:48:57.000000000 +0800
+++ new/net/ipv4/tcp.c 2010-02-05 21:50:21.000000000 +0800
@@ -811,6 +811,7 @@
return tmp;
}
+int tcp_atomic;
int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
{
@@ -822,6 +823,7 @@
int mss_now, size_goal;
int err, copied;
long timeo;
+ int atomic; /* is atomic write? johnye. Feb 2, 2010 */
lock_sock(sk);
TCP_CHECK_TIMER(sk);
@@ -849,6 +851,11 @@
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
+
+ /* for multi-seg data or too big chunk, no atomic. johnye. */
+ atomic = tcp_atomic;
+ if(iovlen > 1 || iov->iov_len > atomic) atomic = 0;
+
while (--iovlen >= 0) {
int seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
@@ -889,14 +896,28 @@
if (copy > seglen)
copy = seglen;
+ /* if atomic write. johnye */
+ if (atomic)
+ copy = seglen;
+
/* Where to copy to? */
if (skb_tailroom(skb) > 0) {
/* We have some space in skb head. Superb! */
- if (copy > skb_tailroom(skb))
+ /* consider atomic write, johnye */
+ if (copy > skb_tailroom(skb)) {
+ if(atomic)
+ goto skb_page_start; /* q mark yet, johnye */
+
copy = skb_tailroom(skb);
+ }
if ((err = skb_add_data(skb, from, copy)) != 0)
goto do_fault;
- } else {
+
+ goto skb_page_done;
+ //} else {
+ }
+ skb_page_start:
+ {
int merge = 0;
int i = skb_shinfo(skb)->nr_frags;
struct page *page = TCP_PAGE(sk);
@@ -925,8 +946,17 @@
} else
off = 0;
- if (copy > PAGE_SIZE - off)
- copy = PAGE_SIZE - off;
+ /* consider atomic write, johnye */
+ if (copy > PAGE_SIZE - off) {
+ if (atomic && page) {
+ put_page(page);
+ TCP_PAGE(sk) = page = NULL;
+ off = 0;
+ merge = 0;
+ } else {
+ copy = PAGE_SIZE - off;
+ }
+ }
if (!sk_wmem_schedule(sk, copy))
goto wait_for_memory;
@@ -968,6 +998,7 @@
TCP_OFF(sk) = off + copy;
}
+ skb_page_done:
if (!copied)
TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
@@ -2019,6 +2050,7 @@
lock_sock(sk);
switch (optname) {
+
case TCP_MAXSEG:
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
@@ -2276,6 +2308,7 @@
return -EINVAL;
switch (optname) {
+
case TCP_MAXSEG:
val = tp->mss_cache;
if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
--- old/kernel/sysctl.c 2008-12-05 09:49:00.000000000 +0800
+++ new/kernel/sysctl.c 2010-02-05 21:49:04.000000000 +0800
@@ -190,6 +190,9 @@
extern int prove_locking;
extern int lock_stat;
+extern int tcp_atomic;
+
/* The default sysctl tables: */
static struct ctl_table root_table[] = {
@@ -894,6 +897,26 @@
.proc_handler = &proc_dointvec,
},
#endif
+
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_atomic",
+ .data = &tcp_atomic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+
{ .ctl_name = 0 }
};
阅读(1839) | 评论(0) | 转发(0) |