1.插入
/*
 * Excerpted fields of the TCP protocol descriptor: the .hash callback
 * is what inet_csk_listen_start() invokes (via sk->sk_prot->hash) to
 * insert a listening socket into the listen hash tables.
 *
 * Fix: the original quote dropped the semicolon terminating the
 * initializer of this file-scope definition.
 */
struct proto tcp_prot = {
	.name	= "TCP",
	.owner	= THIS_MODULE,
	.hash	= inet_hash,
};
Feb 16 10:26:36 chro kernel: [ 14.108487] __inet_hash+0x7d/0x2f0
Feb 16 10:26:36 chro kernel: [ 14.108488] inet_hash+0x28/0x50
Feb 16 10:26:36 chro kernel: [ 14.108490] inet_csk_listen_start+0xc3/0xf0
Feb 16 10:26:36 chro kernel: [ 14.108499] inet_listen+0x9b/0x1a0
Feb 16 10:26:36 chro kernel: [ 14.108502] ? apparmor_socket_listen+0x1e/0x20
Feb 16 10:26:36 chro kernel: [ 14.108505] __sys_listen+0x71/0xb0
Feb 16 10:26:36 chro kernel: [ 14.108506] __x64_sys_listen+0x16/0x20
Feb 16 10:26:36 chro kernel: [ 14.108508] do_syscall_64+0x57/0x190
/*
 * Hash a socket into the inet lookup tables.
 *
 * Sockets still in TCP_CLOSE are not hashed and report success.
 * Bottom halves are disabled around __inet_hash() because the same
 * buckets are touched from softirq context on the receive path.
 *
 * Returns 0 on success, or the error propagated from __inet_hash().
 */
int inet_hash(struct sock *sk)
{
	int err;

	if (sk->sk_state == TCP_CLOSE)
		return 0;

	local_bh_disable();
	err = __inet_hash(sk, NULL);
	local_bh_enable();

	return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
/*
 * Insert @sk into the protocol's hash tables.
 *
 * Listening sockets are linked into the per-bucket listening hash
 * (ilb) and, via inet_hash2(), into a secondary hash; every other
 * state is delegated to inet_ehash_nolisten().
 *
 * Returns 0 on success, or the error from inet_reuseport_add_sock().
 */
int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
int err = 0;
/* Non-listening sockets go into the established hash instead. */
if (sk->sk_state != TCP_LISTEN) {
inet_ehash_nolisten(sk, osk);
return 0;
}
/* A socket being hashed must not already sit on a hash chain. */
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
spin_lock(&ilb->lock);
/* Join (or create) the SO_REUSEPORT group before becoming visible. */
if (sk->sk_reuseport) {
err = inet_reuseport_add_sock(sk, ilb);
if (err)
goto unlock;
}
/*
 * NOTE(review): IPv6 reuseport listeners are appended at the tail of
 * the chain while everything else goes to the head — presumably so
 * lookups prefer the head entries; confirm against the lookup path.
 */
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&sk->sk_family == AF_INET6)
hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
else
hlist_add_head_rcu(&sk->sk_node, &ilb->head);
/* Also index the listener in the secondary (lhash2) table. */
inet_hash2(hashinfo, sk);
ilb->count++;
/* Lockless RCU readers may still hold sk; free only after a grace period. */
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
spin_unlock(&ilb->lock);
return err;
}
EXPORT_SYMBOL(__inet_hash);
/*
 * Link @sk into the secondary listening hash (lhash2), keyed by
 * inet_lhash2_bucket_sk(). A no-op when the lhash2 table was never
 * allocated. IPv6 reuseport listeners are appended at the tail of the
 * bucket chain; all other sockets are pushed at the head.
 */
static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
{
	struct hlist_node *node = &inet_csk(sk)->icsk_listen_portaddr_node;
	struct inet_listen_hashbucket *bucket;

	if (!h->lhash2)
		return;

	bucket = inet_lhash2_bucket_sk(h, sk);

	spin_lock(&bucket->lock);
	if (sk->sk_reuseport && sk->sk_family == AF_INET6)
		hlist_add_tail_rcu(node, &bucket->head);
	else
		hlist_add_head_rcu(node, &bucket->head);
	bucket->count++;
	spin_unlock(&bucket->lock);
}
2.读取
Feb 16 10:26:39 chro kernel: [ 18.604601] inet_lhash2_lookup+0x13d/0x150
Feb 16 10:26:39 chro kernel: [ 18.604602] __inet_lookup_listener+0x14d/0x160
Feb 16 10:26:39 chro kernel: [ 18.604603] tcp_v4_rcv+0x3f5/0xad0
Feb 16 10:26:39 chro kernel: [ 18.604605] ? generic_copy_file_checks+0xca/0x130
Feb 16 10:26:39 chro kernel: [ 18.604606] ip_protocol_deliver_rcu+0x31/0x1b0
Feb 16 10:26:39 chro kernel: [ 18.604608] ip_local_deliver_finish+0x48/0x50
Feb 16 10:26:39 chro kernel: [ 18.604609] ip_local_deliver+0x7e/0xe0
Feb 16 10:26:39 chro kernel: [ 18.604610] ip_rcv_finish+0x84/0xa0
Feb 16 10:26:39 chro kernel: [ 18.604611] ip_rcv+0xbc/0xd0
Feb 16 10:26:39 chro kernel: [ 18.604613] __netif_receive_skb_one_core+0x84/0xa0
Feb 16 10:26:39 chro kernel: [ 18.604613] __netif_receive_skb+0x18/0x60
Feb 16 10:26:39 chro kernel: [ 18.604614] process_backlog+0xa0/0x170
Feb 16 10:26:39 chro kernel: [ 18.604615] net_rx_action+0x140/0x3c0
Feb 16 10:26:39 chro kernel: [ 18.604616] __do_softirq+0xe4/0x2da
Feb 16 10:26:39 chro kernel: [ 18.604617] do_softirq_own_stack+0x2a/0x40
Feb 16 10:26:39 chro kernel: [ 18.604618]
Feb 16 10:26:39 chro kernel: [ 18.604619] do_softirq.part.20+0x46/0x50
Feb 16 10:26:39 chro kernel: [ 18.604619] __local_bh_enable_ip+0x50/0x60
Feb 16 10:26:39 chro kernel: [ 18.604620] ip_finish_output2+0x1ac/0x5a0
Feb 16 10:26:39 chro kernel: [ 18.604621] ? __switch_to_asm+0x40/0x70
Feb 16 10:26:39 chro kernel: [ 18.604622] ? __switch_to_asm+0x34/0x70
Feb 16 10:26:39 chro kernel: [ 18.604623] __ip_finish_output+0xfa/0x1c0
Feb 16 10:26:39 chro kernel: [ 18.604624] ? __ip_finish_output+0xfa/0x1c0
Feb 16 10:26:39 chro kernel: [ 18.604625] ip_finish_output+0x2c/0xa0
Feb 16 10:26:39 chro kernel: [ 18.604626] ip_output+0x6d/0xe0
Feb 16 10:26:39 chro kernel: [ 18.604626] ? __ip_finish_output+0x1c0/0x1c0
Feb 16 10:26:39 chro kernel: [ 18.604627] ip_local_out+0x3b/0x50
Feb 16 10:26:39 chro kernel: [ 18.604628] __ip_queue_xmit+0x165/0x400
Feb 16 10:26:39 chro kernel: [ 18.604629] ip_queue_xmit+0x10/0x20
Feb 16 10:26:39 chro kernel: [ 18.604630] __tcp_transmit_skb+0x58d/0xb20
Feb 16 10:26:39 chro kernel: [ 18.604631] tcp_connect+0xaf5/0xd90
Feb 16 10:26:39 chro kernel: [ 18.604632] tcp_v4_connect+0x3df/0x4f0
Feb 16 10:26:39 chro kernel: [ 18.604634] __inet_stream_connect+0x249/0x390
Feb 16 10:26:39 chro kernel: [ 18.604635] ? _cond_resched+0x19/0x40
Feb 16 10:26:39 chro kernel: [ 18.604635] ? aa_sk_perm+0x43/0x190
Feb 16 10:26:39 chro kernel: [ 18.604637] inet_stream_connect+0x3b/0x60
Feb 16 10:26:39 chro kernel: [ 18.604638] __sys_connect+0xa3/0x120
Feb 16 10:26:39 chro kernel: [ 18.604640] ? fd_install+0x25/0x30
Feb 16 10:26:39 chro kernel: [ 18.604641] ? __sys_socket+0x98/0xf0
Feb 16 10:26:39 chro kernel: [ 18.604642] __x64_sys_connect+0x1a/0x20
Feb 16 10:26:39 chro kernel: [ 18.604643] do_syscall_64+0x57/0x190
3.SO_REUSEPORT套接字
/*
 * This is meant for all protocols to use and covers goings on
 * at the socket level. Everything here is generic.
 */
/*
 * NOTE(review): truncated excerpt — the quote elides the function's
 * prologue (where `sk` and `valbool` are derived from @sock/@optval),
 * the other switch cases, and the closing brace/return of the function.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
switch (optname) {
case SO_REUSEPORT:
/* Record the user's SO_REUSEPORT choice on the socket. */
sk->sk_reuseport = valbool;
break;
}
/*
 * Attach @sk to an SO_REUSEPORT group in listen bucket @ilb.
 *
 * Walks the bucket looking for an existing compatible reuseport peer
 * (same family, same v6-only setting, same bound device, same bind
 * bucket, reuseport enabled, same owning uid, equal rcv_saddr) and
 * joins its group; otherwise allocates a fresh group for @sk.
 *
 * Returns 0 on success or the error from the reuseport helpers.
 */
static int inet_reuseport_add_sock(struct sock *sk,
struct inet_listen_hashbucket *ilb)
{
	struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
	kuid_t uid = sock_i_uid(sk);
	struct sock *sk2;

	sk_for_each_rcu(sk2, &ilb->head) {
		if (sk2 == sk)
			continue;
		if (sk2->sk_family != sk->sk_family)
			continue;
		if (ipv6_only_sock(sk2) != ipv6_only_sock(sk))
			continue;
		if (sk2->sk_bound_dev_if != sk->sk_bound_dev_if)
			continue;
		if (inet_csk(sk2)->icsk_bind_hash != tb)
			continue;
		if (!sk2->sk_reuseport || !uid_eq(uid, sock_i_uid(sk2)))
			continue;
		if (!inet_rcv_saddr_equal(sk, sk2, false))
			continue;
		/* Compatible peer found: join its reuseport group. */
		return reuseport_add_sock(sk, sk2,inet_rcv_saddr_any(sk));
	}

	/* No compatible peer: start a new group with @sk alone. */
	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
}
-------------------------------
这里重点讲两个瓶颈点。第一个是全局的监听队列:在我们编写用户态网络程序时,对同一个网络端口仅允许一个监听实例,接收的连接请求由一个队列来维护;当并发的短连接请求量较大时,会对这个队列造成较大的竞争压力,成为一个很大的瓶颈点——至少在 Linux kernel 3.9 版本之前是这样。3.9 版本合并了一个很关键的特性 SO_REUSEPORT,支持多个进程或线程监听相同的端口,每个实例分配一个独立的队列,一定程度上缓解了这个问题。用更容易理解的角度来描述,就是支持了我们在用户态上对同一个网络端口,可以有多个进程或线程去监听它。正是因为有这样一个特性,我们可以根据 CPU 的核心数量来选择端口监听实例的数量,进一步优化网络连接处理的性能。第二点也是一个比较大的问题:Linux kernel 中有一个全局的连接表,用于维护 TCP 连接状态,这个表在维护大量 TCP 连接时,会造成相当严重的资源竞争。总的来说,有锁的地方、有共享资源的地方都可能成为瓶颈点。 ----摘自《高性能网络编程(四):从C10K到C10M高性能网络应用的理论探索》
阅读(1055) | 评论(0) | 转发(0) |