最近的研究方向:Nginx
分类: LINUX
2016-09-14 11:33:09
socket系列文章都是承接第一篇socket创建,因此这里的编号和内核版本都继承了第一篇文章。
2. SYSCALL_DEFINE3函数
bind系统调用由SYSCALL_DEFINE3定义,它先根据fd找到对应的socket,再通过sock->ops->bind调用各个协议各自的bind函数:
/*
 * bind(2) entry point.
 *
 * Looks up the socket behind @fd, copies the user-supplied address into
 * kernel storage, runs the security hook and finally dispatches to the
 * protocol family's bind handler through sock->ops->bind.
 *
 * Returns the value left in err: the lookup error if @fd does not resolve
 * to a socket, otherwise the result of the first step that failed, or the
 * result of sock->ops->bind().
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	struct sockaddr_storage address;
	struct socket *sock;
	int err, fput_needed;

	/* Resolve the file descriptor to its struct socket. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		return err;

	/* Bring the caller's sockaddr into the kernel-side buffer. */
	err = move_addr_to_kernel(umyaddr, addrlen, &address);
	if (err < 0)
		goto put_file;

	/* A non-zero result from the security hook skips the bind. */
	err = security_socket_bind(sock, (struct sockaddr *)&address, addrlen);
	if (err)
		goto put_file;

	/* Protocol-family specific bind (inet_bind for PF_INET stream sockets). */
	err = sock->ops->bind(sock, (struct sockaddr *)&address, addrlen);

put_file:
	/* Release the file reference according to what the lookup reported. */
	fput_light(sock->file, fput_needed);
	return err;
}
(1)在创建TCP类型的socket时,sock->ops被初始化为下面的inet_stream_ops,因此这里的sock->ops->bind指向的就是inet_bind()函数。
onst struct proto_ops inet_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
struct fd f = fdget(fd);//通过fd获取到struct fd结构体,然后获取file
struct socket *sock;
*err = -EBADF;
if (f.file) {
sock = sock_from_file(f.file, err);//返回套接字所对应的指针,存储在file->private_data;在sock_alloc_file函数中对其进行赋值
if (likely(sock)) {
*fput_needed = f.flags;
return sock;//返回socket结构体指针
}
fdput(f);
}
return NULL;
}
bind系统调用先进入套接口层的inet_bind(),然后再调用传输层的接口函数。TCP的传输层接口函数为inet_csk_get_port,bind的主要功能由该函数实现:如果用户在系统调用中指定的端口号为0,系统会自动选择一个可用的端口号。选择可用端口号的思路是:先在绑定表中选择可用的端口号,如果在绑定表中没有可用的端口号,再选择空闲的端口号。
inet_bind()定义在net/ipv4/af_inet.c文件中。
/*
 * inet_bind - socket-layer bind handler for PF_INET sockets.
 *
 * @sock:     socket being bound
 * @uaddr:    requested local address, interpreted as struct sockaddr_in
 * @addr_len: length of @uaddr as supplied by the caller
 *
 * If the protocol supplies its own bind hook (sk->sk_prot->bind), that hook
 * does all the work and its return value is passed through unchanged.
 * Otherwise the address and port are validated, the socket is locked, the
 * protocol's get_port() hook is asked for the port, and the local
 * address/port fields of the inet_sock are filled in.
 *
 * Returns 0 on success, or:
 *   -EINVAL        @addr_len too short, socket not in TCP_CLOSE, or a local
 *                  port number is already set
 *   -EADDRNOTAVAIL address is not acceptable for binding (see check below)
 *   -EACCES        non-zero port below PROT_SOCK without CAP_NET_BIND_SERVICE
 *   -EADDRINUSE    get_port() returned non-zero
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;// requested address viewed as sockaddr_in
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
unsigned short snum;// requested port, host byte order
int chk_addr_ret;// address type returned by inet_addr_type()
int err;
/* If the socket has its own bind function then use it. (RAW)
 * raw_prot sets .bind = raw_bind, so raw sockets leave through this branch. */
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
}
err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))// address shorter than a sockaddr_in
goto out;
chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);// classify the address; compared with RTN_LOCAL/RTN_MULTICAST/RTN_BROADCAST below
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
* allowing applications to make a non-local bind solves
* several problems with systems using dynamic addressing.
* (ie. your servers still start up even if your ISDN link
* is temporarily down)
*
* Annotations (translated from the original notes):
* - sysctl_ip_nonlocal_bind says whether binding a non-local IP address
*   is allowed. Per the original note it defaults to 0 (not allowed) and
*   is exposed as ip_nonlocal_bind under /proc/sys/net/ipv4.
* - As the paragraph above explains, non-local binds help servers on
*   dynamically addressed systems, so the escape hatches are useful.
* - inet->freebind: per the original note it is set through
*   do_ip_setsockopt (option IP_FREEBIND) and permits binding a non-local
*   or non-existent IP address.
*   NOTE(review): the original note also claimed a default of 1; that
*   looks wrong - confirm against the kernel version in use.
* - inet->transparent: per the original note, lets a server listen on
*   any IP address, even one that is not local to this host.
*
* The bind is refused only when none of those escape hatches is set, the
* address is not INADDR_ANY, and its type is none of local, multicast or
* broadcast.
*/
err = -EADDRNOTAVAIL;
if (!sysctl_ip_nonlocal_bind &&
!(inet->freebind || inet->transparent) &&
addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
chk_addr_ret != RTN_BROADCAST)
goto out;
snum = ntohs(addr->sin_port);// requested port converted to host byte order
err = -EACCES;
/* A non-zero port below PROT_SOCK (1024 per the original note) requires
 * CAP_NET_BIND_SERVICE; port 0 is exempt from this check. */
if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
goto out;
/* We keep a pair of addresses. rcv_saddr is the one
* used by hash lookups, and saddr is used for transmit.
*
* In the BSD API these are the same except where it
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
if (sk->sk_state != TCP_CLOSE || inet->num) // refuse unless the socket is in TCP_CLOSE and has no local port number yet
goto out_release_sock;
inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;// record the requested address for both receive and transmit
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->saddr = 0; /* Use device */
/* Make sure we are allowed to bind here.
 * This is the transport-layer hook; tcp_prot sets .get_port = inet_csk_get_port. */
if (sk->sk_prot->get_port(sk, snum)) {
inet->saddr = inet->rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
if (inet->rcv_saddr)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;// flag in sk_userlocks: a specific (non-zero) address was bound
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK; // flag in sk_userlocks: a specific (non-zero) port was requested
/* Store the local port in network byte order; inet->num is presumably
 * filled in by get_port() - confirm against the protocol implementation. */
inet->sport = htons(inet->num);
inet->daddr = 0;
inet->dport = 0;
sk_dst_reset(sk);
err = 0;
out_release_sock:
release_sock(sk);
/* Paths that jump straight to "out" never took the socket lock. */
out:
return err;
}
struct proto raw_prot = {
.name = "RAW",
.owner = THIS_MODULE,
.close = raw_close,
.destroy = raw_destroy,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = raw_ioctl,
.init = raw_init,
.setsockopt = raw_setsockopt,
.getsockopt = raw_getsockopt,
.sendmsg = raw_sendmsg,
.recvmsg = raw_recvmsg,
.bind = raw_bind,
.backlog_rcv = raw_rcv_skb,
.release_cb = ip4_datagram_release_cb,
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw_sock),
.h.raw_hash = &raw_v4_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_raw_setsockopt,
.compat_getsockopt = compat_raw_getsockopt,
.compat_ioctl = compat_raw_ioctl,
#endif
};
而对于下面TCP类型的proto(tcp_prot),并没有定义bind函数,因此inet_bind()不会走sk->sk_prot->bind分支,而是继续向下执行,最终调用get_port(即inet_csk_get_port)。
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v4_init_sock,
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
.recvmsg = tcp_recvmsg,
.backlog_rcv = tcp_v4_do_rcv,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_DESTROY_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
};