UNIX域套接字用于同一台机器上运行的进程之间的通信。虽然因特网域套接字也可用于同一目的,但UNIX域套接字的效率更高。为什么UNIX域套接字的效率更高呢?因为在实现层面,UNIX域套接字仅仅复制数据:它们不执行协议处理,不需要添加或删除网络报头,无需计算校验和,不需要产生序列号,也无需发送确认报文。网络套接字要处理很多与网络相关的内容,还要查找路由、查找发送接口,总之相当麻烦;本地套接字就简单多了,其实只是单纯地把一个进程想要发送的内容拷贝到内核里,再由内核把这段内存挂到接收端的接收链表(接收队列)上,接收端读取时再拷贝到用户空间,就这么简单。
UNIX域套接字最常用的有两种类型:stream socket(语义类似TCP,面向连接的字节流)和datagram socket(语义类似UDP,保留消息边界);此外内核还支持SOCK_SEQPACKET类型。后续再详细讲解实现的时候,我们会看到数据流服务与数据报服务的大不同。
unix socket stream通信示例:
server
-
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
-
-
/*
 * Echo loop for one connected client.
 *
 * Reads chunks from sockfd, logs each one and writes it back unchanged,
 * until the peer closes the connection (read() returns 0) or an I/O error
 * occurs.
 *
 * sockfd: connected stream socket. The caller keeps ownership of the
 *         descriptor and is responsible for closing it.
 */
void str_echo(int sockfd)
{
    char recv_buff[1024];
    ssize_t n;

    for (;;) {
        /* Read at most size-1 bytes so there is always room for the
         * terminator that printf("%s") relies on. */
        n = read(sockfd, recv_buff, sizeof(recv_buff) - 1);
        if (n < 0 && errno == EINTR)
            continue;
        if (n <= 0)
            break;

        recv_buff[n] = '\0';
        printf("recv from client %s\n", recv_buff);

        /* write() may accept fewer bytes than requested; keep going until
         * the whole chunk has been echoed. */
        ssize_t done = 0;
        while (done < n) {
            ssize_t w = write(sockfd, recv_buff + done, (size_t)(n - done));
            if (w < 0) {
                if (errno == EINTR)
                    continue;
                printf("write to client error:%s\n", strerror(errno));
                return;
            }
            done += w;
        }
    }

    if (n < 0)
        printf("read from client error:%s\n", strerror(errno));
}
-
/*
 * UNIX-domain stream echo server.
 *
 * Usage: <prog> <socket-path>
 *
 * Binds a SOCK_STREAM socket to the path given in argv[1] (removing any
 * stale file at that path first), then forks one child per accepted
 * connection. Each child runs str_echo() on its connection and exits.
 *
 * Returns -1 on a usage or setup error; otherwise it serves until killed.
 */
int main(int argc, char **argv)
{
    int sockfd;
    int connectfd;
    struct sockaddr_un addr;
    struct sockaddr_un client_addr;
    socklen_t client_len;
    size_t path_len;
    pid_t childpid;

    if (argc != 2) {
        printf("Usage:%s <socket-path>\n", argv[0]);
        return -1;
    }

    /* Reject over-long paths up front instead of silently truncating them
     * (or leaving sun_path without a terminator). */
    path_len = strlen(argv[1]);
    if (path_len >= sizeof(addr.sun_path)) {
        printf("socket path too long:%s\n", argv[1]);
        return -1;
    }

    /* The parent never wait()s for its children; ignoring SIGCHLD asks the
     * system to discard their exit status so they do not pile up as zombies. */
    signal(SIGCHLD, SIG_IGN);

    sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sockfd < 0) {
        printf("socket create error:%s\n", strerror(errno));
        return -1;
    }

    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    memcpy(addr.sun_path, argv[1], path_len + 1);

    /* Remove a socket file left behind by a previous run; bind() would
     * otherwise fail because the path already exists. */
    unlink(argv[1]);

    if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        printf("socket bind error:%s\n", strerror(errno));
        close(sockfd);
        return -1;
    }

    if (listen(sockfd, 10) < 0) {
        printf("socket listen error:%s\n", strerror(errno));
        close(sockfd);
        return -1;
    }

    for (;;) {
        /* accept() overwrites the length argument, so reset it every time. */
        client_len = sizeof(client_addr);
        connectfd = accept(sockfd, (struct sockaddr *)&client_addr, &client_len);
        if (connectfd < 0) {
            if (errno != EINTR)
                printf("accept error:%s\n", strerror(errno));
            continue;
        }

        childpid = fork();
        if (childpid < 0) {
            printf("fork error:%s\n", strerror(errno));
        } else if (childpid == 0) {
            /* Child: the listening socket belongs to the parent. */
            close(sockfd);
            str_echo(connectfd);
            close(connectfd);
            exit(0);
        }

        /* Parent (or failed fork): drop our copy of the connection. */
        close(connectfd);
    }
}
socket服务器端的代码大多类似,socket->bind->listen->accept,接收客户端的连接之后,从客户端接收数据,并发送回去。
client:
-
#include <sys/types.h>
-
#include <sys/socket.h>
-
#include <errno.h>
-
#include <sys/un.h>
-
#include <sys/sendfile.h>
-
#include <fcntl.h>
-
-
/*
 * Print the file status flags of a descriptor.
 *
 * Queries fd with fcntl(F_GETFL), prints the raw flag word as
 * "state:<n>", and prints "append" when O_APPEND is set. If fcntl() fails,
 * an error line is printed and nothing else.
 *
 * fd: descriptor to inspect.
 */
void show_flag(int fd)
{
    int val = fcntl(fd, F_GETFL);

    if (val == -1) {
        printf("fcntl error for F_GETFL\n");
        return;
    }

    printf("state:%d\n", val);

    /* (val & O_APPEND) is either 0 or O_APPEND itself, so it must be tested
     * as a boolean; comparing it against the literal 1 never matches. */
    if (val & O_APPEND)
        printf("append\n");
}
-
-
/*
 * Client request/response loop.
 *
 * For every line read from fp: send the line to the server over sockfd,
 * wait for one reply and print it. The loop ends at end-of-file on fp,
 * when the server closes the connection, or on an I/O error.
 *
 * fp:     input stream to read lines from (the caller passes stdin).
 * sockfd: connected stream socket; the caller keeps ownership and closes it.
 *
 * An earlier experiment pushed stdin to the socket with sendfile(2) instead
 * of fgets()/write(); it was disabled and has been dropped here.
 */
void str_cli(FILE *fp, int sockfd)
{
    char input_buff[1024];
    char recv_buff[1024];

    while (fgets(input_buff, sizeof(input_buff), fp)) {
        size_t len = strlen(input_buff);
        size_t sent = 0;
        ssize_t n;

        /* Nothing to send means no reply would ever arrive; skip instead of
         * blocking in read(). */
        if (len == 0)
            continue;

        /* write() may be partial; loop until the whole line is out. */
        while (sent < len) {
            ssize_t w = write(sockfd, input_buff + sent, len - sent);
            if (w < 0) {
                if (errno == EINTR)
                    continue;
                printf("write to server error:%s\n", strerror(errno));
                return;
            }
            sent += (size_t)w;
        }

        /* Keep one byte free for the terminator printf("%s") needs. */
        do {
            n = read(sockfd, recv_buff, sizeof(recv_buff) - 1);
        } while (n < 0 && errno == EINTR);

        if (n < 0) {
            printf("read from server error:%s\n", strerror(errno));
            return;
        }
        if (n == 0) {
            printf("server closed the connection\n");
            return;
        }

        recv_buff[n] = '\0';
        printf("recv from server:%s\n", recv_buff);
    }
}
-
-
/*
 * UNIX-domain stream echo client.
 *
 * Usage: <prog> <socket-path>
 *
 * Connects to the server socket bound at argv[1], then hands stdin and the
 * connected socket to str_cli().
 *
 * Returns 0 after str_cli() finishes, -1 on a usage or connection error.
 */
int main(int argc, char **argv)
{
    int sockfd;
    struct sockaddr_un server_addr;
    size_t path_len;

    if (argc != 2) {
        printf("Usage:%s <socket-path>\n", argv[0]);
        return -1;
    }

    /* Reject over-long paths instead of connecting to a truncated (and
     * possibly unterminated) sun_path. */
    path_len = strlen(argv[1]);
    if (path_len >= sizeof(server_addr.sun_path)) {
        printf("socket path too long:%s\n", argv[1]);
        return -1;
    }

    sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sockfd < 0) {
        printf("socket create error:%s\n", strerror(errno));
        return -1;
    }

    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sun_family = AF_UNIX;
    memcpy(server_addr.sun_path, argv[1], path_len + 1);

    if (connect(sockfd, (struct sockaddr *)&server_addr,
                sizeof(server_addr)) < 0) {
        printf("connect %s error:%s\n", argv[1], strerror(errno));
        close(sockfd);
        return -1;
    }

    printf("connect to %s success\n", argv[1]);
    str_cli(stdin, sockfd);

    close(sockfd);
    return 0;
}
客户端把服务器绑定的地址作为connect的第二个参数传入,连接服务器;随后从标准输入读取一行,发送到服务器,服务器将接收到的数据再原样发送回客户端。
创建socket,执行socket(AF_UNIX, SOCK_STREAM, 0),三个参数分别是协议族、socket类型、协议类型(最后一个我没有用过,基本都是填写0)。在这里不对第三个参数进行分析,我们的重点放在前面两个参数。
协议族有很多 包括但不限于
AF_UNIX, AF_LOCAL Local communication unix(7)
AF_INET IPv4 Internet protocols ip(7)
AF_INET6 IPv6 Internet protocols ipv6(7)
AF_IPX IPX - Novell protocols
AF_NETLINK Kernel user interface device netlink(7)
AF_X25 ITU-T X.25 / ISO-8208 protocol x25(7)
AF_AX25 Amateur radio AX.25 protocol
AF_ATMPVC Access to raw ATM PVCs
AF_APPLETALK AppleTalk ddp(7)
AF_PACKET Low level packet interface packet(7)
AF_ALG Interface to kernel crypto API
有这么多,其实说白了,bsd socket是一系列的通用通信接口,支持不同种类的socket,我们一般用的是AF_INET(IPV4通信),AF_UNIX(本地通信),AF_NETLINK(应用层与内核层通信),在我的工作中这三类socket都会用到,后续一点一点的写吧
socket函数在内核中执行的是SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
-
/*
 * socket(2) entry point.
 *
 * Splits the user-supplied type into the real socket type and the
 * SOCK_CLOEXEC / SOCK_NONBLOCK modifier bits, creates the socket via
 * sock_create() and attaches it to a file descriptor via sock_map_fd().
 *
 * Returns whatever sock_map_fd() returns on success, -EINVAL when unknown
 * modifier bits are set, or the negative error from sock_create() /
 * sock_map_fd().
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	struct socket *sock;
	int flags;
	int err;
	int fd;

	/* Check the SOCK_* constants for consistency. */
	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

	/* Everything outside SOCK_TYPE_MASK is a modifier flag; only
	 * SOCK_CLOEXEC and SOCK_NONBLOCK are accepted. */
	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	/* Where SOCK_NONBLOCK and O_NONBLOCK differ, translate the former
	 * into the latter before handing the flags to sock_map_fd(). */
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) {
		flags &= ~SOCK_NONBLOCK;
		flags |= O_NONBLOCK;
	}

	err = sock_create(family, type, protocol, &sock);
	if (err < 0)
		return err;

	fd = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
	if (fd < 0)
		sock_release(sock);	/* mapping failed: drop the new socket */

	/* It may be already another descriptor 8) Not kernel problem. */
	return fd;
}
内核中的函数都挺复杂的,这个函数做了很多检查,检查参数,在这里不再一一描述了,因为很多选项我们平时不会用到,用到了再写也不迟,今天的重点是socket创建,socket系统调用调用创建socket 的函数
sock_create(family, type, protocol, &sock);其中struct socket这个结构体主要用在BSD socket层,是最上层的结构,也就是说这个结构体离我们的应用层最近,这是我们进入内核之后接触到的第一层。
sock_create实际调用__sock_create函数进行socket创建:
-
/*
 * __sock_create - allocate a struct socket and let the protocol family
 * initialise it.
 *
 * net:      passed through to the family's ->create() callback.
 * family:   protocol family; must be in [0, NPROTO).
 * type:     socket type; must be in [0, SOCK_MAX).
 * protocol: passed through to the security hooks and to ->create().
 * res:      on success, *res receives the new socket.
 * kern:     passed through to the security hooks and to ->create().
 *
 * Returns 0 on success. On failure returns a negative error:
 * -EAFNOSUPPORT (family out of range, not registered, or a module
 * reference could not be taken), -EINVAL (type out of range), -ENFILE
 * (sock_alloc() failed), or whatever the security hooks or ->create()
 * returned. *res is only written on success.
 */
int __sock_create(struct net *net, int family, int type, int protocol,
-
struct socket **res, int kern)
-
{
-
int err;
-
struct socket *sock;
-
const struct net_proto_family *pf;
-
-
/*
 * Check that family and type are in range.
 */
-
if (family < 0 || family >= NPROTO)
-
return -EAFNOSUPPORT;
-
if (type < 0 || type >= SOCK_MAX)
-
return -EINVAL;
-
-
/* Compatibility.
 *
 * This uglymoron is moved from INET layer to here to avoid
 * deadlock in module load.
 *
 * The obsolete (PF_INET, SOCK_PACKET) combination is logged once and
 * rewritten to PF_PACKET.
 */
-
if (family == PF_INET && type == SOCK_PACKET) {
-
pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
-
current->comm);
-
family = PF_PACKET;
-
}
-
-
/* Security hook runs before anything is allocated; a non-zero result
 * is returned to the caller as-is. */
err = security_socket_create(family, type, protocol, kern);
-
if (err)
-
return err;
-
-
/*
 * Allocate the socket and allow the family to set things up. if
 * the protocol is 0, the family is instructed to select an appropriate
 * default.
 */
-
sock = sock_alloc();
-
if (!sock) {
-
net_warn_ratelimited("socket: no more sockets\n");
-
return -ENFILE; /* Not exactly a match, but it's the closest POSIX thing */
-
}
-
-
sock->type = type;
-
-
#ifdef CONFIG_MODULES
-
/* Attempt to load a protocol module if the find failed.
 *
 * 12/09/1996 Marcin: this makes REALLY only sense, if the user
 * requested real, full-featured networking support upon configuration.
 * Otherwise module support will break!
 */
-
if (rcu_access_pointer(net_families[family]) == NULL)
-
request_module("net-pf-%d", family);
-
#endif
-
-
/* Look up the registered family under RCU. err is preset to
 * -EAFNOSUPPORT so both failure exits below report it. */
rcu_read_lock();
-
pf = rcu_dereference(net_families[family]);
-
err = -EAFNOSUPPORT;
-
if (!pf)
-
goto out_release;
-
-
/*
 * We will call the ->create function, that possibly is in a loadable
 * module, so we have to bump that loadable module refcnt first.
 */
-
if (!try_module_get(pf->owner))
-
goto out_release;
-
-
/* Now protected by module ref count */
-
rcu_read_unlock();
-
-
/* Family-specific initialisation of the socket. */
err = pf->create(net, sock, protocol, kern);
-
if (err < 0)
-
goto out_module_put;
-
-
/*
 * Now to bump the refcnt of the [loadable] module that owns this
 * socket at sock_release time we decrement its refcnt.
 */
-
if (!try_module_get(sock->ops->owner))
-
goto out_module_busy;
-
-
/*
 * Now that we're done with the ->create function, the [loadable]
 * module can have its refcnt decremented
 */
-
module_put(pf->owner);
-
err = security_socket_post_create(sock, family, type, protocol, kern);
-
if (err)
-
goto out_sock_release;
-
*res = sock;
-
-
return 0;
-
-
/*
 * Error unwinding. The labels fall through into each other:
 *   out_module_busy  -> sets err, then continues into out_module_put
 *   out_module_put   -> clears sock->ops and drops the pf->owner reference
 *   out_sock_release -> releases the socket and returns err
 * NOTE(review): sock->ops is presumably cleared so that sock_release()
 * does not call into the family's ops — confirm against sock_release().
 */
out_module_busy:
-
err = -EAFNOSUPPORT;
-
out_module_put:
-
sock->ops = NULL;
-
module_put(pf->owner);
-
out_sock_release:
-
sock_release(sock);
-
return err;
-
-
/* Taken while still inside the RCU read-side section: leave it first,
 * then share the common release path. */
out_release:
-
rcu_read_unlock();
-
goto out_sock_release;
-
}
pf = rcu_dereference(net_families[family]);根据协议族类型找到协议创建socket的函数,我们这里是AF_UNIX,使用的函数是unix_create,不同的协议有不同的创建函数,这里只是长征的第一步
-
static int unix_create(struct net *net, struct socket *sock, int protocol,
-
int kern)
-
{
-
if (protocol && protocol != PF_UNIX)
-
return -EPROTONOSUPPORT;
-
-
sock->state = SS_UNCONNECTED;
-
-
switch (sock->type) {
-
case SOCK_STREAM:
-
sock->ops = &unix_stream_ops;
-
break;
-
/*
-
* Believe it or not BSD has AF_UNIX, SOCK_RAW though
-
* nothing uses it.
-
*/
-
case SOCK_RAW:
-
sock->type = SOCK_DGRAM;
-
case SOCK_DGRAM:
-
sock->ops = &unix_dgram_ops;
-
break;
-
case SOCK_SEQPACKET:
-
sock->ops = &unix_seqpacket_ops;
-
break;
-
default:
-
return -ESOCKTNOSUPPORT;
-
}
-
-
return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
-
}
根据socket类型初始化sock->ops,然后调用unix_create1进行struct sock传输控制块的初始化
-
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
-
{
-
struct sock *sk = NULL;
-
struct unix_sock *u;
-
-
atomic_long_inc(&unix_nr_socks);
-
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
-
goto out;
-
-
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
-
if (!sk)
-
goto out;
-
-
sock_init_data(sock, sk);
-
lockdep_set_class(&sk->sk_receive_queue.lock,
-
&af_unix_sk_receive_queue_lock_key);
-
-
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
-
sk->sk_write_space = unix_write_space;
-
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
-
sk->sk_destruct = unix_sock_destructor;
-
u = unix_sk(sk);
-
u->path.dentry = NULL;
-
u->path.mnt = NULL;
-
spin_lock_init(&u->lock);
-
atomic_long_set(&u->inflight, 0);
-
INIT_LIST_HEAD(&u->link);
-
mutex_init(&u->iolock); /* single task reading lock */
-
mutex_init(&u->bindlock); /* single task binding lock */
-
init_waitqueue_head(&u->peer_wait);
-
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
-
unix_insert_socket(unix_sockets_unbound(sk), sk);
-
out:
-
if (sk == NULL)
-
atomic_long_dec(&unix_nr_socks);
-
else {
-
local_bh_disable();
-
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-
local_bh_enable();
-
}
-
return sk;
-
}
unix_create1返回值是struct sock*结构,这是我们接触到的第二层,传输控制块结构,这是一个很大的结构,做的事情很多,后续介绍。
最后返回时,会执行sock_map_fd,这一步是将socket与文件绑定,分配文件描述符。至此socket就创建成功了
阅读(1905) | 评论(0) | 转发(0) |