In the main function of iscsid.c there is a call to sysfs_init();
This function locates the system's sysfs directory:
//locate the sysfs directory
int sysfs_init(void)
{
    const char *env;

    /* take the sysfs path from the environment if it is set */
    env = getenv("SYSFS_PATH");
    if (env) {
        strlcpy(sysfs_path, env, sizeof(sysfs_path));
        /* strip any trailing slashes from the path */
        remove_trailing_chars(sysfs_path, '/');
    } else
        strlcpy(sysfs_path, "/sys", sizeof(sysfs_path));
    dbg("sysfs_path='%s'\n", sysfs_path);

    /* initialize the device and attribute caches (doubly linked lists) */
    INIT_LIST_HEAD(&dev_list);
    INIT_LIST_HEAD(&attr_list);
    return 0;
}
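remove_trailing_chars() is a small string helper. As a rough sketch of its behaviour here, stripping every trailing '/' in place (an illustration, not the project's exact implementation):

#include <string.h>

/* Sketch: remove every trailing occurrence of 'c' from 'path' in place. */
static void remove_trailing_chars_sketch(char *path, char c)
{
    size_t len = strlen(path);

    while (len > 0 && path[len - 1] == c)
        path[--len] = '\0';
}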
Here dev_list and attr_list are doubly linked lists built on struct list_head, defined in sysfs.c:
/* device cache */
static LIST_HEAD(dev_list);
/* attribute value cache */
static LIST_HEAD(attr_list);
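These lists follow the Linux-kernel-style intrusive list conventions that open-iscsi's list.h mirrors. Roughly, LIST_HEAD() and INIT_LIST_HEAD() look like this (a sketch based on the kernel's list.h, not necessarily character-for-character what open-iscsi ships):

/* Intrusive doubly linked list node: embedded in the objects it links. */
struct list_head {
    struct list_head *next, *prev;
};

/* A freshly initialized list points at itself, i.e. it is empty. */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)

static inline void INIT_LIST_HEAD(struct list_head *list)
{
    list->next = list;
    list->prev = list;
}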
In the main function:

//idbm initialization
if (idbm_init(iscsid_get_config_file)) {
    log_close(log_pid);
    exit(ISCSI_ERR);
}
The parameter iscsid_get_config_file is a function:
static char *iscsid_get_config_file(void)
{
    return daemon_config.config_file;
}
At this point the global daemon_config structure has not been initialized yet; what actually happens here is that the address of this function is stored in a member of the global database variable db:
/* database initialization */
int idbm_init(idbm_get_config_file_fn *fn)
{
    /* make sure root db dir is there */
    /* create the /etc/iscsi directory (ISCSI_CONFIG_ROOT) if it does not exist yet */
    if (access(ISCSI_CONFIG_ROOT, F_OK) != 0) {
        if (mkdir(ISCSI_CONFIG_ROOT, 0660) != 0) {
            log_error("Could not make %s %d\n", ISCSI_CONFIG_ROOT,
                  errno);
            return errno;
        }
    }

    /* allocate an idbm structure */
    db = malloc(sizeof(idbm_t));
    if (!db) {
        log_error("out of memory on idbm allocation");
        return ISCSI_ERR_NOMEM;
    }
    memset(db, 0, sizeof(idbm_t));
    /* store the callback that returns the configuration file name */
    db->get_config_file = fn;
    return 0;
}
The prototype of idbm_get_config_file_fn:
typedef char *(idbm_get_config_file_fn)(void);
The idbm data structure:
typedef struct idbm {
    void *discdb;
    void *nodedb;
    char *configfile;
    int refs;
    idbm_get_config_file_fn *get_config_file;
    node_rec_t nrec;
    recinfo_t ninfo[MAX_KEYS];
    discovery_rec_t drec_st;
    recinfo_t dinfo_st[MAX_KEYS];
    discovery_rec_t drec_slp;
    recinfo_t dinfo_slp[MAX_KEYS];
    discovery_rec_t drec_isns;
    recinfo_t dinfo_isns[MAX_KEYS];
} idbm_t;
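Storing the callback instead of the path itself lets the database layer ask for the configuration file only when it actually needs it, i.e. after daemon_config has been filled in. A hypothetical illustration of that pattern (idbm_resolve_config_file is a made-up name, not a function in open-iscsi):

/* Hypothetical helper: resolve the config file lazily through the stored callback. */
static char *idbm_resolve_config_file(idbm_t *dbp)
{
    if (dbp && dbp->get_config_file)
        return dbp->get_config_file();
    return NULL;
}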
In the main function:

//create a local socket for inter-process communication
if ((mgmt_ipc_fd = mgmt_ipc_listen()) < 0) {
    log_close(log_pid);
    exit(ISCSI_ERR);
}
The iscsiadm process talks to the iscsid daemon mainly through this local socket:
//create a local socket for inter-process communication
//returns the socket file descriptor
int mgmt_ipc_listen(void)
{
    int fd, err;
    struct sockaddr_un addr;

    /* create a connection-oriented, stream-based local (unix) socket */
    fd = socket(AF_LOCAL, SOCK_STREAM, 0);
    if (fd < 0) {
        log_error("Can not create IPC socket");
        return fd;
    }

    memset(&addr, 0, sizeof(addr));
    /* fill in the unix socket address; sun_path plays the role an IP address
     * plays for network sockets, so clients know where to connect. Copying
     * the name to sun_path + 1 leaves a leading NUL byte, which places the
     * socket in the Linux abstract namespace. */
    addr.sun_family = AF_LOCAL;
    memcpy((char *) &addr.sun_path + 1, ISCSIADM_NAMESPACE,
           strlen(ISCSIADM_NAMESPACE));

    /* bind the address to the socket */
    if ((err = bind(fd, (struct sockaddr *) &addr, sizeof(addr))) < 0) {
        log_error("Can not bind IPC socket");
        close(fd);
        return err;
    }

    /* listen, with a backlog of 32 pending connections */
    if ((err = listen(fd, 32)) < 0) {
        log_error("Can not listen IPC socket");
        close(fd);
        return err;
    }

    return fd;
}
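Because the socket lives in the abstract namespace rather than on the filesystem, a client such as iscsiadm reaches it with the same NUL-prefixed name. A minimal client-side sketch (the ISCSIADM_NAMESPACE value here is an assumption and must match the daemon's definition; this is not the project's exact code):

#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

/* must match the daemon's definition; the exact string here is an assumption */
#define ISCSIADM_NAMESPACE "ISCSIADM_ABSTRACT_NAMESPACE"

/* Sketch: connect to the daemon's abstract-namespace IPC socket. */
static int mgmt_ipc_connect_sketch(void)
{
    struct sockaddr_un addr;
    int fd;

    fd = socket(AF_LOCAL, SOCK_STREAM, 0);
    if (fd < 0)
        return -1;

    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_LOCAL;
    /* the leading NUL byte selects the abstract namespace */
    memcpy((char *) &addr.sun_path + 1, ISCSIADM_NAMESPACE,
           strlen(ISCSIADM_NAMESPACE));

    /* use the same address length as the daemon's bind() so the names match */
    if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}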
In the main function:

if ((control_fd = ipc->ctldev_open()) < 0) {
    log_close(log_pid);
    exit(ISCSI_ERR);
}

This returns a control file descriptor used for communication between the user-space iscsid process and the in-kernel iSCSI driver. open-iscsi implements two transports for this, ioctl and netlink; looking at the Makefile:
OSNAME=$(shell uname -s)
# allow users to override these
# eg to compile for a kernel that you aren't currently running
KERNELRELEASE ?= $(shell uname -r)
KSRC ?= /lib/modules/$(KERNELRELEASE)/build
KSUBLEVEL=$(shell cat $(KSRC)/Makefile | awk -F= '/^SUBLEVEL =/ {print $$2}' | \
sed 's/^[ \t]*//;s/[ \t]*$$//')
ifeq ($(OSNAME),Linux)
ifeq ($(KSUBLEVEL),11)
IPC_CFLAGS=-DNETLINK_ISCSI=12 -D_GNU_SOURCE
else
ifeq ($(KSUBLEVEL),12)
IPC_CFLAGS=-DNETLINK_ISCSI=12 -D_GNU_SOURCE
else
IPC_CFLAGS=-DNETLINK_ISCSI=8 -D_GNU_SOURCE
endif
endif
IPC_OBJ=netlink.o
else
ifeq ($(OSNAME),FreeBSD)
IPC_CFLAGS=
IPC_OBJ=ioctl.o
endif
endif
We can see that on a Linux system the netlink transport is used here, so we analyze netlink.c:
struct iscsi_ipc nl_ipc = {
    .name = "Open-iSCSI Kernel IPC/NETLINK v.1",
    .ctldev_bufmax = NLM_BUF_DEFAULT_MAX,
    .ctldev_open = ctldev_open,
    .ctldev_close = ctldev_close,
    .ctldev_handle = ctldev_handle,
    .sendtargets = ksendtargets,
    .create_session = kcreate_session,
    .destroy_session = kdestroy_session,
    .unbind_session = kunbind_session,
    .create_conn = kcreate_conn,
    .destroy_conn = kdestroy_conn,
    .bind_conn = kbind_conn,
    .set_param = kset_param,
    .set_host_param = kset_host_param,
    .get_param = NULL,
    .start_conn = kstart_conn,
    .stop_conn = kstop_conn,
    .get_stats = kget_stats,
    .writev = kwritev,
    .send_pdu_begin = ksend_pdu_begin,
    .send_pdu_end = ksend_pdu_end,
    .read = kread,
    .recv_pdu_begin = krecv_pdu_begin,
    .recv_pdu_end = krecv_pdu_end,
    .set_net_config = kset_net_config,
    .recv_conn_state = krecv_conn_state,
    .exec_ping = kexec_ping,
    .get_chap = kget_chap,
    .delete_chap = kdelete_chap,
};
struct iscsi_ipc *ipc = &nl_ipc;
Next we analyze the ctldev_open function:
/* prepare the buffers used for communication between the iSCSI driver and
 * iscsid, and create a netlink socket for that communication */
static int
ctldev_open(void)
{
    log_debug(7, "in %s", __FUNCTION__);

    /* allocate a netlink send buffer */
    nlm_sendbuf = calloc(1, NLM_BUF_DEFAULT_MAX);
    if (!nlm_sendbuf) {
        log_error("can not allocate nlm_sendbuf");
        return -1;
    }

    /* allocate a netlink receive buffer */
    nlm_recvbuf = calloc(1, NLM_BUF_DEFAULT_MAX);
    if (!nlm_recvbuf) {
        log_error("can not allocate nlm_recvbuf");
        goto free_nlm_sendbuf;
    }

    /* allocate a PDU send buffer */
    pdu_sendbuf = calloc(1, PDU_SENDBUF_DEFAULT_MAX);
    if (!pdu_sendbuf) {
        log_error("can not allocate nlm_sendbuf");
        goto free_nlm_recvbuf;
    }

    /* allocate a buffer for setting parameters */
    setparam_buf = calloc(1, NLM_SETPARAM_DEFAULT_MAX);
    if (!setparam_buf) {
        log_error("can not allocate setparam_buf");
        goto free_pdu_sendbuf;
    }

    /* create a netlink socket */
    ctrl_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ISCSI);
    if (ctrl_fd < 0) {
        log_error("can not create NETLINK_ISCSI socket");
        goto free_setparam_buf;
    }

    memset(&src_addr, 0, sizeof(src_addr));
    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    src_addr.nl_groups = 1;
    /* bind the source address to the socket */
    if (bind(ctrl_fd, (struct sockaddr *)&src_addr, sizeof(src_addr))) {
        log_error("can not bind NETLINK_ISCSI socket");
        goto close_socket;
    }

    /* the destination is the kernel */
    memset(&dest_addr, 0, sizeof(dest_addr));
    dest_addr.nl_family = AF_NETLINK;
    dest_addr.nl_pid = 0; /* kernel */
    dest_addr.nl_groups = 0; /* unicast */

    log_debug(7, "created NETLINK_ISCSI socket...");

    /* return the socket descriptor */
    return ctrl_fd;

close_socket:
    close(ctrl_fd);
free_setparam_buf:
    free(setparam_buf);
free_pdu_sendbuf:
    free(pdu_sendbuf);
free_nlm_recvbuf:
    free(nlm_recvbuf);
free_nlm_sendbuf:
    free(nlm_sendbuf);
    return -1;
}
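With ctrl_fd bound and dest_addr pointing at the kernel (nl_pid 0), requests travel as netlink messages over this socket. The send path in netlink.c is built around this general pattern (a generic netlink sendmsg sketch, not the project's exact kwritev code):

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/netlink.h>

/* Sketch: wrap a payload in a struct nlmsghdr and unicast it to the kernel. */
static ssize_t nl_send_sketch(int fd, struct sockaddr_nl *kaddr,
                              const void *payload, size_t len)
{
    char buf[NLMSG_SPACE(256)];
    struct nlmsghdr *nlh = (struct nlmsghdr *) buf;
    struct iovec iov;
    struct msghdr msg;

    if (NLMSG_SPACE(len) > sizeof(buf))
        return -1;

    memset(buf, 0, sizeof(buf));
    nlh->nlmsg_len = NLMSG_LENGTH(len);
    nlh->nlmsg_pid = getpid();
    nlh->nlmsg_flags = 0;
    memcpy(NLMSG_DATA(nlh), payload, len);

    iov.iov_base = nlh;
    iov.iov_len = nlh->nlmsg_len;

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = kaddr;          /* dest_addr-style kernel address */
    msg.msg_namelen = sizeof(*kaddr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    return sendmsg(fd, &msg, 0);
}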
In the main function:

event_loop(ipc, control_fd, mgmt_ipc_fd);

The daemon now enters its wait loop, waiting for communication events:
/* the daemon's main wait loop */
void event_loop(struct iscsi_ipc *ipc, int control_fd, int mgmt_ipc_fd)
{
    struct pollfd poll_array[POLL_MAX];
    int res, has_shutdown_children = 0;

    /* set up the events to wait for */
    poll_array[POLL_CTRL].fd = control_fd;
    poll_array[POLL_CTRL].events = POLLIN;
    poll_array[POLL_IPC].fd = mgmt_ipc_fd;
    poll_array[POLL_IPC].events = POLLIN;

    /* daemon exit flag */
    event_loop_stop = 0;
    while (1) {
        if (event_loop_stop) {
            /* if the children have not been shut down yet,
             * send each of them SIGTERM */
            if (!has_shutdown_children) {
                has_shutdown_children = 1;
                shutdown_notify_pids();
            }
            /* wait until all children have exited */
            if (shutdown_wait_pids())
                break;
        }

        /* wait for events */
        res = poll(poll_array, POLL_MAX, ACTOR_RESOLUTION);
        if (res > 0) { /* an event occurred */
            log_debug(6, "poll result %d", res);
            if (poll_array[POLL_CTRL].revents) /* message from the kernel */
                ipc->ctldev_handle();
            if (poll_array[POLL_IPC].revents) /* request from another process */
                mgmt_ipc_handle(mgmt_ipc_fd);
        } else if (res < 0) {
            if (errno == EINTR) {
                log_debug(1, "event_loop interrupted");
            } else {
                log_error("got poll() error (%d), errno (%d), "
                      "exiting", res, errno);
                break;
            }
        } else
            actor_poll();

        reap_proc();
        /*
         * flush sysfs cache since kernel objs may
         * have changed as a result of handling op
         */
        sysfs_cleanup();
    }

    if (shutdown_qtask)
        mgmt_ipc_write_rsp(shutdown_qtask, ISCSI_SUCCESS);
}
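When POLL_IPC fires, mgmt_ipc_handle() accepts the pending connection on the listening socket, reads the request that iscsiadm sent, and dispatches on its command. The real code lives in mgmt_ipc.c; the following is only a hypothetical outline of that accept-and-dispatch shape (the request layout and names are made up for illustration):

#include <unistd.h>
#include <sys/socket.h>

/* Hypothetical outline of serving one IPC client (not the real mgmt_ipc.c). */
static void handle_one_ipc_client_sketch(int listen_fd)
{
    struct fake_req { int command; } req;   /* made-up request layout */
    int fd;

    fd = accept(listen_fd, NULL, NULL);
    if (fd < 0)
        return;

    if (read(fd, &req, sizeof(req)) == (ssize_t) sizeof(req)) {
        switch (req.command) {
        /* ... dispatch on the command iscsiadm sent ... */
        default:
            break;
        }
    }
    close(fd);
}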
Notifying all child processes to shut down:

/* send SIGTERM to every registered child process */
static void shutdown_notify_pids(void)
{
    struct shutdown_callback *cb;

    list_for_each_entry(cb, &shutdown_callbacks, list) {
        log_debug(1, "Killing %d\n", cb->pid);
        kill(cb->pid, SIGTERM);
    }
}
The definition of shutdown_callback:
static LIST_HEAD(shutdown_callbacks);
struct shutdown_callback {
    struct list_head list;
    pid_t pid;
};
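Entries get onto shutdown_callbacks when iscsid forks a helper child that must be terminated cleanly at shutdown. Registration looks roughly like this (a sketch; the function name and exact error handling here are illustrative, not the project's code):

#include <errno.h>
#include <stdlib.h>
#include <sys/types.h>

/* Sketch: remember a forked child so event_loop can SIGTERM and reap it
 * when the daemon shuts down. */
static int shutdown_callback_add_sketch(pid_t pid)
{
    struct shutdown_callback *cb;

    cb = calloc(1, sizeof(*cb));
    if (!cb)
        return -ENOMEM;

    cb->pid = pid;
    list_add_tail(&cb->list, &shutdown_callbacks);
    return 0;
}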
Reap the exit status of every child process so that no zombies are left behind:
static int shutdown_wait_pids(void)
{
    struct shutdown_callback *cb, *tmp;

    list_for_each_entry_safe(cb, tmp, &shutdown_callbacks, list) {
        /*
         * the proc reaper could clean it up, so wait for any
         * sign that it is gone.
         */
        if (waitpid(cb->pid, NULL, WNOHANG)) {
            log_debug(1, "%d done\n", cb->pid);
            list_del(&cb->list);
            free(cb);
        }
    }

    return list_empty(&shutdown_callbacks);
}