1.
/*
 * Generic request header.
 *
 * The first 16 bytes (proto_ver .. data_length) are followed by 32 bytes
 * of opcode-specific payload, for a total of 48 bytes.
 */
struct sd_req {
    uint8_t  proto_ver;
    uint8_t  opcode;              /* operation type */
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t opcode_specific[8];
};

/*
 * Generic response header.
 *
 * Same leading fields as struct sd_req, then a result code and 28 bytes
 * of opcode-specific payload, so sizeof(struct sd_rsp) equals
 * sizeof(struct sd_req).
 */
struct sd_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t opcode_specific[7];
};
这两个数据结构相当于其他请求/响应结构的通用头部(类似面向对象中的抽象基类)。可以看出sizeof(struct sd_req) == sizeof(struct sd_rsp),这是设计者有意为之:客户端在发送请求和接收响应时使用的是同一片内存区域。 |
2.
/*
 * Object request. Shares the 16-byte leading header (proto_ver ..
 * data_length) with struct sd_req; total size is 48 bytes.
 */
struct sd_obj_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t oid;                 /* object id */
    uint64_t cow_oid;
    uint32_t copies;              /* number of copies (replicas) */
    uint32_t tgt_epoch;
    uint64_t offset;
};

/*
 * Object response. Padded with pad[] so that it is 48 bytes, the same
 * size as struct sd_obj_req.
 */
struct sd_obj_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t copies;
    uint32_t pad[6];
};
用于对object进行操作的请求和响应。这里需要说明一点:object是Sheepdog中的数据存储单元,分为data object和vdi object,分别存储数据和vdi的元数据(即后面提到的sheepdog_inode的内容)。分片大小为4MB,不知作者为何选择这么小的分片? |
/*
 * VDI request. Shares the 16-byte leading header (proto_ver ..
 * data_length) with struct sd_req; total size is 48 bytes.
 */
struct sd_vdi_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t vdi_size;            /* size of the vdi */
    uint32_t base_vdi_id;
    uint32_t copies;
    uint32_t snapid;
    uint32_t pad[3];
};

/*
 * VDI response. Padded with pad[] so that it is 48 bytes, the same size
 * as struct sd_vdi_req.
 */
struct sd_vdi_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t rsvd;
    uint32_t vdi_id;
    uint32_t pad[5];
};
对vdi进行有关操作的请求和响应 |
3.
/*
 * VDI request. Shares the 16-byte leading header (proto_ver ..
 * data_length) with struct sd_req; total size is 48 bytes.
 */
struct sd_vdi_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t vdi_size;            /* size of the vdi */
    uint32_t base_vdi_id;
    uint32_t copies;
    uint32_t snapid;
    uint32_t pad[3];
};

/*
 * VDI response. Padded with pad[] so that it is 48 bytes, the same size
 * as struct sd_vdi_req.
 */
struct sd_vdi_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t rsvd;
    uint32_t vdi_id;
    uint32_t pad[5];
};
对vdi进行有关操作的请求和响应 |
4
/*
 * "so" request. Shares the 16-byte leading header (proto_ver ..
 * data_length) with struct sd_req; total size is 48 bytes.
 * NOTE(review): the accompanying text associates this pair with
 * SD_OP_MAKE_FS -- confirm against the opcode table.
 */
struct sd_so_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t oid;
    uint64_t ctime;
    uint32_t copies;
    uint32_t tag;
    uint32_t opcode_specific[2];
};

/*
 * "so" response. 48 bytes, the same size as struct sd_so_req.
 */
struct sd_so_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t copies;
    uint64_t ctime;
    uint64_t oid;
    uint32_t opcode_specific[2];
};
这对请求和响应的数据结构,对应的opcode为SD_OP_MAKE_FS,对整个集群进行format,并提供copies参数,指定默认的副本的个数; |
5
/*
 * List request. Shares the 16-byte leading header (proto_ver ..
 * data_length) with struct sd_req; total size is 48 bytes.
 */
struct sd_list_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint64_t start;               /* start_hval */
    uint64_t end;                 /* end_hval */
    uint32_t tgt_epoch;           /* epoch parameter */
    uint32_t pad[3];
};

/*
 * List response. Padded with pad[] so that it is 48 bytes, the same size
 * as struct sd_list_req.
 */
struct sd_list_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t rsvd;
    uint64_t next;
    uint32_t pad[4];
};
主要用于SD_OP_GET_OBJ_LIST操作,获得对应区间上node节点上的object list |
6
/*
 * Node request. Shares the 16-byte leading header (proto_ver ..
 * data_length) with struct sd_req; total size is 48 bytes.
 */
struct sd_node_req {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t request_ver;
    uint32_t pad[7];
};

/*
 * Node response: carries node counts/indices and store size figures.
 * 48 bytes, the same size as struct sd_node_req.
 */
struct sd_node_rsp {
    uint8_t  proto_ver;
    uint8_t  opcode;
    uint16_t flags;
    uint32_t epoch;
    uint32_t id;
    uint32_t data_length;
    uint32_t result;
    uint32_t nr_nodes;
    uint32_t local_idx;
    uint32_t master_idx;
    uint64_t store_size;
    uint64_t store_free;
};
用于SD_OP_STAT_SHEEP和SD_OP_GET_NODE_LIST操作:前者获得node的详细信息(包括store_size、store_free等),后者获得node list。 |
7
struct sheepdog_inode { char name[SD_MAX_VDI_LEN]; //vdi的名字 char tag[SD_MAX_VDI_TAG_LEN]; //tag uint64_t ctime; // create time uint64_t snap_ctime; //snapshot time uint64_t vm_clock_nsec; uint64_t vdi_size; //vdi size uint64_t vm_state_size; uint16_t copy_policy; uint8_t nr_copies; //副本的个数 uint8_t block_size_shift; //data object size uint32_t snap_id; //snapshot of this vdi uint32_t vdi_id; //vdi id uint32_t parent_vdi_id; uint32_t child_vdi_id[MAX_CHILDREN]; uint32_t data_vdi_id[MAX_DATA_OBJS]; //data object id array }; |
sheep中存储的每一个镜像文件都有一个sheepdog_inode结构与之对应。该结构中保存了数据object的id数组,相当于镜像文件的元数据,同时该结构会持久化保存到vdi object中。
8
enum conn_state { C_IO_HEADER = 0, C_IO_DATA_INIT, C_IO_DATA, C_IO_END, C_IO_CLOSED, }; struct connection { int fd; //sockfd enum conn_state c_rx_state; //当前receive状态 int rx_length; void *rx_buf; struct sd_req rx_hdr; enum conn_state c_tx_state; //当前transfer状态 int tx_length; void *tx_buf; struct sd_rsp tx_hdr; }; |
struct connection结构存储socket连接的状态信息 |
9
struct client_info { struct connection conn; //conn state struct request *rx_req; //current rx_req struct request *tx_req; //current tx_req struct list_head reqs; //client 已经收到的request struct list_head done_reqs; //已经处理完的request,待发送response int refcnt; //引用计数,request 的个数 }; |
作为client保存信息,其中conn保存连接状态,reqs代表已经收到的request,done_reqs代表待发送响应的request. |
10
enum cpg_event_type { CPG_EVENT_CONCHG, CPG_EVENT_DELIVER, CPG_EVENT_REQUEST, }; struct cpg_event { enum cpg_event_type ctype; struct list_head cpg_event_list; unsigned int skip; }; typedef void (*req_end_t) (struct request *); struct request { struct cpg_event cev; struct sd_req rq; struct sd_rsp rp; void *data; struct client_info *ci; struct list_head r_siblings; //client_info->reqs struct list_head r_wlist; //client_info->done_reqs struct list_head pending_list; //sys->pending_list uint64_t local_oid[2]; struct sheepdog_node_list_entry entry[SD_MAX_NODES]; int nr_nodes; int check_consistency; req_end_t done; struct work work; }; |
Server端 请求的详细信息 |
11
struct cluster_info { cpg_handle_t handle; /* set after finishing the JOIN procedure */ int join_finished; uint32_t this_nodeid; uint32_t this_pid; struct sheepdog_node_list_entry this_node; uint32_t epoch; uint32_t status; /* * we add a node to cpg_node_list in confchg then move it to * sd_node_list when the node joins sheepdog. */ struct list_head cpg_node_list; struct list_head sd_node_list; struct list_head pending_list; //未收到响应的request DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS); struct list_head outstanding_req_list; struct list_head req_wait_for_obj_list; struct list_head consistent_obj_list; uint32_t nr_sobjs; //副本个数 struct list_head cpg_event_siblings; struct cpg_event *cur_cevent; unsigned long cpg_event_work_flags; int nr_outstanding_io; int nr_outstanding_reqs; uint32_t recovered_epoch; }; extern struct cluster_info *sys; |
整个集群的信息 |