熟悉Linux下程序设计及各种应用程序 熟悉C Language 熟悉Glusterfs、FFmpeg、CDN 系统设计,计算机图形系统设计、分布式程序设计 目前主要研究方向:流媒体
分类: LINUX
2013-07-09 18:34:39
转自:glusterfs中xlator的介绍
在Gluster中,所有的操作都围绕着一条主线,那就是xlator,在对应的volume的配置中可以看到
volume testvol-client-0 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_0 option remote-host 192.168.1.194 end-volume volume testvol-client-1 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_1 option remote-host 192.168.1.194 end-volume volume testvol-client-2 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_2 option remote-host 192.168.1.194 end-volume volume testvol-client-3 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_3 option remote-host 192.168.1.194 end-volume volume testvol-client-4 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_4 option remote-host 192.168.1.194 end-volume volume testvol-client-5 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_5 option remote-host 192.168.1.194 end-volume volume testvol-client-6 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_6 option remote-host 192.168.1.194 end-volume volume testvol-client-7 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_7 option remote-host 192.168.1.194 end-volume volume testvol-client-8 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_8 option remote-host 192.168.1.194 end-volume volume testvol-client-9 type protocol/client option transport-type tcp option remote-subvolume /opt/data/data_9 option remote-host 192.168.1.194 end-volume volume testvol-stripe-0 type cluster/stripe subvolumes testvol-client-0 testvol-client-1 end-volume volume testvol-stripe-1 type cluster/stripe subvolumes testvol-client-2 testvol-client-3 end-volume volume testvol-stripe-2 type cluster/stripe subvolumes testvol-client-4 testvol-client-5 end-volume volume testvol-stripe-3 type cluster/stripe subvolumes testvol-client-6 testvol-client-7 end-volume volume testvol-stripe-4 type 
cluster/stripe subvolumes testvol-client-8 testvol-client-9 end-volume volume testvol-dht type cluster/distribute subvolumes testvol-stripe-0 testvol-stripe-1 testvol-stripe-2 testvol-stripe-3 testvol-stripe-4 end-volume volume testvol-write-behind type performance/write-behind subvolumes testvol-dht end-volume volume testvol-read-ahead type performance/read-ahead subvolumes testvol-write-behind end-volume volume testvol-io-cache type performance/io-cache subvolumes testvol-read-ahead end-volume volume testvol-quick-read type performance/quick-read subvolumes testvol-io-cache end-volume volume testvol-open-behind type performance/open-behind subvolumes testvol-quick-read end-volume volume testvol-md-cache type performance/md-cache subvolumes testvol-open-behind end-volume volume testvol type debug/io-stats option count-fop-hits off option latency-measurement off subvolumes testvol-md-cache end-volume
根据配置文件可以看到,volume名为testvol,类型为debug/io-stats,选项(option)count-fop-hits与latency-measurement对应的value均为off,然后通过subvolumes来进入下一个xlator,其中,subvolumes后面的value即为下一层xlator。
从gluster的代码目录结构中,可以看到debug/io-stats这个目录
[root@CM glusterfs]# ls aclocal.m4 autom4te.cache config.h.in configure.ac cscope.files doc glusterfs-hadoop libglusterfs Makefile.in rfc.sh tests api ChangeLog config.log contrib cscope.in.out extras glusterfs.spec libtool missing rpc THANKS argp-standalone cli config.status CONTRIBUTING cscope.out glusterfs-api.pc glusterfs.spec.in ltmain.sh NEWS run-tests.sh xlators AUTHORS config.guess config.sub COPYING-GPLV2 cscope.po.out glusterfs-api.pc.in INSTALL Makefile py-compile stamp-h1 autogen.sh config.h configure COPYING-LGPLV3 depcomp glusterfsd install-sh Makefile.am README tags [root@CM glusterfs]# ls xlators/ bindings cluster debug encryption features lib Makefile Makefile.am Makefile.in meta mgmt mount nfs performance playground protocol storage system [root@CM glusterfs]# ls xlators/debug/ error-gen io-stats Makefile Makefile.am Makefile.in trace [root@CM glusterfs]# ls xlators/debug/io-stats/ Makefile Makefile.am Makefile.in src [root@CM glusterfs]#
可以看到,io-stats存在debug目录中,debug目录存在xlators中。
看一下xlators/debug/io-stats/src/下io-stats的实现
struct xlator_fops fops = { .stat = io_stats_stat, .readlink = io_stats_readlink, .mknod = io_stats_mknod, .mkdir = io_stats_mkdir, .unlink = io_stats_unlink, .rmdir = io_stats_rmdir, .symlink = io_stats_symlink, .rename = io_stats_rename, .link = io_stats_link, .truncate = io_stats_truncate, .open = io_stats_open, .readv = io_stats_readv, .writev = io_stats_writev, .statfs = io_stats_statfs, .flush = io_stats_flush, .fsync = io_stats_fsync, .setxattr = io_stats_setxattr, .getxattr = io_stats_getxattr, .removexattr = io_stats_removexattr, .fsetxattr = io_stats_fsetxattr, .fgetxattr = io_stats_fgetxattr, .fremovexattr = io_stats_fremovexattr, .opendir = io_stats_opendir, .readdir = io_stats_readdir, .readdirp = io_stats_readdirp, .fsyncdir = io_stats_fsyncdir, .access = io_stats_access, .ftruncate = io_stats_ftruncate, .fstat = io_stats_fstat, .create = io_stats_create, .lk = io_stats_lk, .inodelk = io_stats_inodelk, .finodelk = io_stats_finodelk, .entrylk = io_stats_entrylk, .lookup = io_stats_lookup, .xattrop = io_stats_xattrop, .fxattrop = io_stats_fxattrop, .setattr = io_stats_setattr, .fsetattr = io_stats_fsetattr, .fallocate = io_stats_fallocate, .discard = io_stats_discard, };
fops为glusterfs中对文件操作的一个亮点,fops中包含定义如下:
struct xlator_fops { fop_lookup_t lookup; fop_stat_t stat; fop_fstat_t fstat; fop_truncate_t truncate; fop_ftruncate_t ftruncate; fop_access_t access; fop_readlink_t readlink; fop_mknod_t mknod; fop_mkdir_t mkdir; fop_unlink_t unlink; fop_rmdir_t rmdir; fop_symlink_t symlink; fop_rename_t rename; fop_link_t link; fop_create_t create; fop_open_t open; fop_readv_t readv; fop_writev_t writev; fop_flush_t flush; fop_fsync_t fsync; fop_opendir_t opendir; fop_readdir_t readdir; fop_readdirp_t readdirp; fop_fsyncdir_t fsyncdir; fop_statfs_t statfs; fop_setxattr_t setxattr; fop_getxattr_t getxattr; fop_fsetxattr_t fsetxattr; fop_fgetxattr_t fgetxattr; fop_removexattr_t removexattr; fop_fremovexattr_t fremovexattr; fop_lk_t lk; fop_inodelk_t inodelk; fop_finodelk_t finodelk; fop_entrylk_t entrylk; fop_fentrylk_t fentrylk; fop_rchecksum_t rchecksum; fop_xattrop_t xattrop; fop_fxattrop_t fxattrop; fop_setattr_t setattr; fop_fsetattr_t fsetattr; fop_getspec_t getspec; fop_fallocate_t fallocate; fop_discard_t discard; /* these entries are used for a typechecking hack in STACK_WIND _only_ */ fop_lookup_cbk_t lookup_cbk; fop_stat_cbk_t stat_cbk; fop_fstat_cbk_t fstat_cbk; fop_truncate_cbk_t truncate_cbk; fop_ftruncate_cbk_t ftruncate_cbk; fop_access_cbk_t access_cbk; fop_readlink_cbk_t readlink_cbk; fop_mknod_cbk_t mknod_cbk; fop_mkdir_cbk_t mkdir_cbk; fop_unlink_cbk_t unlink_cbk; fop_rmdir_cbk_t rmdir_cbk; fop_symlink_cbk_t symlink_cbk; fop_rename_cbk_t rename_cbk; fop_link_cbk_t link_cbk; fop_create_cbk_t create_cbk; fop_open_cbk_t open_cbk; fop_readv_cbk_t readv_cbk; fop_writev_cbk_t writev_cbk; fop_flush_cbk_t flush_cbk; fop_fsync_cbk_t fsync_cbk; fop_opendir_cbk_t opendir_cbk; fop_readdir_cbk_t readdir_cbk; fop_readdirp_cbk_t readdirp_cbk; fop_fsyncdir_cbk_t fsyncdir_cbk; fop_statfs_cbk_t statfs_cbk; fop_setxattr_cbk_t setxattr_cbk; fop_getxattr_cbk_t getxattr_cbk; fop_fsetxattr_cbk_t fsetxattr_cbk; fop_fgetxattr_cbk_t fgetxattr_cbk; fop_removexattr_cbk_t removexattr_cbk; 
fop_fremovexattr_cbk_t fremovexattr_cbk; fop_lk_cbk_t lk_cbk; fop_inodelk_cbk_t inodelk_cbk; fop_finodelk_cbk_t finodelk_cbk; fop_entrylk_cbk_t entrylk_cbk; fop_fentrylk_cbk_t fentrylk_cbk; fop_rchecksum_cbk_t rchecksum_cbk; fop_xattrop_cbk_t xattrop_cbk; fop_fxattrop_cbk_t fxattrop_cbk; fop_setattr_cbk_t setattr_cbk; fop_fsetattr_cbk_t fsetattr_cbk; fop_getspec_cbk_t getspec_cbk; fop_fallocate_cbk_t fallocate_cbk; fop_discard_cbk_t discard_cbk; };
fops中定义了88个接口,fops定义在xlator.h中,其中定义的每个接口都很重要,都为文件操作对应的接口,例如open,read,write,opendir,readdir,stat等。不过不用担心,88个接口挨个实现一下其实也需要精力,但是在某些场景下,其中的有些接口不会被用到,这时可以不必定义该接口,当本xlator中并未定义对应的接口时,xlator核心部分默认将会使用defaults.c中的对应的接口,例如前面的例子,stat与stat_cbk的例子,io-stats中并未定义stat_cbk,而在fops结构中却有stat_cbk的接口,这个时候如果用户使用stat_cbk时,将会调用defaults.c中的stat_cbk,default中的stat_cbk定义在libglusterfs/src/defaults.c。(注:从后文的fill_defaults可以看到,实际被填充默认实现的是fops中的操作接口,例如io-stats的fops中未定义的fentrylk、rchecksum、getspec会被填充为default_fentrylk、default_rchecksum、default_getspec;而结构体中的*_cbk成员按其注释所述,仅用于STACK_WIND的类型检查。)那么怎么使用的default这个接口的呢,可以参考xlator.c中的对应的使用:libglusterfs/src/xlator.c
/*
 * Open the shared object "<XLATORDIR>/<xl->type>.so" and bind the symbols it
 * exports to @xl.
 *
 * Mandatory symbols: "fops", "cbks", and either "class_methods" or the pair
 * "init"/"fini".  Optional symbols ("reconfigure", "notify", "dumpops",
 * "mem_acct_init", "options") are only logged at TRACE level when missing.
 * On success a volume_opt_list_t entry is appended to xl->volume_options and
 * fill_defaults() is run on @xl.
 *
 * @xl: translator to populate.
 *
 * Returns 0 on success, -1 on any failure.
 */
int
xlator_dynload (xlator_t *xl)
{
        int                rc       = -1;
        char              *so_path  = NULL;
        void              *dl       = NULL;
        void              *sym      = NULL;
        volume_opt_list_t *opt_node = NULL;
        class_methods_t   *methods  = NULL;

        GF_VALIDATE_OR_GOTO ("xlator", xl, out);

        INIT_LIST_HEAD (&xl->volume_options);

        rc = gf_asprintf (&so_path, "%s/%s.so", XLATORDIR, xl->type);
        if (rc == -1) {
                gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
                goto out;
        }

        /* gf_asprintf() overwrote rc; re-arm the failure code */
        rc = -1;

        gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", so_path);

        dl = dlopen (so_path, RTLD_NOW|RTLD_GLOBAL);
        if (dl == NULL) {
                gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
                goto out;
        }
        xl->dlhandle = dl;

        xl->fops = dlsym (dl, "fops");
        if (xl->fops == NULL) {
                gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
                        dlerror ());
                goto out;
        }

        xl->cbks = dlsym (dl, "cbks");
        if (xl->cbks == NULL) {
                gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
                        dlerror ());
                goto out;
        }

        /*
         * A "class_methods" table, when exported, supplies init, fini,
         * reconfigure and notify in one go.  Without it, each of those is
         * looked up as an individual symbol; init and fini are then
         * mandatory, the other two are optional.
         */
        methods = dlsym (dl, "class_methods");
        if (methods != NULL) {
                xl->init        = methods->init;
                xl->fini        = methods->fini;
                xl->reconfigure = methods->reconfigure;
                xl->notify      = methods->notify;
        } else {
                sym = dlsym (dl, "init");
                *VOID(&xl->init) = sym;
                if (sym == NULL) {
                        gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
                                dlerror ());
                        goto out;
                }

                sym = dlsym (dl, "fini");
                *VOID(&(xl->fini)) = sym;
                if (sym == NULL) {
                        gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
                                dlerror ());
                        goto out;
                }

                sym = dlsym (dl, "reconfigure");
                *VOID(&(xl->reconfigure)) = sym;
                if (sym == NULL) {
                        gf_log ("xlator", GF_LOG_TRACE,
                                "dlsym(reconfigure) on %s -- neglecting",
                                dlerror());
                }

                sym = dlsym (dl, "notify");
                *VOID(&(xl->notify)) = sym;
                if (sym == NULL) {
                        gf_log ("xlator", GF_LOG_TRACE,
                                "dlsym(notify) on %s -- neglecting",
                                dlerror ());
                }
        }

        xl->dumpops = dlsym (dl, "dumpops");
        if (xl->dumpops == NULL) {
                gf_log ("xlator", GF_LOG_TRACE,
                        "dlsym(dumpops) on %s -- neglecting", dlerror ());
        }

        sym = dlsym (dl, "mem_acct_init");
        *VOID(&(xl->mem_acct_init)) = sym;
        if (sym == NULL) {
                gf_log (xl->name, GF_LOG_TRACE,
                        "dlsym(mem_acct_init) on %s -- neglecting",
                        dlerror ());
        }

        opt_node = GF_CALLOC (1, sizeof (*opt_node),
                              gf_common_mt_volume_opt_list_t);
        if (opt_node == NULL)
                goto out;

        opt_node->given_opt = dlsym (dl, "options");
        if (opt_node->given_opt == NULL) {
                /* call dlerror() only to discard the pending message */
                dlerror ();
                gf_log (xl->name, GF_LOG_TRACE,
                        "Strict option validation not enforced -- neglecting");
        }

        INIT_LIST_HEAD (&opt_node->list);
        list_add_tail (&opt_node->list, &xl->volume_options);

        /* plug default handlers into every slot the translator left empty */
        fill_defaults (xl);

        rc = 0;

out:
        GF_FREE (so_path);
        return rc;
}
再次回到刚才所查看的配置中的type部分,当配置文件解析出volname以后,看到type为debug/io-stats,debug/io-stats为一个xlator,在xlator中将会打开这个debug/io-stats,打开的操作就是在xlator_dynload接口中进行,XLATORDIR为glusterfs对应的动态库所存储的根目录,XLATORDIR的定义是在编译时由Makefile传入的。
[root@CM glusterfs]# ls /usr/local/lib/glusterfs/ 3git [root@CM glusterfs]# ls /usr/local/lib/glusterfs/3git/ auth rpc-transport xlator [root@CM glusterfs]# ls /usr/local/lib/glusterfs/3git/xlator/ cluster debug encryption features mgmt mount nfs performance protocol storage system testing [root@CM glusterfs]# ls /usr/local/lib/glusterfs/3git/xlator/debug/ error-gen.la error-gen.so io-stats.la io-stats.so trace.la trace.so [root@CM glusterfs]#
例如我在编译的时候,将路径指定在/usr/local/lib/glusterfs/3git/xlator/中,这时xlator_dynload中将会打开该目录下的debug/io-stats.so。
if (!(xl->fops = dlsym (handle, "fops"))) { gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s", dlerror ()); goto out; }
然后通过dlsym,将fops导出至xl->fops中
在后面通过 fill_defaults (xl);将xlator中并未定义的fops接口补充进xl中
#define SET_DEFAULT_FOP(fn) do { \ if (!xl->fops->fn) \ xl->fops->fn = default_##fn; \ } while (0) #define SET_DEFAULT_CBK(fn) do { \ if (!xl->cbks->fn) \ xl->cbks->fn = default_##fn; \ } while (0) static void fill_defaults (xlator_t *xl) { if (xl == NULL) { gf_log_callingfn ("xlator", GF_LOG_WARNING, "invalid argument"); return; } SET_DEFAULT_FOP (create); SET_DEFAULT_FOP (open); SET_DEFAULT_FOP (stat); SET_DEFAULT_FOP (readlink); SET_DEFAULT_FOP (mknod); SET_DEFAULT_FOP (mkdir); SET_DEFAULT_FOP (unlink); SET_DEFAULT_FOP (rmdir); SET_DEFAULT_FOP (symlink); SET_DEFAULT_FOP (rename); SET_DEFAULT_FOP (link); SET_DEFAULT_FOP (truncate); SET_DEFAULT_FOP (readv); SET_DEFAULT_FOP (writev); SET_DEFAULT_FOP (statfs); SET_DEFAULT_FOP (flush); SET_DEFAULT_FOP (fsync); SET_DEFAULT_FOP (setxattr); SET_DEFAULT_FOP (getxattr); SET_DEFAULT_FOP (fsetxattr); SET_DEFAULT_FOP (fgetxattr); SET_DEFAULT_FOP (removexattr); SET_DEFAULT_FOP (fremovexattr); SET_DEFAULT_FOP (opendir); SET_DEFAULT_FOP (readdir); SET_DEFAULT_FOP (readdirp); SET_DEFAULT_FOP (fsyncdir); SET_DEFAULT_FOP (access); SET_DEFAULT_FOP (ftruncate); SET_DEFAULT_FOP (fstat); SET_DEFAULT_FOP (lk); SET_DEFAULT_FOP (inodelk); SET_DEFAULT_FOP (finodelk); SET_DEFAULT_FOP (entrylk); SET_DEFAULT_FOP (fentrylk); SET_DEFAULT_FOP (lookup); SET_DEFAULT_FOP (rchecksum); SET_DEFAULT_FOP (xattrop); SET_DEFAULT_FOP (fxattrop); SET_DEFAULT_FOP (setattr); SET_DEFAULT_FOP (fsetattr); SET_DEFAULT_FOP (fallocate); SET_DEFAULT_FOP (discard); SET_DEFAULT_FOP (getspec); SET_DEFAULT_CBK (release); SET_DEFAULT_CBK (releasedir); SET_DEFAULT_CBK (forget); if (!xl->notify) xl->notify = default_notify; if (!xl->mem_acct_init) xl->mem_acct_init = default_mem_acct_init; return; }
通过SET_DEFAULT_FOP以及SET_DEFAULT_CBK来补充尚未定义的fops接口,其实现通过宏来进行,首先判断对应的接口是否已经实现,尚未实现就将default填充入对应的接口。
关于接口的定义以及使用工作流程已经大概的讲完了,接下来查看io-stats对应的option部分的定义
/*
 * Options understood by the io-stats translator.  The list is terminated by
 * an entry whose first key is NULL.
 */
struct volume_options options[] = {
        {
                .key           = { "dump-fd-stats" },
                .type          = GF_OPTION_TYPE_BOOL,
                .default_value = "off",
                .description   = "If on stats related to file-operations would be "
                                 "tracked inside GlusterFS data-structures."
        },
        {
                .key           = { "latency-measurement" },
                .type          = GF_OPTION_TYPE_BOOL,
                .default_value = "off",
                .description   = "If on stats related to the latency of each operation "
                                 "would be tracked inside GlusterFS data-structures. "
        },
        {
                /* no default value or description is given for this one */
                .key  = { "count-fop-hits" },
                .type = GF_OPTION_TYPE_BOOL,
        },
        {
                .key   = { "log-level" },
                .type  = GF_OPTION_TYPE_STR,
                .value = { "DEBUG", "WARNING", "ERROR", "INFO",
                           "CRITICAL", "NONE", "TRACE" }
        },

        /*
         * The remaining entries are synthetic: they exist to assist
         * validation of the CLI's "volume set" command.
         */
        {
                .key           = { "client-log-level" },
                .type          = GF_OPTION_TYPE_STR,
                .default_value = "INFO",
                .description   = "Changes the log-level of the clients",
                .value         = { "DEBUG", "WARNING", "ERROR", "INFO",
                                   "CRITICAL", "NONE", "TRACE" }
        },
        {
                .key           = { "sys-log-level" },
                .type          = GF_OPTION_TYPE_STR,
                .default_value = "CRITICAL",
                .description   = "Gluster's syslog log-level",
                .value         = { "WARNING", "ERROR", "INFO", "CRITICAL" }
        },
        {
                .key           = { "brick-log-level" },
                .type          = GF_OPTION_TYPE_STR,
                .default_value = "INFO",
                .description   = "Changes the log-level of the bricks",
                .value         = { "DEBUG", "WARNING", "ERROR", "INFO",
                                   "CRITICAL", "NONE", "TRACE" }
        },

        /* terminator */
        { .key = { NULL } },
};
在配置文件中,可以看到io-stats中的option中包含了两个配置:
option count-fop-hits off option latency-measurement off
这两个配置项都是通过前面的options数组定义的:
volume_options定义如下:
/*
 * Description of one configurable option.  Every translator is expected to
 * define options using this structure.
 */
typedef struct volume_options {
        /* alternative key names that all carry the same meaning */
        char                    *key[ZR_VOLUME_MAX_NUM_KEY];

        volume_option_type_t     type;

        double                   min;   /* 0 means no range */
        double                   max;   /* 0 means no range */

        /* if specified, the given value is checked against this array */
        char                    *value[ZR_OPTION_MAX_ARRAY_SIZE];

        char                    *default_value;
        char                    *description;   /* about the key */

        /*
         * Required for int options where only the min value is given and
         * is 0; without it validation would not happen.
         */
        opt_validate_type_t      validate;
} volume_option_t;
其中type为volume_option_type_t,该类型定义为:
/*
 * Kinds of value a volume option can hold.  The numeric values are written
 * out explicitly; GF_OPTION_TYPE_MAX is one past the last real type.
 */
typedef enum {
        GF_OPTION_TYPE_ANY                   = 0,
        GF_OPTION_TYPE_STR                   = 1,
        GF_OPTION_TYPE_INT                   = 2,
        GF_OPTION_TYPE_SIZET                 = 3,
        GF_OPTION_TYPE_PERCENT               = 4,
        GF_OPTION_TYPE_PERCENT_OR_SIZET      = 5,
        GF_OPTION_TYPE_BOOL                  = 6,
        GF_OPTION_TYPE_XLATOR                = 7,
        GF_OPTION_TYPE_PATH                  = 8,
        GF_OPTION_TYPE_TIME                  = 9,
        GF_OPTION_TYPE_DOUBLE                = 10,
        GF_OPTION_TYPE_INTERNET_ADDRESS      = 11,
        GF_OPTION_TYPE_INTERNET_ADDRESS_LIST = 12,
        GF_OPTION_TYPE_PRIORITY_LIST         = 13,
        GF_OPTION_TYPE_SIZE_LIST             = 14,
        GF_OPTION_TYPE_MAX                   = 15,
} volume_option_type_t;
在io-stats中看到的两个option,在io-stats中将会被使用,两个选项都为BOOL型,由于其传递字符串为off,将会被转化为BOOL型进行使用。
通过执行xlator中的xl->init,来执行io-stats中的init接口,init接口同样是在xlator_dynload中加载到xl中的。而option,需要在init中进行设置,设置操作如下:
ret = ios_init_top_stats (conf); if (ret) return -1; GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out); GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out); GF_OPTION_INIT ("latency-measurement", conf->measure_latency, bool, out);
关于option配置中的off字符串转化为BOOL,将通过接口dict_get_str_boolean来得到bool值:
dict_get_str_boolean实现如下:
/**
 * dict_get_str_boolean - read a dictionary entry and interpret it as boolean.
 *
 * @this        : dictionary
 * @key         : dictionary key queried
 * @default_val : value handed back when @key is not present
 *
 * @return      : @default_val if the lookup fails with -ENOENT
 *              : the boolean interpretation of @this[@key] when
 *                gf_string2boolean() accepts it (ie., "on", "true",
 *                "enable" ...)
 *              : -1 on any other lookup error, when the entry carries no
 *                data, or when @this[@key] doesn't make sense as boolean
 *
 * The choice of @default_val therefore selects one of these patterns:
 *
 *   - fall back to _gf_false if @key is not set  [@default_val = 0]
 *   - fall back to _gf_true if @key is not set   [@default_val = 1]
 *   - regard as failure if @key is not set       [@default_val = -1]
 *   - handle specially (not as error) if @key is not set
 *                                                [@default_val = anything else]
 */
int
dict_get_str_boolean (dict_t *this, char *key, int default_val)
{
        data_t       *data   = NULL;
        gf_boolean_t  parsed = _gf_false;
        int           rc     = 0;

        rc = dict_get_with_ref (this, key, &data);
        if (rc < 0) {
                /* only "no such key" maps to the caller's default */
                rc = (rc == -ENOENT) ? default_val : -1;
                goto done;
        }

        GF_ASSERT (data);

        if (data->data == NULL) {
                rc = -1;
                goto done;
        }

        rc = gf_string2boolean (data->data, &parsed);
        if (rc != -1)
                rc = parsed;

done:
        /* drop the reference taken by dict_get_with_ref(), if any */
        if (data != NULL)
                data_unref (data);

        return rc;
}
通过gf_string2boolean将字符串转化为bool:
/*
 * Translate a textual boolean into a gf_boolean_t.
 *
 * Comparison is case-insensitive.  "1", "on", "yes", "true" and "enable"
 * store _gf_true in *b; "0", "off", "no", "false" and "disable" store
 * _gf_false.
 *
 * @str: text to interpret; NULL is logged as a warning and rejected.
 * @b:   receives the result; written only when the text is recognized.
 *
 * Returns 0 when @str was recognized, -1 otherwise.
 */
int
gf_string2boolean (const char *str, gf_boolean_t *b)
{
        static const char *const true_words[]  = {
                "1", "on", "yes", "true", "enable", NULL
        };
        static const char *const false_words[] = {
                "0", "off", "no", "false", "disable", NULL
        };
        int i = 0;

        if (!str) {
                gf_log_callingfn (THIS->name, GF_LOG_WARNING,
                                  "argument invalid");
                return -1;
        }

        for (i = 0; true_words[i] != NULL; i++) {
                if (strcasecmp (str, true_words[i]) == 0) {
                        *b = _gf_true;
                        return 0;
                }
        }

        for (i = 0; false_words[i] != NULL; i++) {
                if (strcasecmp (str, false_words[i]) == 0) {
                        *b = _gf_false;
                        return 0;
                }
        }

        return -1;
}
所以,option如果为bool型时,对应的配置可以为1/0,on/off,yes/no,true/false,enable/disable(不区分大小写)。
关于option的设置以及读取,通过字典的dict_set/dict_get操作很方便。
关于基础的介绍,大概就这么多