## This document is the outcome of a discussion with my colleague Bob; he wrote it up, and I sincerely thank him.
192.168.1.63:
wget -P /etc/yum.repos.d
# yum list glusterfs*
Loaded plugins: fastestmirror
Loading mirror speeds from cached hostfile
* base: mirrors.163.com
* epel: mirrors.hust.edu.cn
* extras: centos.ustc.edu.cn
* updates: mirrors.163.com
Installed Packages
glusterfs.x86_64 3.5.1-1.el6 @glusterfs-epel
glusterfs-api.x86_64 3.5.1-1.el6 @glusterfs-epel
glusterfs-cli.x86_64 3.5.1-1.el6 @glusterfs-epel
glusterfs-fuse.x86_64 3.5.1-1.el6 @glusterfs-epel
glusterfs-libs.x86_64 3.5.1-1.el6 @glusterfs-epel
glusterfs-server.x86_64 3.5.1-1.el6 @glusterfs-epel
Available Packages
glusterfs-api-devel.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-debuginfo.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-devel.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-extra-xlators.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-geo-replication.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-rdma.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-regression-tests.x86_64 3.5.1-1.el6 glusterfs-epel
glusterfs-resource-agents.noarch 3.5.1-1.el6 glusterfs-noarch-epel
# yum -y install glusterfs
# yum -y install glusterfs-server
# /etc/init.d/glusterd start
Starting glusterd: [ OK ]
##I hit a problem here: the version I first installed was not the latest glusterfs, so I ran yum -y update glusterfs-server. After the upgrade the service would not start, with the following errors:
[2014-08-17 03:05:32.374519] E [rpc-transport.c:266:rpc_transport_load] 0-rpc-transport: /usr/lib/glusterfs/3.5.2/rpc-transport/rdma.so: cannot open shared object file: No such file or directory
[2014-08-17 03:05:32.374576] W [rpc-transport.c:270:rpc_transport_load] 0-rpc-transport: volume 'rdma.management': transport-type 'rdma' is not valid or not found on this machine
[2014-08-17 03:05:32.374594] W [rpcsvc.c:1535:rpcsvc_transport_create] 0-rpc-service: cannot create listener, initing the transport failed
[2014-08-17 03:05:32.374839] I [socket.c:3561:socket_init] 0-socket.management: SSL support is NOT enabled
I checked the /usr/lib/glusterfs/3.5.2/rpc-transport/ directory, and rdma.so really was not there.
How the problem was fixed:
cd /var/lib
mv glusterd glusterd.bak
# chkconfig glusterfsd on
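After the old state directory has been moved aside, glusterd can be started again; it recreates an empty /var/lib/glusterd, which also means peers have to be re-probed and volumes re-created. A minimal sketch of the remaining steps, assuming the init scripts shipped with the CentOS 6 glusterfs-server package:
# /etc/init.d/glusterd restart    ## recreates an empty /var/lib/glusterd
# chkconfig glusterd on           ## also start the management daemon at boot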
Configure the whole GlusterFS cluster from 192.168.1.63:
# gluster peer probe 192.168.1.63
peer probe: success. Probe on localhost not needed
# gluster peer probe 192.168.1.64
peer probe: success.
# gluster peer probe 192.168.1.66
peer probe: success.
# gluster peer probe 192.168.1.244
peer probe: success.
##The four machines above make up one cluster!
Check the status of the GlusterFS cluster:
# gluster peer status
Number of Peers: 3
Hostname: 192.168.1.66
Uuid: 7ab89229-fce2-4f86-a61f-d7b17a3c7308
State: Peer in Cluster (Connected)
Hostname: 192.168.1.64
Uuid: 33febb2d-08a2-4676-a86e-5312aa00f934
State: Peer in Cluster (Connected)
Hostname: 192.168.1.244
Uuid: a64a1ffb-c2e6-4171-a178-02e06cd6ad0e
State: Peer in Cluster (Connected)
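As a side note (not actually run here): if a node was probed by mistake, or has to leave the pool later, the probe can be reversed as long as that node no longer hosts any bricks, for example:
# gluster peer detach 192.168.1.244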
Create the data storage directory (it must exist on every node):
mkdir /data/v3_upload
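Since the other servers are already salt minions of 192.168.1.63 (see the salt commands near the end of this document), one way to create the brick directory everywhere is a sketch like the following; the minion names are the ones used later in this document, adjust the target to your environment:
# salt 'YQD-Intranet-DB-NO*' cmd.run 'mkdir -p /data/v3_upload'
# mkdir -p /data/v3_upload    ## and on 192.168.1.63 (the salt master) itself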
Create the GlusterFS volume on 192.168.1.63:
# gluster volume create v3_upload replica 4 192.168.1.63:/data/v3_upload/ 192.168.1.66:/data/v3_upload/ 192.168.1.64:/data/v3_upload/ 192.168.1.244:/data/v3_upload/
volume create: v3_upload: success: please start the volume to access data
I ran into a problem while creating the volume, as follows:
[root@haproxy2 v3_upload]# gluster volume create v3_upload replica 4 192.168.1.203:/data/v3_upload/ 192.168.1.204:/data/v3_upload/ 192.168.1.205:/data/v3_upload/ 192.168.1.206:/data/v3_upload/
volume create: v3_upload: failed: The brick 192.168.1.203:/data/v3_upload is being created in the root partition. It is recommended that you don't use the system's root partition for storage backend. Or use 'force' at the end of the command if you want to override this behavior.
[root@haproxy2 v3_upload]# gluster volume create v3_upload replica 4 192.168.1.203:/data/v3_upload/ 192.168.1.204:/data/v3_upload/ 192.168.1.205:/data/v3_upload/ 192.168.1.206:/data/v3_upload/ force
##With force appended, the volume can be created and the problem is solved
volume create: v3_upload: success: please start the volume to access data
# gluster volume start v3_upload
volume start: v3_upload: success
# gluster volume info
Volume Name: v3_upload
Type: Replicate
Volume ID: 2b1c361c-e711-4ad0-96c4-ca51110bc84a
Status: Started
Number of Bricks: 1 x 4 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/v3_upload
Brick2: 192.168.1.66:/data/v3_upload
Brick3: 192.168.1.64:/data/v3_upload
Brick4: 192.168.1.244:/data/v3_upload
Client:
# wget -P /etc/yum.repos.d
# yum install glusterfs
# yum install glusterfs-fuse
# mkdir /data/v3_upload
# mount -t glusterfs -o ro 192.168.1.63:v3_upload /data/v3_upload/ ##mount read-only
# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 20G 1.5G 17G 9% /
tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 194M 27M 158M 15% /boot
/dev/mapper/vg_web-LogVol00
251G 1.3G 237G 1% /data
/dev/sda3 20G 217M 19G 2% /home
192.168.1.63:v3_upload
251G 802M 238G 1% /data/v3_upload
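To make the client mount survive a reboot, it can also be put in /etc/fstab; a hedged sketch matching the read-only mount above (_netdev delays mounting until the network is up):
192.168.1.63:v3_upload  /data/v3_upload  glusterfs  defaults,_netdev,ro  0 0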
Test writing data through the mount point:
# umount /data/v3_upload/
# mount -t glusterfs 192.168.1.66:v3_upload /data/v3_upload/
# echo "this is 192.168.1.66" > /data/v3_upload/test.txt
# mkdir /data/v3_upload/testdir
Pick any one of the servers and check:
# ll -h
total 12K
drwxr-xr-x 2 root root 4.0K Jul 1 14:20 testdir
-rw-r--r-- 2 root root 21 Jul 1 14:21 test.txt
The data was written successfully.
Now modify test.txt and create a testdir2 directory directly in the brick directory on 192.168.1.63 (not through a client mount):
# cat test.txt
this is 192.168.1.66 create
this is 192.168.1.63 modified
# ll -h
total 16K
drwxr-xr-x 2 root root 4.0K Jul 1 14:20 testdir
drwxr-xr-x 2 root root 4.0K Jul 1 14:23 testdir2
-rw-r--r-- 2 root root 58 Jul 1 14:23 test.txt
View from the client:
# cat test.txt
cat: test.txt: Input/output error
# ll -h
total 4.5K
drwxr-xr-x 2 root root 4.0K Jul 1 14:20 testdir
-rw-r--r-- 1 root root 21 Jul 1 14:21 test.txt
The data write failed (the change made directly on 192.168.1.63 is not visible from the client).
Create a new test2.txt file and a testdir3 directory on 192.168.1.66:
# ll -h
total 20K
-rw-r--r-- 2 root root 24 Jul 1 14:33 test2.txt
drwxr-xr-x 2 root root 4.0K Jul 1 14:20 testdir
drwxr-xr-x 2 root root 4.0K Jul 1 14:33 testdir3
View from the client:
# ll -h
total 8.5K
-rw-r--r-- 1 root root 24 Jul 1 2014 test2.txt
drwxr-xr-x 2 root root 4.0K Jul 1 14:20 testdir
drwxr-xr-x 2 root root 4.0K Jul 1 2014 testdir3
The contents match. When one gluster server is the one mounted by the client, that server effectively has read/write access and its changes are synced to the other gluster nodes, while the other machines in the cluster have no write access; writing on them directly leads to errors.
Final conclusion:
Writing data directly into the brick data directory makes synchronization fail, because the other nodes are never notified of the change.
The correct approach is to perform all reads and writes through the mount point.
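If the bricks of a replicate volume do get out of sync (for example because someone wrote into a brick directly), the self-heal status can be checked and a heal triggered from any of the servers. A hedged sketch:
# gluster volume heal v3_upload info    ## list entries that still need healing
# gluster volume heal v3_upload full    ## force a full self-heal of the volume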
# glusterfs --help
Usage: glusterfs [OPTION...] --volfile-server=SERVER [MOUNT-POINT]
or: glusterfs [OPTION...] --volfile=VOLFILE [MOUNT-POINT]
Basic options:
-f, --volfile=VOLFILE File to use as VOLUME_FILE
-l, --log-file=LOGFILE File to use for logging [default:
/var/log/glusterfs/glusterfs.log]
-L, --log-level=LOGLEVEL Logging severity. Valid options are DEBUG, INFO,
WARNING, ERROR, CRITICAL, TRACE and NONE [default:
INFO]
-s, --volfile-server=SERVER Server to get the volume file from. This
option overrides --volfile option
Advanced Options:
--acl Mount the filesystem with POSIX ACL support
--aux-gfid-mount Enable access to filesystem through gfid directly
--debug Run in debug mode. This option sets --no-daemon,
--log-level to DEBUG and --log-file to console
--enable-ino32[=BOOL] Use 32-bit inodes when mounting to workaround
broken applicationsthat don't support 64-bit
inodes
--fopen-keep-cache[=BOOL] Do not purge the cache on file open
--mac-compat[=BOOL] Provide stubs for attributes needed for seamless
operation on Macs [default: "off"]
-N, --no-daemon Run in foreground
-p, --pid-file=PIDFILE File to use as pid file
--read-only Mount the filesystem in 'read-only' mode
--selinux Enable SELinux label (extened attributes) support
on inodes
-S, --socket-file=SOCKFILE File to use as unix-socket
--volfile-id=KEY 'key' of the volfile to be fetched from server
--volfile-server-port=PORT Listening port number of volfile server
--volfile-server-transport=TRANSPORT
Transport type to get volfile from server
[default: socket]
--volume-name=XLATOR-NAME Translator name to be used for MOUNT-POINT
[default: top most volume definition in VOLFILE]
--worm Mount the filesystem in 'worm' mode
--xlator-option=XLATOR-NAME.OPTION=VALUE
Add/override an option for a translator in volume
file with specified value
Fuse options:
--attribute-timeout=SECONDS
Set attribute timeout to SECONDS for inodes in
fuse kernel module [default: 1]
--background-qlen=N Set fuse module's background queue length to N
[default: 64]
--congestion-threshold=N Set fuse module's congestion threshold to N
[default: 48]
--direct-io-mode[=BOOL] Use direct I/O mode in fuse kernel module
[default: "off" if big writes are supported, else
"on" for fds not opened with O_RDONLY]
--dump-fuse=PATH Dump fuse traffic to PATH
--entry-timeout=SECONDS Set entry timeout to SECONDS in fuse kernel
module [default: 1]
--gid-timeout=SECONDS Set auxilary group list timeout to SECONDS for
fuse translator [default: 0]
--negative-timeout=SECONDS Set negative timeout to SECONDS in fuse
kernel module [default: 0]
--use-readdirp[=BOOL] Use readdirp mode in fuse kernel module [default:
"off"]
--volfile-check Enable strict volume file checking
Miscellaneous Options:
-?, --help Give this help list
--usage Give a short usage message
-V, --version Print program version
Mandatory or optional arguments to long options are also mandatory or optional
for any corresponding short options.
###Common GlusterFS operations:
Delete a GlusterFS volume:
# gluster volume stop v3_upload
Stopping volume will make its data inaccessible. Do you want to continue? (y/n) y
volume stop: v3_upload: success
After running this command, if you cd /data/v3_upload on a glusterfs client and run ls or ll, you get the following message:
[root@haproxy2 v3_upload]# ls
ls: cannot open directory .: Transport endpoint is not connected
[root@haproxy2 v3_upload]# ll
ls: cannot open directory .: Transport endpoint is not connected
[root@haproxy2 v3_upload]# cd
# gluster volume delete v3_upload
Deleting volume will erase all information about the volume. Do you want to continue? (y/n) y
volume delete: v3_upload: success
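If the same brick directories are reused for a new volume afterwards (as is done below), the create command may refuse with a "path or a prefix of it is already part of a volume" error. A hedged cleanup sketch, to be run on every node; note that it throws away whatever is left in the old bricks:
# setfattr -x trusted.glusterfs.volume-id /data/v3_upload
# setfattr -x trusted.gfid /data/v3_upload
# rm -rf /data/v3_upload/.glusterfs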
GlusterFS 3.2.4/5 supports five volume types: Distribute, Stripe, Replica, Distribute-Stripe and Distribute-Replica. These five types cover different applications' needs for high performance and high availability.
(1) Distribute volume: files are spread across the brick servers by a hash algorithm; this is the foundation and the biggest feature of GlusterFS.
(2) Stripe volume: similar to RAID 0; the stripe count equals the number of brick servers; files are split into chunks and distributed round-robin across the brick servers; concurrency is per chunk, so large files perform well.
(3) Replica volume: similar to RAID 1; the replica count equals the number of brick servers, so every brick server holds the same file data, forming an n-way mirror; availability is high.
(4) Distribute-Stripe volume: the number of brick servers is a multiple of the stripe count; combines the characteristics of distribute and stripe volumes.
(5) Distribute-Replica volume: the number of brick servers is a multiple of the replica count; combines the characteristics of distribute and replica volumes.
A simple way to understand the difference between a stripe volume and a distribute-stripe volume:
With a stripe volume built from 4 machines, a single 4 MB file is spread evenly across all 4 machines, roughly 1 MB per machine.
With a distribute-stripe volume built from 4 machines, the bricks are split into two groups; the 4 MB file is written to only one of the groups (the other group gets nothing), and the two machines inside that group split the file, about 2 MB each.
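Put differently: when a distribute-stripe volume is created, the bricks listed on the command line are grouped in order, so with stripe 2 the first two bricks form stripe set 1 and the next two form stripe set 2, and the distribute layer then hashes each file onto one of the sets. An annotated sketch with hypothetical hosts serverA..serverD:
## bricks 1-2 -> stripe set 1, bricks 3-4 -> stripe set 2
# gluster volume create demo stripe 2 serverA:/brick serverB:/brick serverC:/brick serverD:/brick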
Stripe volume:
# gluster volume create v3_upload stripe 4 192.168.1.63:/data/v3_upload/ 192.168.1.244:/data/v3_upload/ 192.168.1.64:/data/v3_upload/ 192.168.1.66:/data/v3_upload/
# gluster volume start v3_upload
# gluster volume info
Volume Name: v3_upload
Type: Stripe
Volume ID: 70457b69-348c-40e7-8831-1d283d2a5c25
Status: Created
Number of Bricks: 1 x 4 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/v3_upload
Brick2: 192.168.1.244:/data/v3_upload
Brick3: 192.168.1.64:/data/v3_upload
Brick4: 192.168.1.66:/data/v3_upload
# mount -t glusterfs -o ro 192.168.1.63:v3_upload /data/v3_upload/
# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 20G 1.5G 17G 9% /
tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 194M 27M 158M 15% /boot
/dev/mapper/vg_web-LogVol00
251G 1.3G 237G 1% /data
/dev/sda3 20G 217M 19G 2% /home
192.168.1.63:v3_upload
2.4T 9.0G 2.3T 1% /data/v3_upload
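A quick way to verify the striping behaviour described above: write one large file through a read-write mount and then check, on each server, how much space it occupies in the brick. A sketch with an arbitrary file name; each of the four bricks should hold roughly a quarter of the data:
# dd if=/dev/zero of=/data/v3_upload/bigfile bs=1M count=400    ## on the client (mounted read-write)
# du -sh /data/v3_upload/bigfile                                ## on each server, in the brick directory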
Distributed Striped volume:
# gluster volume create raid10 stripe 2 transport tcp 192.168.1.63:/data/v3_upload/ 192.168.1.64:/data/v3_upload/ 192.168.1.66:/data/v3_upload/ 192.168.1.244:/data/v3_upload/
# gluster volume start raid10
# gluster volume info
Volume Name: raid10
Type: Distributed-Stripe
Volume ID: 80bcc349-92c3-4839-8287-16462a9657f5
Status: Started
Number of Bricks: 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/v3_upload
Brick2: 192.168.1.64:/data/v3_upload
Brick3: 192.168.1.66:/data/v3_upload
Brick4: 192.168.1.244:/data/v3_upload
[root@YQD-intranet-salt-master v3_upload]# gluster volume status
Status of volume: raid10
Gluster process Port Online Pid
------------------------------------------------------------------------------
Brick 192.168.1.63:/data/v3_upload 49152 Y 2594
Brick 192.168.1.64:/data/v3_upload 49165 Y 12001
Brick 192.168.1.66:/data/v3_upload 49165 Y 2272
Brick 192.168.1.244:/data/v3_upload 49157 Y 6882
NFS Server on localhost 2049 Y 2606
NFS Server on 192.168.1.244 2049 Y 6894
NFS Server on 192.168.1.66 2049 Y 2284
NFS Server on 192.168.1.64 2049 Y 12013
Task Status of Volume raid10
------------------------------------------------------------------------------
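A distributed(-stripe) volume can later be grown by adding bricks; for this raid10 volume they have to be added in multiples of the stripe count (2), and a rebalance then spreads the existing files over the new bricks. A hedged sketch with hypothetical new nodes serverE and serverF:
# gluster volume add-brick raid10 serverE:/data/v3_upload serverF:/data/v3_upload
# gluster volume rebalance raid10 start
# gluster volume rebalance raid10 status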
###Personally, I rather like this approach
Distributed Replicated Volumes:
# gluster volume create dr replica 2 transport tcp 192.168.1.63:/data/dr/ 192.168.1.64:/data/dr/ 192.168.1.66:/data/dr/ 192.168.1.244:/data/dr/
volume create: dr: success: please start the volume to access data
# gluster volume start dr
volume start: dr: success
# gluster volume info
Volume Name: dr
Type: Distributed-Replicate
Volume ID: c1aade6d-d2b9-4ff1-854a-89f97cf63c8f
Status: Started
Number of Bricks: 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/dr
Brick2: 192.168.1.64:/data/dr
Brick3: 192.168.1.66:/data/dr
Brick4: 192.168.1.244:/data/dr
# gluster volume status
Status of volume: dr
Gluster process Port Online Pid
------------------------------------------------------------------------------
Brick 192.168.1.63:/data/dr 49153 Y 14432
Brick 192.168.1.64:/data/dr 49166 Y 12723
Brick 192.168.1.66:/data/dr 49166 Y 2458
Brick 192.168.1.244:/data/dr 49158 Y 7609
NFS Server on localhost 2049 Y 14446
Self-heal Daemon on localhost N/A Y 14451
NFS Server on 192.168.1.244 2049 Y 7623
Self-heal Daemon on 192.168.1.244 N/A Y 7628
NFS Server on 192.168.1.64 2049 Y 12737
Self-heal Daemon on 192.168.1.64 N/A Y 12743
NFS Server on 192.168.1.66 2049 Y 2472
Self-heal Daemon on 192.168.1.66 N/A Y 2477
Task Status of Volume dr
------------------------------------------------------------------------------
There are no active volume tasks
Mount one of the servers on the client:
# mount -t glusterfs 192.168.1.63:dr /data/dr/ ##note: unlike an ordinary NFS mount, the volume is specified as dr, not /dr
# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 20G 1.5G 17G 9% /
tmpfs 1.9G 0 1.9G 0% /dev/shm
/dev/sda1 194M 27M 158M 15% /boot
/dev/mapper/vg_web-LogVol00
251G 1.2G 237G 1% /data
/dev/sda3 20G 217M 19G 2% /home
192.168.1.63:dr 683G 3.5G 645G 1% /data/dr
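To see which replica pair actually stores a given file, the client can query the virtual pathinfo attribute on the mounted file. A hedged example, using one of the files uploaded below and the getfattr tool from the attr package:
# getfattr -n trusted.glusterfs.pathinfo /data/dr/aaa.txt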
Volume configuration file (the file must be identical on all four machines):
volume dr-client-0
type protocol/client
option send-gids true
option transport-type tcp
option remote-subvolume /data/dr
option remote-host 192.168.1.63
end-volume
volume dr-client-1
type protocol/client
option send-gids true
option transport-type tcp
option remote-subvolume /data/dr
option remote-host 192.168.1.64
end-volume
volume dr-client-2
type protocol/client
option send-gids true
option transport-type tcp
option remote-subvolume /data/dr
option remote-host 192.168.1.66
end-volume
volume dr-client-3
type protocol/client
option send-gids true
option transport-type tcp
option remote-subvolume /data/dr
option remote-host 192.168.1.244
end-volume
volume dr-replicate-0
type cluster/replicate
subvolumes dr-client-0 dr-client-1
end-volume
volume dr-replicate-1
type cluster/replicate
subvolumes dr-client-2 dr-client-3
end-volume
volume dr-dht
type cluster/distribute
subvolumes dr-replicate-0 dr-replicate-1
end-volume
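On GlusterFS 3.x this volume file is normally generated by glusterd on each server (which is also why it ends up identical on all four machines) and fetched by the client at mount time; on the servers it lives somewhere under /var/lib/glusterd/vols/dr/ (the exact file name differs between versions). As the --help output above shows, such a file can also be passed to glusterfs directly; a hedged sketch, assuming dr-fuse.vol is the generated fuse volfile:
# ls /var/lib/glusterd/vols/dr/
# glusterfs --volfile=/var/lib/glusterd/vols/dr/dr-fuse.vol /data/dr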
Mount on the client and test uploading data:
Add the following IP-to-hostname mappings to /etc/hosts on the client machine:
192.168.1.63   YQD-intranet-salt-master
192.168.1.66   YQD-Intranet-DB-NO1
192.168.1.64   YQD-Intranet-DB-NO2
192.168.1.244  YQD-Intranet-DB-NO3
As the salt listings below show:
63 and 64 form one replica pair — salt-master and NO2 hold exactly the same content — while NO1 and NO3 hold identical content. In a real environment you are very unlikely to lose both machines of one pair at the same time; if that ever happened, it would truly be a tragedy!
[root@YQD-intranet-salt-master dr]# salt 'YQD-Intranet-DB-NO1' cmd.run 'ls /data/dr'
YQD-Intranet-DB-NO1:
aaa.txt
glusterfs-3.5.1.tar.gz
lmnp.tar.gz
[root@YQD-intranet-salt-master dr]# salt 'YQD-Intranet-DB-NO2' cmd.run 'ls /data/dr'
YQD-Intranet-DB-NO2:
mysql-proxy-0.8.4.tar.gz
[root@YQD-intranet-salt-master dr]# salt 'YQD-Intranet-DB-NO3' cmd.run 'ls /data/dr'
YQD-Intranet-DB-NO3:
aaa.txt
glusterfs-3.5.1.tar.gz
lmnp.tar.gz
[root@YQD-intranet-salt-master dr]# ls /data/dr/
mysql-proxy-0.8.4.tar.gz
We also tested node failure: shutting down one machine in a replica pair, or stopping its glusterfs services, does not affect file uploads at all; once the machine is booted again (or its glusterfs services are restarted),
it automatically resyncs with the other member of its pair.
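A minimal version of that failover test, sketched out (stop the daemons on one member of a pair, keep writing from the client, then bring the node back):
## on one node of a replica pair, e.g. 192.168.1.64:
# /etc/init.d/glusterd stop
# /etc/init.d/glusterfsd stop
## on the client, keep writing through the mount:
# cp some-file.tar.gz /data/dr/
## bring the node back and check that its brick is online again:
# /etc/init.d/glusterd start
# gluster volume status dr
The files written while the node was down are then healed back onto it automatically.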