Chinaunix首页 | 论坛 | 博客
  • 博客访问: 2834577
  • 博文数量: 587
  • 博客积分: 6356
  • 博客等级: 准将
  • 技术积分: 6410
  • 用 户 组: 普通用户
  • 注册时间: 2008-10-23 10:54
个人简介

器量大者,福泽必厚

文章分类

全部博文(587)

文章存档

2019年(3)

2018年(1)

2017年(29)

2016年(39)

2015年(66)

2014年(117)

2013年(136)

2012年(58)

2011年(34)

2010年(50)

2009年(38)

2008年(16)

分类: LINUX

2014-07-02 15:24:03

##该文档是我和同事bob讨论的结果,他书写的,由衷向他表示感谢
192.168.1.63:
wget -P /etc/yum.repos.d http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/glusterfs-epel.repo
# yum list glusterfs*
Loaded plugins: fastestmirror
Loading mirror speeds from cached hostfile
 * base: mirrors.163.com
 * epel: mirrors.hust.edu.cn
 * extras: centos.ustc.edu.cn
 * updates: mirrors.163.com
Installed Packages
glusterfs.x86_64                                                3.5.1-1.el6                                @glusterfs-epel      
glusterfs-api.x86_64                                            3.5.1-1.el6                                @glusterfs-epel      
glusterfs-cli.x86_64                                            3.5.1-1.el6                                @glusterfs-epel      
glusterfs-fuse.x86_64                                           3.5.1-1.el6                                @glusterfs-epel      
glusterfs-libs.x86_64                                           3.5.1-1.el6                                @glusterfs-epel      
glusterfs-server.x86_64                                         3.5.1-1.el6                                @glusterfs-epel      
Available Packages
glusterfs-api-devel.x86_64                                      3.5.1-1.el6                                glusterfs-epel       
glusterfs-debuginfo.x86_64                                      3.5.1-1.el6                                glusterfs-epel       
glusterfs-devel.x86_64                                          3.5.1-1.el6                                glusterfs-epel       
glusterfs-extra-xlators.x86_64                                  3.5.1-1.el6                                glusterfs-epel       
glusterfs-geo-replication.x86_64                                3.5.1-1.el6                                glusterfs-epel       
glusterfs-rdma.x86_64                                           3.5.1-1.el6                                glusterfs-epel       
glusterfs-regression-tests.x86_64                               3.5.1-1.el6                                glusterfs-epel       
glusterfs-resource-agents.noarch                                3.5.1-1.el6                                glusterfs-noarch-epel


# yum -y install glusterfs
# yum -y install glusterfs-server


# /etc/init.d/glusterd start
Starting glusterd:                                         [  OK  ]
##我在这里遇到一个问题,开始我安装的不是glusterfs的最新版本,我执行了yum -y update glusterfs-server 升级后,结果服务不能启动,报错如下:

  1. [2014-08-17 03:05:32.374519] E [rpc-transport.c:266:rpc_transport_load] 0-rpc-transport: /usr/lib/glusterfs/3.5.2/rpc-transport/rdma.so: cannot open shared object file: No such file or directory
  2. [2014-08-17 03:05:32.374576] W [rpc-transport.c:270:rpc_transport_load] 0-rpc-transport: volume 'rdma.management': transport-type 'rdma' is not valid or not found on this machine
  3. [2014-08-17 03:05:32.374594] W [rpcsvc.c:1535:rpcsvc_transport_create] 0-rpc-service: cannot create listener, initing the transport failed
  4. [2014-08-17 03:05:32.374839] I [socket.c:3561:socket_init] 0-socket.management: SSL support is NOT enabled
我去查找/usr/lib/glusterfs/3.5.2/rpc-transport/目录,结果真的没有rdma.so
该问题解决方法:
cd /var/lib
mv  glusterd    glusterd.bak


 

# chkconfig glusterfsd on


在192.168.1.63上面配置整个glusterfs集群:


# gluster peer probe 192.168.1.63
peer probe: success. Probe on localhost not needed


# gluster peer probe 192.168.1.64
peer probe: success.


# gluster peer probe 192.168.1.66
peer probe: success.


# gluster peer probe 192.168.1.244
peer probe: success.
##上面是4台机器组成一个集群!



查看glusterfs集群的状态:
# gluster peer status
Number of Peers: 3


Hostname: 192.168.1.66
Uuid: 7ab89229-fce2-4f86-a61f-d7b17a3c7308
State: Peer in Cluster (Connected)


Hostname: 192.168.1.64
Uuid: 33febb2d-08a2-4676-a86e-5312aa00f934
State: Peer in Cluster (Connected)


Hostname: 192.168.1.244
Uuid: a64a1ffb-c2e6-4171-a178-02e06cd6ad0e
State: Peer in Cluster (Connected)




创建数据存放目录:
mkdir /data/v3_upload


在192.168.1.63上创建glusterFS磁盘:
# gluster volume create v3_upload  replica 4 192.168.1.63:/data/v3_upload/ 192.168.1.66:/data/v3_upload/ 192.168.1.64:/data/v3_upload/ 192.168.1.244:/data/v3_upload/
volume create: v3_upload: success: please start the volume to access data
我在创建的时候遇到了一个问题,如下:

  1. [root@haproxy2 v3_upload]# gluster volume create v3_upload replica 4 192.168.1.203:/data/v3_upload/ 192.168.1.204:/data/v3_upload/ 192.168.1.205:/data/v3_upload/ 192.168.1.206:/data/v3_upload/
  2. volume create: v3_upload: failed: The brick 192.168.1.203:/data/v3_upload is is being created in the root partition. It is recommended that you don't use the system's root partition for storage backend. Or use 'force' at the end of the command if you want to override this behavior.
  3. [root@haproxy2 v3_upload]#
  4. [root@haproxy2 v3_upload]# gluster volume create v3_upload replica 4 192.168.1.203:/data/v3_upload/ 192.168.1.204:/data/v3_upload/ 192.168.1.205:/data/v3_upload/ 192.168.1.206:/data/v3_upload/ force    
  5. ##加上force参数后,即可创建,问题即可解决
  6. volume create: v3_upload: success: please start the volume to access data


# gluster volume start v3_upload
volume start: v3_upload: success


# gluster volume info
Volume Name: v3_upload
Type: Replicate
Volume ID: 2b1c361c-e711-4ad0-96c4-ca51110bc84a
Status: Started
Number of Bricks: 1 x 4 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/v3_upload
Brick2: 192.168.1.66:/data/v3_upload
Brick3: 192.168.1.64:/data/v3_upload
Brick4: 192.168.1.244:/data/v3_upload






客户端:
# wget -P /etc/yum.repos.d http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/glusterfs-epel.repo
# yum install glusterfs
# yum install glusterfs-fuse


# mkdir /data/v3_upload
# mount -t glusterfs -o ro 192.168.1.63:v3_upload /data/v3_upload/      ##以read only的方式挂载
# df -h
Filesystem            Size  Used Avail Use% Mounted on
/dev/sda2              20G  1.5G   17G   9% /
tmpfs                 1.9G     0  1.9G   0% /dev/shm
/dev/sda1             194M   27M  158M  15% /boot
/dev/mapper/vg_web-LogVol00
                      251G  1.3G  237G   1% /data
/dev/sda3              20G  217M   19G   2% /home
192.168.1.63:v3_upload
                      251G  802M  238G   1% /data/v3_upload








测试挂载点写入数据:
# umount /data/v3_upload/
# mount -t glusterfs 192.168.1.66:v3_upload /data/v3_upload/
# echo "this is 192.168.1.66" > /data/v3_upload/test.txt
# mkdir /data/v3_upload/testdir


随便选一台服务端查看:
# ll -h
total 12K
drwxr-xr-x 2 root root 4.0K Jul  1 14:20 testdir
-rw-r--r-- 2 root root   21 Jul  1 14:21 test.txt


数据写入成功


从192.168.1.63上修改test.txt文件,并创建testdir2目录:
# cat test.txt 
this is 192.168.1.66 create
this is 192.168.1.63 modified
# ll -h
total 16K
drwxr-xr-x 2 root root 4.0K Jul  1 14:20 testdir
drwxr-xr-x 2 root root 4.0K Jul  1 14:23 testdir2
-rw-r--r-- 2 root root   58 Jul  1 14:23 test.txt


从客户端查看:
# cat test.txt 
cat: test.txt: Input/output error
# ll -h
total 4.5K
drwxr-xr-x 2 root root 4.0K Jul  1 14:20 testdir
-rw-r--r-- 1 root root   21 Jul  1 14:21 test.txt
数据写入失败


在192.168.1.66上创建新的test2.txt文件和testdir3目录:
# ll -h
total 20K
-rw-r--r-- 2 root root   24 Jul  1 14:33 test2.txt
drwxr-xr-x 2 root root 4.0K Jul  1 14:20 testdir
drwxr-xr-x 2 root root 4.0K Jul  1 14:33 testdir3
从客户端查看:
# ll -h
total 8.5K
-rw-r--r-- 1 root root   24 Jul  1  2014 test2.txt
drwxr-xr-x 2 root root 4.0K Jul  1 14:20 testdir
drwxr-xr-x 2 root root 4.0K Jul  1  2014 testdir3
两者内容一致。当某台gluster服务器被挂载到客户端时,被挂载的这台具有读写权限,写入的数据会同步到其他gluster节点上;而gluster集群里的其他机器则没有写入权限,直接写入会导致报错。
最终结论:
在数据目录中直接写入数据,会导致其它节点因为得不到通知而使数据同步失败。
正确的做法是所有的读写操作都通过挂载点来进行。


# glusterfs --help
Usage: glusterfs [OPTION...] --volfile-server=SERVER [MOUNT-POINT]
  or:  glusterfs [OPTION...] --volfile=VOLFILE [MOUNT-POINT]


 Basic options:
  -f, --volfile=VOLFILE      File to use as VOLUME_FILE
  -l, --log-file=LOGFILE     File to use for logging [default:
                             /var/log/glusterfs/glusterfs.log]
  -L, --log-level=LOGLEVEL   Logging severity.  Valid options are DEBUG, INFO,
                             WARNING, ERROR, CRITICAL, TRACE and NONE [default:
                             INFO]
  -s, --volfile-server=SERVER   Server to get the volume file from.  This
                             option overrides --volfile option


 Advanced Options:
      --acl                  Mount the filesystem with POSIX ACL support
      --aux-gfid-mount       Enable access to filesystem through gfid directly
      --debug                Run in debug mode.  This option sets --no-daemon,
                             --log-level to DEBUG and --log-file to console
      --enable-ino32[=BOOL]  Use 32-bit inodes when mounting to workaround
                             broken applicationsthat don't support 64-bit
                             inodes
      --fopen-keep-cache[=BOOL]   Do not purge the cache on file open
      --mac-compat[=BOOL]    Provide stubs for attributes needed for seamless
                             operation on Macs [default: "off"]
  -N, --no-daemon            Run in foreground
  -p, --pid-file=PIDFILE     File to use as pid file
      --read-only            Mount the filesystem in 'read-only' mode
      --selinux              Enable SELinux label (extened attributes) support
                             on inodes
  -S, --socket-file=SOCKFILE File to use as unix-socket
      --volfile-id=KEY       'key' of the volfile to be fetched from server
      --volfile-server-port=PORT   Listening port number of volfile server
      --volfile-server-transport=TRANSPORT
                             Transport type to get volfile from server
                             [default: socket]
      --volume-name=XLATOR-NAME   Translator name to be used for MOUNT-POINT
                             [default: top most volume definition in VOLFILE]
      --worm                 Mount the filesystem in 'worm' mode
      --xlator-option=XLATOR-NAME.OPTION=VALUE
                             Add/override an option for a translator in volume
                             file with specified value


 Fuse options:
      --attribute-timeout=SECONDS
                             Set attribute timeout to SECONDS for inodes in
                             fuse kernel module [default: 1]
      --background-qlen=N    Set fuse module's background queue length to N
                             [default: 64]
      --congestion-threshold=N   Set fuse module's congestion threshold to N
                             [default: 48]
      --direct-io-mode[=BOOL]   Use direct I/O mode in fuse kernel module
                             [default: "off" if big writes are supported, else
                             "on" for fds not opened with O_RDONLY]
      --dump-fuse=PATH       Dump fuse traffic to PATH
      --entry-timeout=SECONDS   Set entry timeout to SECONDS in fuse kernel
                             module [default: 1]
      --gid-timeout=SECONDS  Set auxilary group list timeout to SECONDS for
                             fuse translator [default: 0]
      --negative-timeout=SECONDS   Set negative timeout to SECONDS in fuse
                             kernel module [default: 0]
      --use-readdirp[=BOOL]  Use readdirp mode in fuse kernel module [default:
                             "off"]
      --volfile-check        Enable strict volume file checking


 Miscellaneous Options:


  -?, --help                 Give this help list
      --usage                Give a short usage message
  -V, --version              Print program version


Mandatory or optional arguments to long options are also mandatory or optional
for any corresponding short options.
Report bugs to <gluster-users@gluster.org>.




###glusterfs 常见操作:
删除glusterfs磁盘:
# gluster volume stop v3_upload
Stopping volume will make its data inaccessible. Do you want to continue? (y/n) y
volume stop: v3_upload: success
执行完这个命令后,在glusterfs 客户端,执行
cd /data/v3_upload
执行ls或ll有如下的提示:
  1. [root@haproxy2 v3_upload]# ls
  2. ls: cannot open directory .: Transport endpoint is not connected
  3. [root@haproxy2 v3_upload]# ll
  4. ls: cannot open directory .: Transport endpoint is not connected
  5. [root@haproxy2 v3_upload]# cd

# gluster volume delete v3_upload
Deleting volume will erase all information about the volume. Do you want to continue? (y/n) y
volume delete: v3_upload: success


Glusterfs3.2.4/5支持五种Volume,即Distribute卷、Stripe卷、Replica卷、Distribute stripe卷和Distribute replica卷,这五种卷可以满足不同应用对高性能、高可用的需求。
(1)distribute volume:分布式卷,文件通过hash算法分布到brick server上,这种卷是glusterfs的基础和最大特点;
(2)stripe volume:条带卷, 类似RAID0,条带数=brick server数量,文件分成数据块以Round Robin方式分布到brick server上,并发粒度是数据块,大文件性能高;
(3)replica volume:镜像卷,类似RAID1,镜像数=brick server数量,所有brick server上文件数据相同,构成n-way镜像,可用性高;
(4)distribute stripe volume:分布式条带卷,brick server数量是条带数的倍数,兼具distribute和stripe卷的特点;
(5)distribute replica volume:分布式镜像卷,brick server数量是镜像数的倍数,兼具distribute和replica卷的特点;




对于stripe volume和distribute stripe volume区别的简单理解:
如果对于4台机器组成的stripe volume,1个4M的文件,则平均分布在4台机器上,每个机器上的文件1M左右
对于4台机器组成的distribute stripe volume ,会分成两个组,4M的文件会写到一个组上(另外一个组上不会有文件写入),这个组内的两台机器,均分这个文件,1个上面2M




Stripe volume:
# gluster volume create v3_upload stripe 4 192.168.1.63:/data/v3_upload/ 192.168.1.244:/data/v3_upload/ 192.168.1.64:/data/v3_upload/ 192.168.1.66:/data/v3_upload/
# gluster volume start v3_upload
# gluster volume info
 
Volume Name: v3_upload
Type: Stripe
Volume ID: 70457b69-348c-40e7-8831-1d283d2a5c25
Status: Created
Number of Bricks: 1 x 4 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/v3_upload
Brick2: 192.168.1.244:/data/v3_upload
Brick3: 192.168.1.64:/data/v3_upload
Brick4: 192.168.1.66:/data/v3_upload
# mount -t glusterfs -o ro 192.168.1.63:v3_upload /data/v3_upload/
# df -h
Filesystem            Size  Used Avail Use% Mounted on
/dev/sda2              20G  1.5G   17G   9% /
tmpfs                 1.9G     0  1.9G   0% /dev/shm
/dev/sda1             194M   27M  158M  15% /boot
/dev/mapper/vg_web-LogVol00
                      251G  1.3G  237G   1% /data
/dev/sda3              20G  217M   19G   2% /home
192.168.1.63:v3_upload
                      2.4T  9.0G  2.3T   1% /data/v3_upload






















Distributed Striped volume:


[root@YQD-intranet-salt-master v3_upload]# gluster volume status;
Status of volume: raid10
Gluster process Port Online Pid
------------------------------------------------------------------------------
Brick 192.168.1.63:/data/v3_upload 49152 Y 2594
Brick 192.168.1.64:/data/v3_upload 49165 Y 12001
Brick 192.168.1.66:/data/v3_upload 49165 Y 2272
Brick 192.168.1.244:/data/v3_upload 49157 Y 6882
NFS Server on localhost 2049 Y 2606
NFS Server on 192.168.1.244 2049 Y 6894
NFS Server on 192.168.1.66 2049 Y 2284
NFS Server on 192.168.1.64 2049 Y 12013


# gluster volume create raid10 stripe 2 transport tcp 192.168.1.63:/data/v3_upload/ 192.168.1.64:/data/v3_upload/ 192.168.1.66:/data/v3_upload/ 192.168.1.244:/data/v3_upload/
# gluster volume start raid10
# gluster volume info
 
Volume Name: raid10
Type: Distributed-Stripe
Volume ID: 80bcc349-92c3-4839-8287-16462a9657f5
Status: Started
Number of Bricks: 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/v3_upload
Brick2: 192.168.1.64:/data/v3_upload
Brick3: 192.168.1.66:/data/v3_upload
Brick4: 192.168.1.244:/data/v3_upload
 
Task Status of Volume raid10
------------------------------------------------------------------------------














###个人比较喜欢这种方式
Distributed Replicated Volumes:
# gluster volume create  dr replica 2 transport tcp 192.168.1.63:/data/dr/ 192.168.1.64:/data/dr/ 192.168.1.66:/data/dr/ 192.168.1.244:/data/dr/
volume create: dr: success: please start the volume to access data
# gluster volume start dr
volume start: dr: success
# gluster volume info
 
Volume Name: dr
Type: Distributed-Replicate
Volume ID: c1aade6d-d2b9-4ff1-854a-89f97cf63c8f
Status: Started
Number of Bricks: 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 192.168.1.63:/data/dr
Brick2: 192.168.1.64:/data/dr
Brick3: 192.168.1.66:/data/dr
Brick4: 192.168.1.244:/data/dr
# gluster volume status
Status of volume: dr
Gluster process Port Online Pid
------------------------------------------------------------------------------
Brick 192.168.1.63:/data/dr 49153 Y 14432
Brick 192.168.1.64:/data/dr 49166 Y 12723
Brick 192.168.1.66:/data/dr 49166 Y 2458
Brick 192.168.1.244:/data/dr 49158 Y 7609
NFS Server on localhost 2049 Y 14446
Self-heal Daemon on localhost N/A Y 14451
NFS Server on 192.168.1.244 2049 Y 7623
Self-heal Daemon on 192.168.1.244 N/A Y 7628
NFS Server on 192.168.1.64 2049 Y 12737
Self-heal Daemon on 192.168.1.64 N/A Y 12743
NFS Server on 192.168.1.66 2049 Y 2472
Self-heal Daemon on 192.168.1.66 N/A Y 2477
 
Task Status of Volume dr
------------------------------------------------------------------------------
There are no active volume tasks








在client上挂载其中的一台:
# mount -t glusterfs 192.168.1.63:dr /data/dr/   ## 注意这里和普通的nfs挂载不同,是dr而非/dr
# df -h
Filesystem            Size  Used Avail Use% Mounted on
/dev/sda2              20G  1.5G   17G   9% /
tmpfs                 1.9G     0  1.9G   0% /dev/shm
/dev/sda1             194M   27M  158M  15% /boot
/dev/mapper/vg_web-LogVol00
                      251G  1.2G  237G   1% /data
/dev/sda3              20G  217M   19G   2% /home
192.168.1.63:dr       683G  3.5G  645G   1% /data/dr








配置文件:(4台机器上的配置文件需要一样)
volume dr-client-0
    type protocol/client
    option send-gids true
    option transport-type tcp
    option remote-subvolume /data/dr
    option remote-host 192.168.1.63
end-volume


volume dr-client-1
    type protocol/client
    option send-gids true
    option transport-type tcp
    option remote-subvolume /data/dr
    option remote-host 192.168.1.64
end-volume


volume dr-client-2
    type protocol/client
    option send-gids true
    option transport-type tcp
    option remote-subvolume /data/dr
    option remote-host 192.168.1.66
end-volume


volume dr-client-3
    type protocol/client
    option send-gids true
    option transport-type tcp
    option remote-subvolume /data/dr
    option remote-host 192.168.1.244
end-volume


volume dr-replicate-0
    type cluster/replicate
    subvolumes dr-client-0 dr-client-1
end-volume


volume dr-replicate-1
    type cluster/replicate
    subvolumes dr-client-2 dr-client-3
end-volume


volume dr-dht
    type cluster/distribute
    subvolumes dr-replicate-0 dr-replicate-1
end-volume










客户端挂载并上传数据测试:
在客户端机器的/etc/hosts文件添加如下内容:
ip对应主机名:
 hostname                                     ip
YQD-intranet-salt-master                      192.168.1.63
YQD-Intranet-DB-NO1                           192.168.1.66
YQD-Intranet-DB-NO2                           192.168.1.64
YQD-Intranet-DB-NO3                           192.168.1.244


从这里可以看出:
63、64为一组:salt-master和NO2这两台主机的内容是一样的,而NO1和NO3的内容是一致的。在实际的环境中,你不可能一次性坏一组服务器吧,那真是太杯具了!!!!


[root@YQD-intranet-salt-master dr]# salt 'YQD-Intranet-DB-NO1' cmd.run 'ls /data/dr'
YQD-Intranet-DB-NO1:
    aaa.txt
    glusterfs-3.5.1.tar.gz
    lmnp.tar.gz
[root@YQD-intranet-salt-master dr]# salt 'YQD-Intranet-DB-NO2' cmd.run 'ls /data/dr'
YQD-Intranet-DB-NO2:
    mysql-proxy-0.8.4.tar.gz
[root@YQD-intranet-salt-master dr]# salt 'YQD-Intranet-DB-NO3' cmd.run 'ls /data/dr'
YQD-Intranet-DB-NO3:
    aaa.txt
    glusterfs-3.5.1.tar.gz
    lmnp.tar.gz
[root@YQD-intranet-salt-master dr]# ls /data/dr/
mysql-proxy-0.8.4.tar.gz


还有机器宕机测试:将某个组内的一台机器关闭,或将其glusterfs相关服务关闭掉,上传文件没有影响;当被关闭的机器重启或glusterfs相关服务重新启动后,
会在组内自动同步


阅读(3980) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~