1. Environment
[root@Master hadoop-2.6.0]#lsb_release -a
LSB Version: :base-4.0-amd64:base-4.0-noarch:core-4.0-amd64:core-4.0-noarch:graphics-4.0-amd64:graphics-4.0-noarch:printing-4.0-amd64:printing-4.0-noarch
Distributor ID: CentOS
Description: CentOS release 6.5 (Final)
Release: 6.5
Codename: Final
[root@Master hadoop-2.6.0]#cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.56.121 Master
192.168.56.122 Slave1
192.168.56.123 Slave2
The hostnames of the three hosts are Master, Slave1, and Slave2.
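To confirm that the /etc/hosts entries resolve correctly, a quick sanity check is to ping each node by hostname from every host (optional):
ping -c 1 Master
ping -c 1 Slave1
ping -c 1 Slave2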
2. Hadoop architecture
Master is the master node; Slave1 and Slave2 are the slave nodes.
Environment variables for the Hadoop installation:
[root@Master hadoop-2.6.0]#cat ~/.bash_profile
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
HADOOP_HOME=/usr/local/hadoop/hadoop-2.6.0
SCALA_HOME=/usr/local/scala/scala-2.10.4
SPARK_HOME=/usr/local/spark/spark-1.5.0-bin-hadoop2.6
JAVA_HOME=/opt/jdk1.7.0_79
CLASS_PATH=.:$JAVA_HOME/lib/tools.jar
PATH=$PATH:$HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SCALA_HOME/bin:$SPARK_HOME/bin:$JAVA_HOME/bin
export PATH HADOOP_HOME SPARK_HOME JAVA_HOME CLASS_PATH
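After editing ~/.bash_profile, reload it so the variables take effect in the current shell (otherwise they are only picked up at the next login); the echo is just a quick check:
source ~/.bash_profile
echo $HADOOP_HOME $JAVA_HOME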
3. Software versions
The JDK version is 1.7.0_79, the Spark version is spark-1.5.0-bin-hadoop2.6, and the Hadoop version is hadoop-2.6.0.
4. System status
Make sure the firewall is turned off:
[root@Master hadoop-2.6.0]#service iptables status
iptables: Firewall is not running.
If it is still running, stop and disable it with the following commands:
service iptables stop
chkconfig iptables off
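To confirm the firewall will also stay off after a reboot, the runlevel settings can be checked (optional):
chkconfig --list iptables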
5. SSH mutual trust (passwordless login)
On the Master host:
#ssh-keygen -t rsa
#chmod 755 .ssh
#cat .ssh/id_rsa.pub >> .ssh/authorized_keys
#chmod 644 .ssh/authorized_keys
On the Slave1 host:
#ssh-keygen -t rsa
#chmod 755 .ssh
#cat .ssh/id_rsa.pub >> .ssh/authorized_keys
#chmod 644 .ssh/authorized_keys
On the Slave2 host:
#ssh-keygen -t rsa
#chmod 755 .ssh
#cat .ssh/id_rsa.pub >> .ssh/authorized_keys
#chmod 644 .ssh/authorized_keys
Append the authorized_keys entries from Slave1 and Slave2 to the authorized_keys file on the Master host,
append the authorized_keys entries from Slave1 and Master to the authorized_keys file on Slave2,
and append the authorized_keys entries from Slave2 and Master to the authorized_keys file on Slave1. The end result is that the authorized_keys files on all three systems are identical.
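One way to build the identical authorized_keys files is to collect the slave keys on the Master and then push the merged file back out; this is a sketch run from the Master host (the first two commands still prompt for the root password because trust is not yet established):
ssh Slave1 cat .ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh Slave2 cat .ssh/id_rsa.pub >> ~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys Slave1:~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys Slave2:~/.ssh/authorized_keys
Afterwards, ssh Slave1 and ssh Slave2 from the Master (and vice versa) should no longer ask for a password.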
6. Install Hadoop
Extract hadoop-2.6.0.tar.gz into /usr/local/hadoop.
Create the required directories:
[root@Master hadoop-2.6.0]#cd /usr/local/hadoop/hadoop-2.6.0/
[root@Master hadoop-2.6.0]#mkdir tmp
[root@Master hadoop-2.6.0]#mkdir -p dfs/data
[root@Master hadoop-2.6.0]#mkdir dfs/name
Edit the Hadoop configuration files:
[root@Master hadoop-2.6.0]# cd /usr/local/hadoop/hadoop-2.6.0/etc/hadoop/
Edit hadoop-env.sh and add the following line at the top:
export JAVA_HOME=/opt/jdk1.7.0_79
Edit yarn-env.sh and add the following line at the top:
export JAVA_HOME=/opt/jdk1.7.0_79
Add the slave hostnames to the slaves file:
[root@Master hadoop]#cat slaves
Slave1
Slave2
Edit core-site.xml:
[root@Master hadoop]#cat core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://Master:9000</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/local/hadoop/hadoop-2.6.0/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>hadoop.proxyuser.spark.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.spark.groups</name>
    <value>*</value>
  </property>
</configuration>
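Once the configuration is in place, the effective value of fs.defaultFS can be read back with hdfs getconf, which is a simple way to confirm the file is being picked up (optional check):
hdfs getconf -confKey fs.defaultFS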
Edit hdfs-site.xml:
[root@Master hadoop]#cat hdfs-site.xml
<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>Master:9001</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop/hadoop-2.6.0/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop/hadoop-2.6.0/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>
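Because dfs.webhdfs.enabled is set to true, the NameNode's REST interface can later serve as a quick health check once HDFS is running (a sketch, assuming the default NameNode web port 50070):
curl -i "http://Master:50070/webhdfs/v1/?op=LISTSTATUS"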
Edit mapred-site.xml:
[root@Master hadoop]#cat mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>Master:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>Master:19888</value>
  </property>
</configuration>
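Note that the JobHistory server addressed above (ports 10020 and 19888) is not launched by start-all.sh; if job history is needed it can be started separately once the cluster is up (a sketch, using the script shipped in $HADOOP_HOME/sbin):
mr-jobhistory-daemon.sh start historyserver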
Edit yarn-site.xml:
[root@Master hadoop]#cat yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>Master:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>Master:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>Master:8035</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>Master:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>Master:8088</value>
  </property>
</configuration>
Copy the Hadoop installation directory to the Slave1 and Slave2 servers with scp:
[root@Master etc]#scp -r /usr/local/hadoop/ Slave1:/usr/local/
[root@Master etc]#scp -r /usr/local/hadoop/ Slave2:/usr/local/
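If the environment variables shown in section 2 have not been set up on the slaves yet, the same profile can be copied over as well (an optional step, assuming identical directory layouts on all nodes):
scp ~/.bash_profile Slave1:~/
scp ~/.bash_profile Slave2:~/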
Format the NameNode. This only needs to be done once, on the Master (NameNode) host:
[root@Master etc]#hadoop namenode -format
Start Hadoop:
[root@Master etc]#start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [Master]
Master: starting namenode, logging to /usr/local/hadoop/hadoop-2.6.0/logs/hadoop-root-namenode-Master.out
Slave1: starting datanode, logging to /usr/local/hadoop/hadoop-2.6.0/logs/hadoop-root-datanode-Slave1.out
Slave2: starting datanode, logging to /usr/local/hadoop/hadoop-2.6.0/logs/hadoop-root-datanode-Slave2.out
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop/hadoop-2.6.0/logs/yarn-root-resourcemanager-Master.out
Slave1: starting nodemanager, logging to /usr/local/hadoop/hadoop-2.6.0/logs/yarn-root-nodemanager-Slave1.out
Slave2: starting nodemanager, logging to /usr/local/hadoop/hadoop-2.6.0/logs/yarn-root-nodemanager-Slave2.out
Stop Hadoop:
[root@Master etc]#stop-all.sh
This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh
Stopping namenodes on [Master]
Master: stopping namenode
Slave1: stopping datanode
Slave2: stopping datanode
stopping yarn daemons
stopping resourcemanager
Slave2: stopping nodemanager
Slave1: stopping nodemanager
no proxyserver to stop
With the cluster running, check the processes on the Master host:
[root@Master sbin]#jps
7843 ResourceManager
7604 NameNode
8233 Jps
Check the processes on the Slave1 host:
[root@Slave1 hadoop]#jps
3914 NodeManager
3811 DataNode
4096 Jps
Check the processes on the Slave2 host:
[root@Slave2 hadoop]#jps
27396 DataNode
27499 NodeManager
27682 Jps
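Besides jps, the web interfaces can also confirm the cluster is healthy: the NameNode UI (default port 50070) and the ResourceManager UI on port 8088, as set in yarn.resourcemanager.webapp.address. A rough command-line check:
curl -s http://Master:50070 >/dev/null && echo "NameNode UI reachable"
curl -s http://Master:8088 >/dev/null && echo "ResourceManager UI reachable"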
Accessing Hadoop
Working with HDFS:
Create directories:
hadoop fs -mkdir /tmp
hadoop fs -mkdir /tmp/input
Upload data from the local filesystem into HDFS:
hadoop fs -put put/ /tmp/
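To confirm the upload and exercise the whole stack end to end, you can list the directory and, assuming some text files were placed under /tmp/input, run the bundled wordcount example (the examples jar ships with the Hadoop 2.6.0 distribution):
hadoop fs -ls /tmp
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /tmp/input /tmp/output
hadoop fs -cat /tmp/output/part-r-00000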