1.安装ubuntu-server14.04
2.ubuntu-server14.04安装docker
sudo apt-get install docker.io
3.pull hadoop-docker镜像
docker pull sequenceiq/hadoop-docker:2.7.1
docker run -it sequenceiq/hadoop-docker:2.7.1 /etc/bootstrap.sh -bash
参考
这个好像已经带spark了
增加部署spark
4.yum install wget
5.下载安装spark
wget https://archive.apache.org/dist/spark/spark-1.5.2/spark-1.5.2-bin-without-hadoop.tgz
mv spark-1.5.2-bin-without-hadoop.tgz /usr/local
cd /usr/local
tar -xf spark-1.5.2-bin-without-hadoop.tgz
mv spark-1.5.2-bin-without-hadoop spark
vi /etc/profile,增加:
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export SPARK_HOME=/usr/local/spark
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin
export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)
source /etc/profile
run-example SparkPi 10
pyspark
6.保存镜像
exit
docker commit <容器ID> hadoop-docker-spark  # 容器ID可通过 docker ps -a 查看
7.打开镜像
sudo docker run -ti hadoop-docker-spark /etc/bootstrap.sh -bash
source /etc/profile
run-example SparkPi 10
pyspark
测试:
cd /usr/local/hadoop
bin/hdfs dfs -put datafile /datafile
pyspark
>>> lines = sc.textFile("/datafile")
>>> lines.count()
>>> lines.first()
阅读(3643) | 评论(0) | 转发(0) |