1. download
wget http://archive.apache.org/dist/hive/hive-0.8.1/hive-0.8.1.tar.gz
tar zxf hive-0.8.1.tar.gz
只需要在一个节点上安装
2. 设置环境变量
vi .bash_profile
export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre
export HADOOP_HOME=/home/hadoop/hadoop-1.0.0
export HIVE_HOME=/home/hadoop/hive-0.8.1
export HADOOP_CONF_DIR=$HOME/conf
export HIVE_CONF_DIR=$HOME/hive-conf
export CLASSPATH=$HIVE_HOME/lib:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$HADOOP_HOME
export PATH=$HIVE_HOME/bin:$HADOOP_HOME/bin:$JAVA_HOME/bin:/sbin/:/bin:$PATH
3. 配置hive
cp -r hive-0.8.1/conf $HIVE_CONF_DIR/
cd $HIVE_CONF_DIR/
cp hive-default.xml.template hive-default.xml
cat hive-env.sh
export HADOOP_HEAPSIZE=512
export HIVE_CONF_DIR=/home/hadoop/hive-conf
4. 测试
$ hive
hive> show tables;
OK
Time taken: 4.824 seconds
hive> create table hwz(id int, name string);
OK
Time taken: 0.566 seconds
hive> select * from hwz;
OK
Time taken: 0.361 seconds
$ hadoop dfs -lsr /user/hive
Warning: $HADOOP_HOME is deprecated.
drwxr-xr-x - hadoop supergroup 0 2012-03-22 12:36 /user/hive/warehouse
drwxr-xr-x - hadoop supergroup 0 2012-03-22 12:36 /user/hive/warehouse/hwz
5. 配置Metastore使用mysql数据库 (默认的内嵌Derby只允许单个会话), 这样才可以多用户同时访问
a. create user and database for hive in mysql
create database hive;
GRANT all ON hive.* TO hive@'%' IDENTIFIED BY 'hivepass';
b. change metastore to use mysql
cat hive-site.xml
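<configuration>
  <property>
    <name>hive.metastore.local</name>
    <value>true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://slave1:3306/hive?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hivepass</value>
  </property>
</configuration>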
c. 检查 (下面假设数据库 dw2 已经存在; 如果没有, 先在hive中执行 create database dw2;)
$ hive
hive> use dw2;
OK
Time taken: 3.43 seconds
hive> create table hwz2(id int, name string ) row format delimited FIELDS TERMINATED BY ',';
OK
Time taken: 2.519 seconds
hive> show tables;
OK
hwz2
Time taken: 0.419 seconds
hive> load data local inpath 'demo.txt' overwrite into table hwz2;
Copying data from file:/home/hadoop/demo.txt
Copying file: file:/home/hadoop/demo.txt
Loading data to table dw2.hwz2
Deleted hdfs://master:9000/user/hive/warehouse/dw2.db/hwz2
OK
Time taken: 0.557 seconds
hive> select * from hwz2;
OK
12 jack
12 jack
12 jack
12 jack
12 jack
12 jack
12 jack
12 jack
$ hadoop dfs -lsr /user/hive
Warning: $HADOOP_HOME is deprecated.
drwxr-xr-x - hadoop supergroup 0 2012-03-22 15:36 /user/hive/warehouse
drwxr-xr-x - hadoop supergroup 0 2012-03-22 15:48 /user/hive/warehouse/dw2.db
drwxr-xr-x - hadoop supergroup 0 2012-03-22 15:48 /user/hive/warehouse/dw2.db/hwz2
-rw-r--r-- 2 hadoop supergroup 1201 2012-03-22 15:48 /user/hive/warehouse/dw2.db/hwz2/demo.txt
drwxr-xr-x - hadoop supergroup 0 2012-03-22 12:36 /user/hive/warehouse/hwz
drwxr-xr-x - hadoop supergroup 0 2012-03-22 15:36 /user/hive/warehouse/hwz2
-rw-r--r-- 2 hadoop supergroup 1201 2012-03-22 15:36 /user/hive/warehouse/hwz2/demo.txt
$ hadoop dfs -cat /user/hive/warehouse/dw2.db/hwz2/demo.txt |head
Warning: $HADOOP_HOME is deprecated.
12,jack
12,jack
12,jack
12,jack
12,jack
12,jack
12,jack
12,jack
12,jack
12,jack
d. 在mysql中验证建立的新表
mysql> use hive;
Database changed
mysql> show tables;
+-----------------+
| Tables_in_hive |
+-----------------+
| BUCKETING_COLS |
| CDS |
| COLUMNS_V2 |
| DATABASE_PARAMS |
| DBS |
| PARTITION_KEYS |
| SDS |
| SD_PARAMS |
| SEQUENCE_TABLE |
| SERDES |
| SERDE_PARAMS |
| SORT_COLS |
| TABLE_PARAMS |
| TBLS |
+-----------------+
14 rows in set (0.00 sec)
9. 常见错误
error 1:
-------------------------------------------------
hive> show tables;
FAILED: Error in metadata: javax.jdo.JDOFatalInternalException: Error creating transactional connection factory
Solution:
Hive不带mysql JDBC驱动,自己安装:
wget http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.18.tar.gz/from/http://mysql.mirror.kangaroot.net/
tar zxf mysql-connector-java-5.1.18.tar.gz
cd mysql-connector-java-5.1.18
cp mysql-connector*.jar $HIVE_HOME/lib
error 2:
-------------------------------------------------
hive> show tables;
FAILED: Error in metadata: javax.jdo.JDOException: Couldnt obtain a new sequence (unique id) : Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging. InnoDB is limited to row-logging when transaction isolation level is READ COMMITTED or READ UNCOMMITTED.
Solution:
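在mysql中设置 binlog_format='MIXED':
mysql> SET GLOBAL binlog_format = 'MIXED';   (只对之后新建的连接生效)
并在 my.cnf 的 [mysqld] 段中加入 binlog_format=MIXED, 这样mysql重启后设置仍然有效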