Prerequisite: How to install sqoop on AWS EC2
https://dev.to/zawhtutwin/installing-sqoop-on-hadoop-14n8
Go to your home folder
cd ~
Download hive
wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz
Extract it in the home folder
tar -xvf apache-hive-3.1.3-bin.tar.gz
Go to the /usr/lib folder and create a folder called hive
cd /usr/lib
sudo mkdir hive
Move the extracted folder from home to /usr/lib/hive
cd ~
sudo mv apache-hive-3.1.3-bin /usr/lib/hive
Add the HIVE_HOME environment variable to .bashrc
sudo nano ~/.bashrc
Add the following
export HIVE_HOME=/usr/lib/hive/apache-hive-3.1.3-bin
And modify the PATH variable as follows
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SQOOP_HOME/bin:$HIVE_HOME/bin
And load the variables
source ~/.bashrc
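You can quickly confirm the variables were picked up, for example:
echo $HIVE_HOME
which hive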
Then we need to create folders for Hive in HDFS
hdfs dfs -mkdir -p /bigdata/tmp
hdfs dfs -mkdir -p /bigdata/hive/warehouse
Then give group write permissions
hdfs dfs -chmod g+w /bigdata/tmp
hdfs dfs -chmod g+w /bigdata/hive/warehouse
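You can verify the folders and permissions with a recursive listing, for example:
hdfs dfs -ls -R /bigdata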
Then go to $HIVE_HOME/conf folder
cd $HIVE_HOME/conf
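Hive ships a hive-env.sh.template in this folder; if hive-env.sh does not exist yet, you can create it from the template first:
sudo cp hive-env.sh.template hive-env.sh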
Then edit the hive-env.sh file as follows.
sudo nano hive-env.sh
export HIVE_CONF_DIR=/usr/lib/hive/apache-hive-3.1.3-bin/conf
export HADOOP_HOME=/usr/lib/hadoop/hadoop-2.9.0
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
Then install the MySQL server if you do not already have it on your system.
sudo apt-get install mysql-server
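The statements below are run inside the MySQL shell. On Ubuntu the MySQL root account typically authenticates via auth_socket, so you can usually open the shell with:
sudo mysql -u root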
Then create a database called 'metastore' and populate the Hive schema tables inside it
CREATE DATABASE metastore;
USE metastore;
SOURCE /usr/lib/hive/apache-hive-3.1.3-bin/scripts/metastore/upgrade/mysql/hive-schema-3.1.0.mysql.sql;
Create a user called hiveuser with the password hivepassword and grant it privileges
CREATE USER 'hiveuser'@'%' IDENTIFIED BY 'hivepassword';
GRANT ALL PRIVILEGES ON *.* TO 'hiveuser'@'%';
FLUSH PRIVILEGES;
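Before configuring Hive, you can check that the new user can reach MySQL over TCP (this mimics how Hive will connect via JDBC), for example:
mysql -u hiveuser -phivepassword -h 127.0.0.1 -e 'SHOW DATABASES;'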
Go to the conf directory of Hive and create hive-site.xml
cd $HIVE_HOME/conf
sudo nano hive-site.xml
And add the following config
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
    <description>metadata is stored in a MySQL server</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>MySQL JDBC driver class</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hiveuser</value>
    <description>user name for connecting to mysql server</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hivepassword</value>
    <description>password for connecting to mysql server</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/bigdata/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
    <description>Thrift URI for the remote metastore.</description>
  </property>
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>false</value>
  </property>
</configuration>
Then copy the MySQL Connector/J 8 jar from the /usr/share/java/ directory to $HIVE_HOME/lib
cd /usr/share/java/
sudo cp mysql-connector-java-8.0.30.jar $HIVE_HOME/lib
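Optionally, you can verify that Hive can reach the metastore database with schematool, which uses the JDBC settings from hive-site.xml above:
schematool -dbType mysql -info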
Check that Hive is working by entering the hive command
hive
exit;
Then start the metastore service with this command
hive --service metastore
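This command runs in the foreground and listens on port 9083 (the port set in hive.metastore.uris above). From another terminal you can confirm it is listening, for example:
ss -ltn | grep 9083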
Then create a systemd service so the metastore can be managed and started automatically later.
sudo nano /etc/systemd/system/hive-meta.service
Add the following
[Unit]
Description=Hive metastore
After=network.target
[Service]
User=ubuntu
Group=www-data
ExecStart=/usr/lib/hive/apache-hive-3.1.3-bin/bin/hive --service metastore
[Install]
WantedBy=multi-user.target
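Reload systemd so it picks up the new unit file
sudo systemctl daemon-reload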
Then start the service
sudo systemctl start hive-meta
Then enable the service so that it starts automatically after the next reboot.
sudo systemctl enable hive-meta
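You can check that the metastore service is running and that Hive can talk to it, for example:
sudo systemctl status hive-meta
hive -e 'show databases;'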
Then remove the folder previously created by Sqoop from HDFS with the following command
hdfs dfs -rm -r -f /user/ubuntu/hr_users
Then do the Sqoop import again. Note that you only need to add --hive-import at the end of the command.
sqoop import --connect jdbc:mysql://your_rds_dns_address/yourdatabase --table hr_users --username something --password 'something' --hive-import
That will import the table into Hive as default.hr_users.
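If you want the table to land in a Hive database other than default, Sqoop also accepts --hive-database and --hive-table; for example (mydb is a hypothetical database name, and it must already exist in Hive):
sqoop import --connect jdbc:mysql://your_rds_dns_address/yourdatabase --table hr_users --username something --password 'something' --hive-import --hive-database mydb --hive-table hr_users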
You can check it inside Hive.
hive
use default;
select * from hr_users limit 1;
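You can also inspect the imported table's schema and storage location from the same Hive shell:
describe formatted hr_users;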