数仓搭建 之 本地安装 + Spark

本文基于数仓搭建 之 本地安装

本文组件版本 Hadoop 3.2.2 / Hive 3.1.2 / Spark 2.4.8

目录

Spark

cd /opt/services

wget https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-2.4.8/spark-2.4.8-bin-without-hadoop.tgz

tar xf spark-2.4.8-bin-without-hadoop.tgz && cd spark-2.4.8-bin-without-hadoop
/opt/services/hadoop-3.2.2/bin/hadoop fs -mkdir -p /spark/jars

/opt/services/hadoop-3.2.2/bin/hadoop fs -put jars/* /spark/jars

/opt/services/hadoop-3.2.2/bin/hadoop fs -ls /spark/jars
vim conf/spark-defaults.conf
spark.master                     yarn
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 2g
vim conf/spark-env.sh
export HADOOP_CONF_DIR=/opt/services/hadoop-3.2.2/etc/hadoop
export YARN_CONF_DIR=/opt/services/hadoop-3.2.2/etc/hadoop
vim ~/.bashrc
export SPARK_HOME=/opt/services/spark-2.4.8-bin-without-hadoop
export SPARK_CONF=/opt/services/spark-2.4.8-bin-without-hadoop/conf
export PATH=$SPARK_HOME/bin:$PATH
export SPARK_DIST_CLASSPATH=$(/opt/services/hadoop-3.2.2/bin/hadoop classpath)
source ~/.bashrc

YARN

cd /opt/services/hadoop-3.2.2

vim etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
sbin/stop-yarn.sh

sbin/start-yarn.sh

Hive

cd /opt/services/apache-hive-3.1.2-bin

cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/scala-library-2.11.12.jar lib/

cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/spark-core_2.11-2.4.8.jar lib/

cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/spark-network-common_2.11-2.4.8.jar lib/

cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/spark-unsafe_2.11-2.4.8.jar lib/
vim conf/hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive?useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>zhgmysql</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>spark.yarn.jars</name>
<value>hdfs://127.0.0.1:9000/spark/jars/*</value>
</property>
<property>
<name>spark.master</name>
<value>yarn</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
</configuration>
bin/hive
show databases;
# default

use default;

show tables;
# student

insert into student values(2, "xiaowang");

select * from student;
# 1 xiaoming
# 2 xiaowang

如果 Hive 报错，可以检查 Hive 日志：

tail -100 /tmp/$USER/hive.log

参考