本文在《数仓搭建 之 阿里云HDFS》与《数仓搭建 之 本地安装》的基础上，引入 Spark 作为 Hive 的执行引擎
本文组件版本：文件系统 HDFS 版 / Hadoop 2.7.2 / Hive 2.3.9 / Spark 2.4.8
目录
Spark
1 2 3 4 5
| cd /opt/services
wget https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-2.4.8/spark-2.4.8-bin-without-hadoop.tgz
tar xf spark-2.4.8-bin-without-hadoop.tgz && cd spark-2.4.8-bin-without-hadoop
|
1 2 3 4 5
| /opt/services/hadoop-2.7.2/bin/hadoop fs -mkdir -p /spark/jars
/opt/services/hadoop-2.7.2/bin/hadoop fs -put jars/* /spark/jars
/opt/services/hadoop-2.7.2/bin/hadoop fs -ls /spark/jars
|
1
| vim conf/spark-defaults.conf
|
1 2 3
| spark.master yarn
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 2g
|
1 2
| export HADOOP_CONF_DIR=/opt/services/hadoop-2.7.2/etc/hadoop
export YARN_CONF_DIR=/opt/services/hadoop-2.7.2/etc/hadoop
|
1 2 3 4
| export SPARK_HOME=/opt/services/spark-2.4.8-bin-without-hadoop
export SPARK_CONF=/opt/services/spark-2.4.8-bin-without-hadoop/conf
export PATH=$SPARK_HOME/bin:$PATH
export SPARK_DIST_CLASSPATH=$(/opt/services/hadoop-2.7.2/bin/hadoop classpath)
|
YARN
1 2 3
| cd /opt/services/hadoop-2.7.2
vim etc/hadoop/yarn-site.xml
|
1 2 3 4 5 6 7 8 9 10
| <configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
|
1 2 3
| sbin/stop-yarn.sh
sbin/start-yarn.sh
|
Hive
1 2 3 4 5 6 7 8 9
| cd /opt/services/apache-hive-2.3.9-bin
cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/scala-library-2.11.12.jar lib/
cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/spark-core_2.11-2.4.8.jar lib/
cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/spark-network-common_2.11-2.4.8.jar lib/
cp /opt/services/spark-2.4.8-bin-without-hadoop/jars/spark-unsafe_2.11-2.4.8.jar lib/
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| <configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://127.0.0.1:3306/hive?ssl=false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>zhgmysql</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
  <property>
    <name>spark.yarn.jars</name>
    <value>dfs://f-2b786b7aage46.cn-hangzhou.dfs.aliyuncs.com:10290/spark/jars/*</value>
  </property>
  <property>
    <name>spark.master</name>
    <value>yarn</value>
  </property>
  <property>
    <name>hive.execution.engine</name>
    <value>spark</value>
  </property>
</configuration>
|
1 2 3 4 5 6 7
| insert into student values(3, "xiaohong");
insert into student values(4, "xiaoyuan");
insert into student values(5, "xiaoma");
select * from student where id >= 3;
|
1 2 3
| 3 xiaohong
4 xiaoyuan
5 xiaoma
|
参考