Hadoop HA + Hive on Spark: installation configuration files for a 4-VM cluster

Contents:

Versions
Steps
hadoop: core-site.xml, hdfs-site.xml, mapred-site.xml, slaves, workers, yarn-site.xml
hive: hive-site.xml, spark-defaults.conf
spark: hdfs-site.xml, hive-site.xml, slaves, yarn-site.xml, spark-env.sh

Versions

apache-hive-3.1.3-bin.tar spark-3.0.0-bin-hadoop3.2.tgz hadoop-3.1.3.tar.gz
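Judging by the paths used later (/opt/hadoop-3.1.3, /opt/spark-3.0.0-bin-hadoop3.2), the archives are unpacked under /opt; a sketch of the extraction step (extracting Hive to /opt is an assumption, since its install path never appears below):

tar -zxvf hadoop-3.1.3.tar.gz -C /opt/
tar -zxvf spark-3.0.0-bin-hadoop3.2.tgz -C /opt/
tar -xvf apache-hive-3.1.3-bin.tar -C /opt/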

Steps

Create a spark-history directory (permissions set to 777) and a spark-jars directory on HDFS, then upload the Spark jars to HDFS:

hdfs dfs -D dfs.replication=1 -put ./* /spark-jars
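A minimal command sequence for this step, assuming the jars are uploaded from Spark's own jars directory:

hdfs dfs -mkdir -p /spark-history /spark-jars      # create both directories
hdfs dfs -chmod 777 /spark-history                 # open up the history directory
cd /opt/spark-3.0.0-bin-hadoop3.2/jars             # assumed source of the jars
hdfs dfs -D dfs.replication=1 -put ./* /spark-jars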

hadoop

core-site.xml

fs.defaultFS = hdfs://hacluster
hadoop.tmp.dir = file:///opt/hadoop-3.1.3/tmp
io.file.buffer.size = 4096
ha.zookeeper.quorum = node15:2181,node16:2181,node17:2181,node18:2181
hadoop.proxyuser.root.hosts = *
hadoop.proxyuser.root.groups = *
hadoop.http.staticuser.user = root
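For reference, each name/value pair above sits in a <property> element inside the file's <configuration> block; the same layout applies to every *-site.xml listed below. A minimal core-site.xml fragment:

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hacluster</value>
  </property>
  <!-- remaining properties follow the same pattern -->
</configuration>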

hdfs-site.xml

dfs.block.size = 134217728
dfs.replication = 3
dfs.name.dir = file:///opt/hadoop-3.1.3/dfs/namenode_data
dfs.data.dir = file:///opt/hadoop-3.1.3/dfs/datanode_data
dfs.webhdfs.enabled = true
dfs.datanode.max.transfer.threads = 4096
dfs.nameservices = hacluster
dfs.ha.namenodes.hacluster = nn1,nn2
dfs.namenode.rpc-address.hacluster.nn1 = node15:9000
dfs.namenode.servicerpc-address.hacluster.nn1 = node15:53310
dfs.namenode.http-address.hacluster.nn1 = node15:50070
dfs.namenode.rpc-address.hacluster.nn2 = node16:9000
dfs.namenode.servicerpc-address.hacluster.nn2 = node16:53310
dfs.namenode.http-address.hacluster.nn2 = node16:50070
dfs.namenode.shared.edits.dir = qjournal://node15:8485;node16:8485;node17:8485;node18:8485/hacluster
dfs.journalnode.edits.dir = /opt/hadoop-3.1.3/dfs/journalnode_data
dfs.namenode.edits.dir = /opt/hadoop-3.1.3/dfs/edits
dfs.ha.automatic-failover.enabled = true
dfs.client.failover.proxy.provider.hacluster = org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
dfs.ha.fencing.methods = sshfence
dfs.ha.fencing.ssh.private-key-files = /root/.ssh/id_rsa
dfs.permissions.enabled = false
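Once the JournalNodes, ZKFC and both NameNodes are up, the nn1/nn2 HA state defined above can be checked with hdfs haadmin (a quick verification sketch, not part of the original notes):

hdfs haadmin -getServiceState nn1     # expected: active or standby
hdfs haadmin -getServiceState nn2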

mapred-site.xml

mapred.job.tracker = node15:9001
mapreduce.framework.name = yarn
mapreduce.jobhistory.address = node15:10020
mapreduce.jobhistory.webapp.address = node15:19888
yarn.application.classpath = /opt/hadoop-3.1.3/etc/hadoop:/opt/hadoop-3.1.3/share/hadoop/common/lib/*:/opt/hadoop-3.1.3/share/hadoop/common/*:/opt/hadoop-3.1.3/share/hadoop/hdfs:/opt/hadoop-3.1.3/share/hadoop/hdfs/lib/*:/opt/hadoop-3.1.3/share/hadoop/hdfs/*:/opt/hadoop-3.1.3/share/hadoop/mapreduce/lib/*:/opt/hadoop-3.1.3/share/hadoop/mapreduce/*:/opt/hadoop-3.1.3/share/hadoop/yarn:/opt/hadoop-3.1.3/share/hadoop/yarn/lib/*:/opt/hadoop-3.1.3/share/hadoop/yarn/*
mapreduce.map.memory.mb = 1500        (physical memory limit for each Map task)
mapreduce.reduce.memory.mb = 3000     (physical memory limit for each Reduce task)
mapreduce.map.java.opts = -Xmx1200m
mapreduce.reduce.java.opts = -Xmx2600m
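The long yarn.application.classpath value above is the stock Hadoop classpath; if it ever needs to be regenerated on a node, hadoop classpath prints an equivalent (possibly longer) list:

/opt/hadoop-3.1.3/bin/hadoop classpath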

slaves

node15

node16

node17

node18

workers

node15

node16

node17

node18

yarn-site.xml

yarn.nodemanager.vmem-check-enabled = false    (whether virtual memory limits will be enforced for containers)
yarn.nodemanager.vmem-pmem-ratio = 4           (ratio between virtual memory and physical memory when setting memory limits for containers)
yarn.resourcemanager.ha.enabled = true
yarn.resourcemanager.cluster-id = hayarn
yarn.resourcemanager.ha.rm-ids = rm1,rm2
yarn.resourcemanager.hostname.rm1 = node15
yarn.resourcemanager.hostname.rm2 = node16
yarn.resourcemanager.webapp.address.rm1 = node15:8088
yarn.resourcemanager.webapp.address.rm2 = node16:8088
yarn.resourcemanager.zk-address = node15:2181,node16:2181,node17:2181
yarn.resourcemanager.recovery.enabled = true
yarn.resourcemanager.store.class = org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
yarn.resourcemanager.hostname = node18
yarn.nodemanager.aux-services = mapreduce_shuffle
yarn.log-aggregation-enable = true
yarn.log-aggregation.retain-seconds = 604800
yarn.log.server.url = http://node15:19888/jobhistory/logs
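With ResourceManager HA enabled (rm1 on node15, rm2 on node16), the active/standby state can be verified with yarn rmadmin, for example:

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2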

hive

hive-site.xml

javax.jdo.option.ConnectionURL = jdbc:mysql://node15:3306/metastore?useSSL=false
javax.jdo.option.ConnectionDriverName = com.mysql.jdbc.Driver
javax.jdo.option.ConnectionUserName = root
javax.jdo.option.ConnectionPassword = hadoop
hive.metastore.warehouse.dir = /user/hive/warehouse
hive.metastore.schema.verification = false
hive.metastore.event.db.notification.api.auth = false
hive.server2.thrift.bind.host = node15
hive.server2.thrift.port = 10000
spark.yarn.jars = hdfs://node15:9000/spark-jars/*
hive.execution.engine = spark
spark.home = /opt/spark-3.0.0-bin-hadoop3.2/
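With HiveServer2 bound to node15:10000 and hive.execution.engine set to spark, a quick smoke test might look like this (the table name test_tbl is purely illustrative):

beeline -u jdbc:hive2://node15:10000 -n root \
        -e "select count(*) from test_tbl"    # any aggregation forces a Spark job on YARN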

spark-defaults.conf

spark.master yarn

spark.eventLog.enabled true

spark.eventLog.dir hdfs://node15:9000/spark-history

spark.executor.memory 600m

spark.driver.memory 600m
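Since spark.eventLog.dir points at the /spark-history directory created earlier, a Spark history server can serve those logs; a possible addition (the spark.history.fs.logDirectory line and the node it runs on are assumptions, not in the original conf):

# appended to spark-defaults.conf:
spark.history.fs.logDirectory hdfs://node15:9000/spark-history
# then, on the node running the history server:
/opt/spark-3.0.0-bin-hadoop3.2/sbin/start-history-server.sh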

spark

hdfs-site.xml

Symlink the file from the hadoop configuration: ln -s <source file> <link name>

hive-site.xml

Symlink the file from the hive configuration: ln -s <source file> <link name>

slaves

node15

node16

node17

node18

yarn-site.xml

Symlink the file from the hadoop configuration: ln -s <source file> <link name> (the sketch below covers all three links)
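The three symlinks might be created as follows (the Hive install path /opt/apache-hive-3.1.3-bin and Spark's conf directory as the link location are assumptions):

ln -s /opt/hadoop-3.1.3/etc/hadoop/hdfs-site.xml    /opt/spark-3.0.0-bin-hadoop3.2/conf/hdfs-site.xml
ln -s /opt/hadoop-3.1.3/etc/hadoop/yarn-site.xml    /opt/spark-3.0.0-bin-hadoop3.2/conf/yarn-site.xml
ln -s /opt/apache-hive-3.1.3-bin/conf/hive-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/hive-site.xml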

spark-env.sh

#!/usr/bin/env bash

#

# Licensed to the Apache Software Foundation (ASF) under one or more

# contributor license agreements. See the NOTICE file distributed with

# this work for additional information regarding copyright ownership.

# The ASF licenses this file to You under the Apache License, Version 2.0

# (the "License"); you may not use this file except in compliance with

# the License. You may obtain a copy of the License at

#

# http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

#

export SCALA_HOME=/usr/share/scala

export JAVA_HOME=/usr/java/jdk1.8.0_241-amd64

export SPARK_HOME=/opt/spark-3.0.0-bin-hadoop3.2

export SPARK_MASTER_IP=192.168.206.215

export SPARK_MASTER_PORT=7077

export SPARK_MASTER_WEBUI_PORT=7080   # Spark's web UI defaults to port 8080; changed here to avoid a possible port conflict

export SPARK_WORKER_CORES=1

export SPARK_WORKER_INSTANCES=1

export SPARK_EXECUTOR_MEMORY=512M

export SPARK_WORKER_MEMORY=1G

export SPARK_DIST_CLASSPATH=$(/opt/hadoop-3.1.3/bin/hadoop classpath)

export HADOOP_CONF_DIR=/opt/hadoop-3.1.3/etc/hadoop

# This file is sourced when running various Spark programs.

# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with

# ./bin/run-example or ./bin/spark-submit

# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files

# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node

# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program

# Options read by executors and drivers running inside the cluster

# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node

# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program

# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data

# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client/cluster mode

# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)

# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files

# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN

# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).

# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)

# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)

# Options for the daemons used in the standalone deploy mode

# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname

# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master

# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")

# - SPARK_WORKER_CORES, to set the number of cores to use on this machine

# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)

# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker

# - SPARK_WORKER_DIR, to set the working directory of worker processes

# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")

# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).

# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")

# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")

# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")

# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons

# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers

# Options for launcher

# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")

# Generic options for the daemons used in the standalone deploy mode

# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)

# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs)

# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)

# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER)

# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)

# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file.

# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.

# You might get better performance to enable these options if using native BLAS (see SPARK-21305).

# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL

# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS
