Hadoop环境安装

1.最小化安装

2.固定ip地址

[root@master ~]# vim /etc/sysconfig/network-scripts/ifcfg-eno16777736

BOOTPROTO="static"

ONBOOT="yes"

IPADDR=192.168.10.4

GATEWAY=192.168.10.2

NETMASK=255.255.255.0

DNS1=8.8.8.8

DNS2=8.8.4.4

service network restart

ping www.baidu.com

3.配置主机名

hostnamectl set-hostname master

bash

hostname

4.安装yum库

安装必要的软件

[root@hadoop100 ~]# yum install -y epel-release

[root@hadoop100 ~]# yum install -y psmisc nc net-tools rsync vim lrzsz ntp libzstd openssl-static tree iotop git

5.关闭防火墙

[root@hadoop100 ~]# systemctl stop firewalld

[root@hadoop100 ~]# systemctl disable firewalld

6.创建用户,配置用户密码

useradd nby993

passwd nby993

7.配置用户具有root权限

方便后期sudo执行root权限的命令

vim /etc/sudoers

## Allow root to run any commands anywhere

root ALL=(ALL) ALL

nby993 ALL=(ALL) NOPASSWD:ALL

8.配置映射文件

linux

vim /etc/hosts

192.168.10.4 master

192.168.10.5 slave1

192.168.10.6 slave2

（注：与第2步固定IP所在网段 192.168.10.x 保持一致；原文此处写作 192.168.12.x，两处网段不一致，请以实际网络环境为准）

Windows

C:\Windows\System32\drivers\etc

192.168.10.4 master

192.168.10.5 slave1

192.168.10.6 slave2

（注：与第2步固定IP所在网段 192.168.10.x 保持一致；原文此处写作 192.168.12.x，两处网段不一致，请以实际网络环境为准）

9.进入opt解压hadoop,jdk

移动到 /usr/local/src/目录下

10.配置环境变量

#配置环境

JAVA_HOME=/usr/local/src/jdk1.8.0_212

HADOOP_HOME=/usr/local/src/hadoop-3.1.3

#声明PATH变量

PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

#提升全局变量

export PATH JAVA_HOME HADOOP_HOME

11. 克隆 master 为 slave1、slave2（注意：克隆前先关闭 master）

配置slave主机

分别在每台克隆机上执行（每台一条，不能合写为一条命令）：

hostnamectl set-hostname slave1    （在第一台克隆机上执行）

hostnamectl set-hostname slave2    （在第二台克隆机上执行）

网络配置器重新生成MAC地址

在网卡配置添加

配置新的ip

HWADDR=00:50:56:35:18:28 //MAC地址

关闭NetworkManager

systemctl stop NetworkManager

systemctl disable NetworkManager

重启slave

init 6

12.配置分发脚本

#!/bin/bash

#参数预处理

if [ $# -lt 1 ]

then

echo "参数不能为空"

exit

fi

#遍历集群中机器依次分发内存

for host in slave1 slave2

do

#依次分发内容

for file in $@

do

#判断文件是否存在

if [ -e $file ]

then

#存在

#1.获取当前文件的目录结构,防止软连接

pdir=$(cd -P $(dirname $file); pwd)

#2.获取当前的文件名

fname=$(basename $file)

#3.登录目标机器创建统一的目录结构

ssh $host "mkdir -p $pdir"

#4.依次把要分发的文件或目录进行分发

rsync -av $pdir/$fname $host:$pdir

else

#不存在

echo "$file 不存在"

exit

fi

done

done

分发 hadoop,java文件,

[nby993@master ~]$ my_rsync.sh /opt/software/hadoop-3.1.3.tar.gz

[nby993@master ~]$ my_rsync.sh /usr/local/src/jdk1.8.0_212

13.规划集群

|      | master             | slave1                       | slave2                      |
|------|--------------------|------------------------------|-----------------------------|
| HDFS | NameNode、DataNode | DataNode                     | SecondaryNameNode、DataNode |
| YARN | NodeManager        | ResourceManager、NodeManager | NodeManager                 |

14.配置hadoop

| Daemon                  | App                          | Hadoop 2.x 端口     | Hadoop 3.x 端口 |
|-------------------------|------------------------------|---------------------|-----------------|
| NameNode Port           | Hadoop HDFS NameNode         | 8020(高可用) / 9000 | 9820            |
|                         | Hadoop HDFS NameNode HTTP UI | 50070               | 9870            |
| Secondary NameNode Port | Secondary NameNode           | 50091               | 9869            |
|                         | Secondary NameNode HTTP UI   | 50090               | 9868            |
| DataNode Port           | Hadoop HDFS DataNode IPC     | 50020               | 9867            |
|                         | Hadoop HDFS DataNode         | 50010               | 9866            |
|                         | Hadoop HDFS DataNode HTTP UI | 50075               | 9864            |

1.**hadoop-env.sh

export JAVA_HOME=/usr/local/src/jdk1.8.0_212

说明：Hadoop 3.x 需要在 hadoop-env.sh 中显式配置 JAVA_HOME（2.x 及之前版本通常可依赖系统环境变量）

core-site.xml

fs.defaultFS

hdfs://master:9820

hadoop.tmp.dir

/usr/local/src/hadoop/data

hadoop.http.staticuser.user

master

hadoop.proxyuser.master.hosts

*

hadoop.proxyuser.master.groups

*

（注：此项与上一条 hadoop.proxyuser.master.groups 重复，可删除；若本意是配置代理用户列表，对应属性应为 hadoop.proxyuser.master.users，值为 *）

hdfs-site.xml

dfs.namenode.http-address

master:9870

dfs.namenode.secondary.http-address

slave2:9868

yarn-site.xml

yarn.nodemanager.aux-services

mapreduce_shuffle

yarn.resourcemanager.hostname

slave1

yarn.nodemanager.env-whitelist

JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME

yarn.scheduler.minimum-allocation-mb

512

yarn.scheduler.maximum-allocation-mb

4096

yarn.nodemanager.resource.memory-mb

4096

yarn.nodemanager.pmem-check-enabled

false

yarn.nodemanager.vmem-check-enabled

false

mapred-site.xml

mapreduce.framework.name

yarn

15.启动集群

在NameNode机器(master)上格式化集群

hdfs namenode -format

启动后在Hadoop目录下生成data目录,logs目录

单节点启动

//master启动namenode

[nby993@master hadoop-3.1.3]$ hdfs --daemon start namenode

[nby993@master hadoop-3.1.3]$ jps

2144 NameNode

2210 Jps

//三台机器启动datanode

[nby993@master hadoop-3.1.3]$ hdfs --daemon start datanode

[nby993@master hadoop-3.1.3]$ jps

2144 NameNode

2260 DataNode

2292 Jps

[nby993@slave1 ~]$ hdfs --daemon start datanode

WARNING: /usr/local/src/hadoop-3.1.3/logs does not exist. Creating.

[nby993@slave1 ~]$ jps

2658 Jps

2626 DataNode

[nby993@slave2 src]$ hdfs --daemon start datanode

WARNING: /usr/local/src/hadoop-3.1.3/logs does not exist. Creating.

[nby993@slave2 src]$ jps

9139 DataNode

9171 Jps

//slave2启动SecondaryNameNode

[nby993@slave2 src]$ hdfs --daemon start secondarynamenode

[nby993@slave2 src]$ jps

9265 SecondaryNameNode

9298 Jps

9139 DataNode

//启动ResourceManager

[nby993@slave1 hadoop-3.1.3]$ yarn --daemon start resourcemanager

[nby993@slave1 hadoop-3.1.3]$ jps

2789 DataNode

2918 ResourceManager

2958 Jps

//三台机器启动NodeManager

[nby993@slave1 hadoop-3.1.3]$ yarn --daemon start nodemanager

[nby993@slave1 hadoop-3.1.3]$ jps

3186 NodeManager

2789 DataNode

2918 ResourceManager

3227 Jps

[nby993@master hadoop-3.1.3]$ yarn --daemon start nodemanager

[nby993@master hadoop-3.1.3]$ jps

2945 Jps

2473 NameNode

2588 DataNode

2879 NodeManager

[nby993@slave2 src]$ yarn --daemon start nodemanager

[nby993@slave2 src]$ jps

9265 SecondaryNameNode

9139 DataNode

9427 Jps

9359 NodeManager

16.免密登陆

master->master,slave1,slave2

slave1->master,slave1,slave2

slave2->master,slave1,slave2

生成密钥

[nby993@master ~]$ ssh-keygen -t rsa -P ""

+--[ RSA 2048]----+

| .+o..o . |

| +=o= o |

| o.== = |

|..oo+E o |

|. o+ . S |

| |

| |

| |

| |

+-----------------+

[nby993@master ~]$ ll .ssh/

-rw-------. 1 nby993 nby993 1679 8月 5 19:59 id_rsa //私钥

-rw-r--r--. 1 nby993 nby993 395 8月 5 19:59 id_rsa.pub //公钥

给master,slave1,slave2授权

ssh-copy-id master

ssh master

ssh-copy-id slave1

ssh slave1

ssh-copy-id slave2

ssh slave2

17.配置集群群起群停

[nby993@master hadoop]$ vim workers

master

slave1

slave2

分发到slave1,slave2

[nby993@master hadoop]$ start-dfs.sh

[nby993@slave1 hadoop-3.1.3]$ start-yarn.sh

[nby993@master hadoop]$ jps

3811 NameNode

4297 NodeManager

4426 Jps

3963 DataNode

[nby993@slave1 hadoop-3.1.3]$ jps

4996 Jps

3846 DataNode

4024 ResourceManager

4890 NodeManager

[nby993@slave2 src]$ jps

10993 NodeManager

10821 SecondaryNameNode

10742 DataNode

11129 Jps

#!/bin/bash

if [ $# -lt 1 ]

then

echo "参数不能为空"

exit

fi

case $1 in

"start")

#启动hdfs

echo "==================正在启动HDFS================"

ssh master /usr/local/src/hadoop-3.1.3/sbin/start-dfs.sh

#启动yarn

echo "==================正在启动YARN================"

ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/start-yarn.sh

;;

"stop")

#停止hdfs

echo "==================正在停止HDFS================"

ssh master /usr/local/src/hadoop-3.1.3/sbin/stop-dfs.sh

#停止yarn

echo "==================正在停止YARN================"

ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/stop-yarn.sh

;;

*)

echo "参数非法"

exit

;;

esac

18.配置历史服务器

针对MR的历史服务器

mapred-site.xml增加

mapreduce.jobhistory.address

master:10020

mapreduce.jobhistory.webapp.address

master:19888

启动历史服务器

[nby993@master hadoop]$ mapred --daemon start historyserver

[nby993@master hadoop]$ jps

5667 NameNode

6261 JobHistoryServer //历史服务器

5820 DataNode

6285 Jps

6095 NodeManager

更改集群启动/停止脚本（增加 MR 历史服务器的启停）

#!/bin/bash

if [ $# -lt 1 ]

then

echo "参数不能为空"

exit

fi

case $1 in

"start")

#启动hdfs

echo "==================正在启动HDFS================"

ssh master /usr/local/src/hadoop-3.1.3/sbin/start-dfs.sh

#启动yarn

echo "==================正在启动YARN================"

ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/start-yarn.sh

#启动MR历史服务器

echo "==================正在开启MR历史================"

ssh master mapred --daemon start historyserver

;;

"stop")

#停止hdfs

echo "==================正在停止HDFS================"

ssh master /usr/local/src/hadoop-3.1.3/sbin/stop-dfs.sh

#停止yarn

echo "==================正在停止YARN================"

ssh slave1 /usr/local/src/hadoop-3.1.3/sbin/stop-yarn.sh

#关闭MR历史服务器

echo "==================正在关闭MR历史================"

ssh master mapred --daemon stop historyserver

;;

*)

echo "参数非法"

19.配置日志集聚

mr日志形成web界面

yarn-site.xml

yarn.log-aggregation-enable

true

yarn.log.server.url

http://master:19888/jobhistory/logs

yarn.log-aggregation.retain-seconds

604800

20.配置时间同步

0 查看所有节点的ntpd服务状态,开机自启动

[nby993@master hadoop-3.1.3]$ sudo systemctl status ntpd

● ntpd.service - Network Time Service //停止的

Loaded: loaded (/usr/lib/systemd/system/ntpd.service; disabled; vendor preset: disabled)

Active: inactive (dead)

[nby993@master hadoop-3.1.3]$ sudo systemctl is-enabled ntpd

disabled //开机关闭

1 将master为时间服务器,更改ntp.conf

vim /etc/ntp.conf

注释,集群在局域网中,不使用互联网时间

#server 0.centos.pool.ntp.org iburst

#server 1.centos.pool.ntp.org iburst

#server 2.centos.pool.ntp.org iburst

#server 3.centos.pool.ntp.org iburst

打开,所有机器可以从这台机器查询和同步时间

restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap

2 节点丢失网络时，依然可以采用本地时间作为时间同步源

server 127.127.1.0

fudge 127.127.1.0 stratum 10

3 配置硬件时间与系统时间同步

[nby993@master hadoop-3.1.3]$ sudo vim /etc/sysconfig/ntpd

增加

SYNC_HWCLOCK=yes

4 重新启动时间服务,开机自启动

[nby993@master hadoop-3.1.3]$ sudo systemctl start ntpd

[nby993@master hadoop-3.1.3]$ sudo systemctl enable ntpd

Created symlink from /etc/systemd/system/multi-user.target.wants/ntpd.service to /usr/lib/systemd/system/ntpd.service.

5 slave1,slave2开启定时任务,与服务器时间同步,10分钟同步

[nby993@slave1 ~]$ sudo crontab -e

*/10 * * * * /usr/sbin/ntpdate master

推荐阅读

评论可见,请评论后查看内容,谢谢!!!评论后请刷新页面。