Hadoop HA

Configuring Hadoop HA

[!NOTE]

This builds on top of an existing (non-HA) Hadoop cluster.

hadoop-env.sh

# hadoop-env.sh
export JAVA_HOME=/root/software/jdk
export HADOOP_HOME=/root/software/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export YARN_PROXYSERVER_USER=root
# The following two are added for HA
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
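
The same edits must be present on all three nodes. A minimal sketch for pushing the configuration directory out from master (assuming the nodes can SSH to each other, as the rest of the setup implies):

# push the edited Hadoop config from master to the other two nodes
scp -r /root/software/hadoop/etc/hadoop/ slave1:/root/software/hadoop/etc/
scp -r /root/software/hadoop/etc/hadoop/ slave2:/root/software/hadoop/etc/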

core-site.xml

<configuration>
    <!-- With HA, fs.defaultFS can no longer point at a single hostname:port; use the nameservice ID instead -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <!-- ZooKeeper quorum, added for automatic failover -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>
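
hdfs getconf only reads the local configuration files, so it can be used right away to confirm that the default filesystem now points at the logical nameservice:

# should print hdfs://mycluster rather than a single hostname:port
hdfs getconf -confKey fs.defaultFS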

hdfs-site.xml

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/nn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/dn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir.perm</name>
        <value>700</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>268435456</value>
    </property>
    <property>
        <name>dfs.namenode.handler.count</name>
        <value>100</value>
    </property>
    <!-- The properties above are carried over from the original cluster; the hosts setting is dropped because the default allows all hosts -->
    <!-- The properties below are the HA configuration -->
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>master:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>slave1:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>master:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>slave1:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- This must be added: the fencing method used to isolate the old active NameNode during failover -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
</configuration>
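
The HA wiring can be sanity-checked the same way with hdfs getconf, again without any daemons running:

# should print mycluster
hdfs getconf -confKey dfs.nameservices
# should list both NameNode hosts of the nameservice (master and slave1)
hdfs getconf -namenodes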

mapred-site.xml (keep the existing configuration)

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19899</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
    </property>
</configuration>
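
Note that start-all.sh does not start the JobHistory Server configured above; if it is needed, it can be started separately on master (mapred --daemon is the Hadoop 3 launcher):

# start the MapReduce JobHistory Server on master
mapred --daemon start historyserver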

yarn-site.xml

<!-- With RM HA, yarn.resourcemanager.hostname is no longer set to a single host; rm1/rm2 are configured instead -->
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.application.classpath</name>
        <value>/root/software/hadoop/etc/hadoop:/root/software/hadoop/share/hadoop/common/lib/*:/root/software/hadoop/share/hadoop/common/*:/root/software/hadoop/share/hadoop/hdfs:/root/software/hadoop/share/hadoop/hdfs/lib/*:/root/software/hadoop/share/hadoop/hdfs/*:/root/software/hadoop/share/hadoop/mapreduce/*:/root/software/hadoop/share/hadoop/yarn:/root/software/hadoop/share/hadoop/yarn/lib/*:/root/software/hadoop/share/hadoop/yarn/*</value>
    </property>
    <property>
        <name>yarn.web-proxy.address</name>
        <value>master:8089</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>slave1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>master:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>slave1:8088</value>
    </property>
    <property>
        <name>hadoop.zk.address</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
</configuration>
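
The Web Application Proxy configured via yarn.web-proxy.address runs as its own daemon. If it does not come up with start-all.sh, it can be started by hand on master:

# start the YARN web application proxy on master
yarn --daemon start proxyserver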

1. First, start ZooKeeper (on all three nodes) and make sure it is healthy

zkServer.sh status
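
If ZooKeeper is not running yet, start it on each of the three nodes first and then re-check the status. zkServer.sh start is the standard ZooKeeper control script; the ssh loop below is just a convenience and assumes passwordless SSH from master and zkServer.sh on the remote PATH:

# start ZooKeeper on master, slave1 and slave2, then verify the quorum
for host in master slave1 slave2; do
    ssh "$host" "zkServer.sh start"
done
zkServer.sh status   # one node should report leader, the others follower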

2. Start the JournalNode (on all three nodes)

hdfs --daemon start journalnode
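
Before formatting, it is worth confirming the JournalNode process is up on each node with jps:

# run on master, slave1 and slave2; each should show a JournalNode process
jps | grep JournalNode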

3. Format HDFS (on master/nn1 only)

hdfs namenode -format

4. Sync the FsImage to the standby NameNode (slave1)

[root@master hadoop]# scp -r /data/nn/ slave1:/data
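
Instead of copying the metadata directory by hand, the standby NameNode can also pull it itself; hdfs namenode -bootstrapStandby is the standard command for this (run on slave1, after nn1 has been formatted and started):

# run on slave1: fetch the fsimage from the already formatted NameNode
hdfs namenode -bootstrapStandby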

5. Format the ZKFC (on master only)

hdfs zkfc -formatZK
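
This creates the HA znode in ZooKeeper; it can be checked with the ZooKeeper CLI (zkCli.sh ships with ZooKeeper):

# the nameservice should appear under /hadoop-ha
zkCli.sh -server master:2181 ls /hadoop-ha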

6. Start the cluster

start-all.sh
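
After start-all.sh, each node should be running its expected daemons: NameNode, DFSZKFailoverController and ResourceManager on master and slave1, JournalNode on all three, and DataNode/NodeManager on the worker nodes. A quick check over SSH (assuming passwordless SSH from master):

# print the running Java daemons on every node
for host in master slave1 slave2; do
    echo "== $host =="
    ssh "$host" jps
done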

If some daemons (e.g. the DataNode) do not show up, start them manually:

[root@slave1 ~]# hadoop-daemon.sh start datanode

Then open the HDFS web UI of both NameNodes (master:9870 and slave1:9870).

One NameNode will show as active and the other as standby; the standby is the hot backup while the active serves requests. Seeing this means HDFS HA is working.
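
An optional failover test: stop the currently active NameNode and confirm the standby takes over (hdfs --daemon and hdfs haadmin are standard Hadoop 3 commands; which node is active in your cluster may differ):

# on the node that is currently active (say master/nn1)
hdfs --daemon stop namenode
# shortly afterwards the other NameNode should report active
hdfs haadmin -getServiceState nn2
# bring the stopped NameNode back; it rejoins as standby
hdfs --daemon start namenode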

Check the NameNode states

hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2

Check the ResourceManager states

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2