環(huán)境簡介
# 三臺主機信息如下
# linux version: `CentOS release 6.5 (Final)`
cat /etc/hosts
192.168.59.10 slave7 #master
192.168.59.11 slave8 #slave
192.168.59.12 slave9 #slave
配置ssh無密碼訪問
# Create the hadoop account on all three machines.
# Run these two steps as root; unless stated otherwise, everything
# after account creation is done as the hadoop user.
useradd hadoop
# Confirm the account exists (shows uid/gid)
id hadoop
# Set the user's password
# (fix: the command is `passwd`; `password` does not exist)
passwd hadoop
# On every machine (all three), generate an RSA key pair.
# -P '' sets an empty passphrase so ssh can log in non-interactively.
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# Append the public key to the authorized_keys file
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# Fix permissions — sshd refuses keys when ~/.ssh is too open,
# so do not skip this step
chmod 700 ~/.ssh
chmod 644 ~/.ssh/authorized_keys
# 修改ssh配置,請切換root賬戶操作
vim /etc/ssh/sshd_config
把以下內(nèi)容的注釋取消掉
#RSAAuthentication yes # 啟用 RSA 認證
#PubkeyAuthentication yes # 啟用公鑰私鑰配對認證方式
#AuthorizedKeysFile .ssh/authorized_keys #公鑰文件路徑
# 重啟ssh服務(wù)
service sshd restart
# 退出root
# 驗證本機是否無密碼登錄
ssh localhost
# 最后,把本機的id_rsa.pub文件里的內(nèi)容追加到“其他”服務(wù)器中的~/.ssh/authorized_keys里
# 重復以上操作
# 到此,無密碼訪問設(shè)置完畢
配置java環(huán)境
# 解壓后得到j(luò)dk目錄
# /home/hadoop/jdk1.8.0_73
vim ~/.bashrc
# JDK location — update this path when upgrading the JDK
export JAVA_HOME=/home/hadoop/jdk1.8.0_73
# Prepend the JDK bin directory and export PATH explicitly so child
# processes (hadoop/zookeeper/hbase scripts) are guaranteed to inherit it
export PATH=${JAVA_HOME}/bin:$PATH
搭建zookeeper集群
# zookeeper版本:3.4.10
# 解壓zookeeper-3.4.10.tar.gz
# zookeeper頂級目錄為:/home/hadoop/zookeeper-3.4.10
# Create the dataDir and dataLogDir directories
# (-p: create parents as needed and do not fail if they already
# exist, so this step is safe to re-run)
mkdir -p /home/hadoop/zookeeper-3.4.10/{datadir,datalogdir}
# Create the config file zoo.cfg from the shipped sample
cp /home/hadoop/zookeeper-3.4.10/conf/zoo_sample.cfg /home/hadoop/zookeeper-3.4.10/conf/zoo.cfg
在配置文件zoo.cfg中修改添加內(nèi)容,最終結(jié)果如下
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/hadoop/zookeeper-3.4.10/datadir
dataLogDir=/home/hadoop/zookeeper-3.4.10/datalogdir
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
autopurge.purgeInterval=1
server.7=slave7:2888:3888
server.8=slave8:2888:3888
server.9=slave9:2888:3888
# Create the myid file inside the datadir (write it with its full
# path so the command works no matter what the current directory is)
echo "7" > /home/hadoop/zookeeper-3.4.10/datadir/myid
# The "7" above must match "server.7" in zoo.cfg; on the other
# machines write "8" and "9" to match server.8 / server.9.
# Make sure every machine has been set up exactly as above, with the
# same directory layout; then on every server run:
# start zookeeper
cd ~/zookeeper-3.4.10/bin/ && ./zkServer.sh start
# stop zookeeper
./zkServer.sh stop
# check zookeeper status
./zkServer.sh status
搭建hadoop集群
# hadoop版本:2.7.3
# 解壓hadoop-2.7.3.tar.gz
# hadoop頂級目錄為:/home/hadoop/hadoop-2.7.3
# 先進入hadoop配置文件目錄
cd /home/hadoop/hadoop-2.7.3/etc/hadoop
修改hadoop-env.sh
vim hadoop-env.sh
# 在腳本開頭添加如下內(nèi)容
export JAVA_HOME=/home/hadoop/jdk1.8.0_73
修改yarn-env.sh
vim yarn-env.sh
# 在腳本開頭添加如下內(nèi)容
export JAVA_HOME=/home/hadoop/jdk1.8.0_73
修改 core-site.xml,配置文件如下
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- 指定hdfs的nameservice為ns1 名字隨意-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<!-- 指定hadoop臨時目錄 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/hadoop-2.7.3/hadoop.tmp.dir</value>
</property>
<!-- 指定zookeeper地址 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>slave7:2181,slave8:2181,slave9:2181</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
</configuration>
修改hdfs-site.xml,配置文件如下
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!--指定hdfs的nameservice為ns1,需要和core-site.xml中的保持一致 -->
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<!-- ns1下面有兩個NameNode,分別是nn1,nn2(名字隨意起,但是要與下面一致)-->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- nn1的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>slave7:9000</value>
</property>
<!-- nn1的http通信地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>slave7:50070</value>
</property>
<!-- nn2的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>slave8:9000</value>
</property>
<!-- nn2的http通信地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>slave8:50070</value>
</property>
<!-- 指定NameNode的元數(shù)據(jù)在JournalNode上的存放位置 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://slave7:8485;slave8:8485;slave9:8485/ns1</value>
</property>
<!-- 指定JournalNode在本地磁盤存放數(shù)據(jù)的位置 -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/hadoop-2.7.3/journaldata</value>
</property>
<!-- 開啟NameNode失敗自動切換 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- 配置失敗自動切換實現(xiàn)方式 -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- 配置隔離機制方法,多個機制用換行分割,即每個機制暫用一行-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- 使用sshfence隔離機制時需要ssh免登陸 -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- 配置sshfence隔離機制超時時間 -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- 指定HDFS副本的數(shù)量 -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- HDFS文件系統(tǒng)的元信息保存目錄-->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///home/hadoop/hadoop-2.7.3/namenode_dir</value>
</property>
<!-- HDFS文件系統(tǒng)的數(shù)據(jù)保存目錄 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///home/hadoop/hadoop-2.7.3/datanode_dir</value>
</property>
<!-- 在NN和DN上開啟WebHDFS (REST API)功能,不是必須 -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
<description>
If "true", enable permission checking in HDFS.
If "false", permission checking is turned off,
but all other behavior is unchanged.
Switching from one parameter value to the other does not change the mode,
owner or group of files or directories.
</description>
</property>
</configuration>
修改mapred-site.xml,配置文件如下
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- 指定mr框架為yarn方式 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
修改yarn-site.xml,配置文件如下
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- 指定nodemanager啟動時加載server的方式為shuffle server -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 指定resourcemanager地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>slave9</value>
</property>
</configuration>
修改slaves文件,配置文件如下
slave7
slave8
slave9
最后,將hadoop-2.7.3目錄同步復制到所有服務(wù)器上
啟動hadoop集群(如果是首次啟動會特別標注說明,關(guān)閉集群順序剛好相反)
# Make sure the zookeeper ensemble is up first; in this tutorial,
# go to the zookeeper directory on slave7, slave8 and slave9 and run:
./zkServer.sh start
# stop
./zkServer.sh stop
# Format the HA state in zookeeper; run on slave7 (first startup only):
bin/hdfs zkfc -formatZK
# Start the JournalNode cluster
# On slave7 run:
sbin/hadoop-daemons.sh start journalnode
# stop
sbin/hadoop-daemons.sh stop journalnode
# Or run on slave7, slave8 and slave9 individually (note the
# difference between hadoop-daemons.sh and hadoop-daemon.sh):
sbin/hadoop-daemon.sh start journalnode
# stop
sbin/hadoop-daemon.sh stop journalnode
# Format the namenode (hdfs) — first startup only
# (use the `hdfs` entry point; `bin/hadoop namenode` is deprecated in 2.x)
bin/hdfs namenode -format
# Start namenode(1); on slave7 run:
sbin/hadoop-daemon.sh start namenode
# stop
sbin/hadoop-daemon.sh stop namenode
# Sync namenode(1) metadata to namenode(2) and start namenode(2); on slave8:
# (fix: the option needs a plain ASCII hyphen — the original used an en dash,
# which the hdfs command cannot parse)
bin/hdfs namenode -bootstrapStandby
sbin/hadoop-daemon.sh start namenode
# stop
sbin/hadoop-daemon.sh stop namenode
# Start all datanodes; on slave7 run:
sbin/hadoop-daemons.sh start datanode
# stop
sbin/hadoop-daemons.sh stop datanode
# Start yarn; run on slave9, the resource manager host:
sbin/start-yarn.sh
# stop
sbin/stop-yarn.sh
# Start the zkfc daemons; on slave7 run:
sbin/hadoop-daemons.sh start zkfc
# stop
sbin/hadoop-daemons.sh stop zkfc
搭建hbase集群
# hbase版本:1.2.5
# 解壓hbase-1.2.5-bin.tar.gz
# hbase頂級目錄為:/home/hadoop/hbase-1.2.5
# First, change into the hbase configuration directory
# (fix: hbase-env.sh and hbase-site.xml live under conf/, not the top-level dir)
cd /home/hadoop/hbase-1.2.5/conf
打開hbase-env.sh,添加如下信息:
export JAVA_HOME=/home/hadoop/jdk1.8.0_73
export HBASE_LOG_DIR=${HBASE_HOME}/logs
# Do not use the zookeeper instance bundled with hbase;
# this cluster manages its own zookeeper-3.4.10 ensemble
export HBASE_MANAGES_ZK=false
配置hbase-site.xml,最終內(nèi)容如下:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
<property>
<name>hbase.tmp.dir</name>
<value>/home/hadoop/hbase-1.2.5/hbase.tmp.dir</value>
</property>
<!-- 設(shè)置HRegionServers共享目錄 -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://slave7:9000/hbase</value>
<description>Hbase data director</description>
</property>
<!-- 開啟分布式模式 -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- 設(shè)置HMaster的rpc端口, 由于采用的是HA模式,這里只寫端口就可以了,不需要再寫主機名-->
<property>
<name>hbase.master.port</name>
<value>60000</value>
</property>
<!-- 對比參考
<property>
<name>hbase.master</name>
<value>hdfs://master:60000</value>
</property>
-->
<!-- 設(shè)置HMaster的http web console端口 -->
<property>
<name>hbase.master.info.port</name>
<value>16010</value>
</property>
<!--zookeeper設(shè)置,依賴zookeeper集群設(shè)置-->
<!--zookeeper集群信息設(shè)置-->
<property>
<name>hbase.zookeeper.quorum</name>
<value>slave7,slave8,slave9</value>
</property>
<!--zookeeper端口-->
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<!--請參考zookeeper配置文件zoo.cfg中dataDir的值 -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/hadoop/zookeeper-3.4.10/datadir</value>
</property>
</configuration>
配置regionservers,內(nèi)容如下:
slave8
slave9
scp hbase目錄到所有服務(wù)器(slave7,slave8,slave9)
啟動hbase集群:
# Make sure the hadoop cluster is running first
# Start the hbase cluster; on slave7 run:
bin/start-hbase.sh
# stop
bin/stop-hbase.sh
后記:整個集群的啟動先后順序
zookeeper -> hadoop -> hbase
多次格式化集群,最后phoenix連接時,報錯:
org.apache.phoenix.exception.PhoenixIOException: SYSTEM.CATALOG
//解決方法,停止hbase集群,執(zhí)行 bin/hbase clean --cleanZk,然后啟動hbase集群
參考文檔