1. Environment Preparation
1.1 System Requirements
- Operating system: Linux (CentOS 7+ / Ubuntu 18.04+ recommended)
- Java version: JDK 8 or JDK 11
- Memory: 4 GB minimum, 8 GB+ recommended
- Storage: at least 50 GB of free space
- Network: all cluster nodes must be able to reach each other (a quick preflight check is sketched below)
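The following is a minimal preflight sketch against the requirements above; the script name (preflight-check.sh) and the /opt mount point are illustrative assumptions, and the 4 GB / 50 GB thresholds simply mirror the list.
#!/bin/bash
# preflight-check.sh - rough sanity check against the stated requirements
mem_mb=$(free -m | awk '/^Mem:/ {print $2}')
disk_gb=$(df -BG /opt | awk 'NR==2 {gsub("G","",$4); print $4}')
echo "Memory: ${mem_mb} MB (recommended: 8192+)"
echo "Free space under /opt: ${disk_gb} GB (required: 50+)"
[ "$mem_mb" -lt 4096 ] && echo "WARNING: less than 4 GB of RAM"
[ "$disk_gb" -lt 50 ] && echo "WARNING: less than 50 GB free"
# Java is installed in section 2; this only reports whether it is already present
command -v java >/dev/null && java -version || echo "Java not installed yet (see section 2)"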
1.2 Host Planning
Node type   | Hostname   | IP address    | Roles
------------|------------|---------------|------------------
NameNode    | master     | 192.168.1.10  | NameNode, ResourceManager
DataNode    | slave1     | 192.168.1.11  | DataNode, NodeManager
DataNode    | slave2     | 192.168.1.12  | DataNode, NodeManager
DataNode    | slave3     | 192.168.1.13  | DataNode, NodeManager
1.3 Base Environment Configuration
# 1. Set the hostname
sudo hostnamectl set-hostname master   # run on the master node
sudo hostnamectl set-hostname slave1   # run on the slave1 node (repeat analogously on slave2/slave3)
# 2. Configure the hosts file
sudo vim /etc/hosts
# Add the following entries
192.168.1.10 master
192.168.1.11 slave1
192.168.1.12 slave2
192.168.1.13 slave3
# 3. Disable the firewall (CentOS/RHEL; on Ubuntu use "sudo ufw disable")
sudo systemctl stop firewalld
sudo systemctl disable firewalld
# 4. Disable SELinux (CentOS/RHEL only)
sudo setenforce 0
sudo sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
# A connectivity check across all nodes is sketched below.
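A minimal sketch to confirm that the hostnames above resolve and respond from the current node, assuming the /etc/hosts entries have been added on every machine:
# Verify that every planned node resolves and answers a ping
for node in master slave1 slave2 slave3; do
    ping -c 1 -W 2 "$node" >/dev/null && echo "$node: reachable" || echo "$node: NOT reachable"
done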
2. Installing the Java Environment
2.1 Install OpenJDK
# CentOS/RHEL
sudo yum install -y java-1.8.0-openjdk java-1.8.0-openjdk-devel
# Ubuntu/Debian
sudo apt update
sudo apt install -y openjdk-8-jdk
2.2 Configure Java Environment Variables
# Find the Java installation path
java -version
readlink -f $(which java)
# Configure environment variables
sudo vim /etc/profile
# Add the following (adjust JAVA_HOME to the path reported above;
# on Ubuntu it is typically /usr/lib/jvm/java-8-openjdk-amd64)
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
# Apply the changes
source /etc/profile
# Verify the installation
java -version
javac -version
echo $JAVA_HOME
3. Passwordless SSH Configuration
3.1 Generate SSH Keys
# Run on the master node
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# Copy the public key to every node (including master itself)
ssh-copy-id master
ssh-copy-id slave1
ssh-copy-id slave2
ssh-copy-id slave3
3.2 Verify Passwordless Login
# Test the SSH connections (type "exit" after each login)
ssh master
ssh slave1
ssh slave2
ssh slave3
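A non-interactive alternative that is handy before running the cluster scripts; BatchMode makes ssh fail instead of prompting, so any node that still asks for a password is reported immediately (a minimal sketch):
for node in master slave1 slave2 slave3; do
    ssh -o BatchMode=yes -o ConnectTimeout=5 "$node" hostname \
        || echo "$node: passwordless login NOT working"
done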
4. Hadoop Installation
4.1 Download Hadoop
# Download Hadoop 3.3.4
cd /opt
sudo wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz
# Extract
sudo tar -xzf hadoop-3.3.4.tar.gz
sudo mv hadoop-3.3.4 hadoop
sudo chown -R $(whoami):$(whoami) /opt/hadoop
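Optionally, verify the download (ideally before extracting). Apache normally publishes a companion .sha512 file next to each release tarball; treat its availability and exact format as an assumption to confirm on the mirror you use, and compare the published hash against the locally computed one:
sudo wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz.sha512
sha512sum hadoop-3.3.4.tar.gz       # locally computed hash
cat hadoop-3.3.4.tar.gz.sha512      # published hash -- the two values must match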
4.2 Configure Hadoop Environment Variables
# Edit the environment variable file
sudo vim /etc/profile
# Add the following
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
# Apply the changes
source /etc/profile
# Verify the installation
hadoop version
5. Hadoop Configuration Files
5.1 hadoop-env.sh Configuration
# Edit hadoop-env.sh
vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh
# Set JAVA_HOME (same path as in section 2.2)
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
# Set the users the daemons run as (these assume a dedicated "hadoop" account; see the sketch below)
export HDFS_NAMENODE_USER="hadoop"
export HDFS_DATANODE_USER="hadoop"
export HDFS_SECONDARYNAMENODE_USER="hadoop"
export YARN_RESOURCEMANAGER_USER="hadoop"
export YARN_NODEMANAGER_USER="hadoop"
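If the daemons are to run as the dedicated hadoop user assumed by the variables above (rather than as your current login user), that account must exist on every node and own the installation; a minimal sketch, to be adjusted if you keep using your current user:
# Run on every node
sudo useradd -m hadoop
sudo passwd hadoop
sudo chown -R hadoop:hadoop /opt/hadoop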
5.2 core-site.xml Configuration
<!-- $HADOOP_HOME/etc/hadoop/core-site.xml -->
<configuration>
    <!-- NameNode address -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <!-- Hadoop temporary directory -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/tmp</value>
    </property>
    <!-- I/O buffer size (bytes) -->
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <!-- Proxy user configuration -->
    <property>
        <name>hadoop.proxyuser.hadoop.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.groups</name>
        <value>*</value>
    </property>
</configuration>
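After editing, the effective values can be read back with hdfs getconf, which is a quick way to catch typos in the XML:
hdfs getconf -confKey fs.defaultFS      # should print hdfs://master:9000
hdfs getconf -confKey hadoop.tmp.dir    # should print /opt/hadoop/tmp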
5.3 hdfs-site.xml Configuration
<!-- $HADOOP_HOME/etc/hadoop/hdfs-site.xml -->
<configuration>
    <!-- Replication factor -->
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <!-- NameNode data directory -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/data/namenode</value>
    </property>
    <!-- DataNode data directory (see the directory sketch after this file) -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/data/datanode</value>
    </property>
    <!-- Block size (128 MB) -->
    <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
    </property>
    <!-- NameNode HTTP address -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>master:9870</value>
    </property>
    <!-- SecondaryNameNode HTTP address -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:9868</value>
    </property>
    <!-- Permission checking (disabled here for convenience on a test cluster;
         keep it enabled in production) -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
</configuration>
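The storage directories referenced above must exist and be writable by the daemon user on the nodes where the corresponding daemon runs (the NameNode directory on master, the DataNode directory on the slaves). A minimal sketch that prepares them everywhere, assuming the passwordless SSH set up in section 3:
for node in master slave1 slave2 slave3; do
    ssh "$node" "mkdir -p /opt/hadoop/data/namenode /opt/hadoop/data/datanode /opt/hadoop/tmp"
done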
5.4 mapred-site.xml Configuration
<!-- $HADOOP_HOME/etc/hadoop/mapred-site.xml -->
<configuration>
    <!-- Run MapReduce on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- JobHistory Server address -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <!-- JobHistory Server web address -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
    <!-- Map task container memory (MB) -->
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>1024</value>
    </property>
    <!-- Reduce task container memory (MB) -->
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>2048</value>
    </property>
    <!-- Map task JVM heap (~80% of the container memory) -->
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx819m</value>
    </property>
    <!-- Reduce task JVM heap (~80% of the container memory) -->
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx1638m</value>
    </property>
</configuration>
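On Hadoop 3.x, MapReduce jobs submitted to YARN sometimes fail with "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster" because the application master cannot locate the MapReduce jars. A commonly used fix (verify the exact property list against your release; it is summarized here only as comments) is to point the AM, map, and reduce environments at HADOOP_MAPRED_HOME:
# Additional mapred-site.xml properties often needed on Hadoop 3.x (verify for your release):
#   yarn.app.mapreduce.am.env = HADOOP_MAPRED_HOME=/opt/hadoop
#   mapreduce.map.env         = HADOOP_MAPRED_HOME=/opt/hadoop
#   mapreduce.reduce.env      = HADOOP_MAPRED_HOME=/opt/hadoop
# Print the classpath Hadoop resolves, useful as a value for mapreduce.application.classpath:
hadoop classpath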
5.5 yarn-site.xml Configuration
<!-- $HADOOP_HOME/etc/hadoop/yarn-site.xml -->
<configuration>
    <!-- ResourceManager hostname -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <!-- NodeManager auxiliary services -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Memory available to each NodeManager, in MB (see the sizing note after this file) -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>4096</value>
    </property>
    <!-- vcores available to each NodeManager -->
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>4</value>
    </property>
    <!-- Minimum container memory (MB) -->
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>512</value>
    </property>
    <!-- Maximum container memory (MB) -->
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>4096</value>
    </property>
    <!-- ResourceManager web address -->
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>master:8088</value>
    </property>
    <!-- Log aggregation -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <!-- Log retention time (7 days, in seconds) -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
</configuration>
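The 4096 MB / 4 vcores figures above are placeholders matching the machines in section 1.2; they should be scaled to each worker's actual hardware, leaving headroom for the OS and the HDFS daemons. A minimal sketch for checking what a node really has:
# Run on each DataNode to see what can safely be handed to YARN
free -m | awk '/^Mem:/ {print "Total memory (MB):", $2}'
nproc   # number of CPU cores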
5.6 workers File Configuration
# Edit the workers file
vim $HADOOP_HOME/etc/hadoop/workers
# List the DataNode hostnames, one per line (remove the default "localhost" entry)
slave1
slave2
slave3
6. Distributing the Configuration
6.1 Create a Distribution Script
#!/bin/bash
# distribute.sh - copy the Hadoop installation and environment file to all worker nodes
HADOOP_HOME=/opt/hadoop
NODES="slave1 slave2 slave3"
for node in $NODES; do
    echo "Distributing to $node..."
    scp -r $HADOOP_HOME $node:/opt/
    # copying /etc/profile requires write permission on the remote side
    scp /etc/profile $node:/etc/
done
# Note: the updated /etc/profile takes effect on each node at the next login;
# running "source /etc/profile" over ssh would only affect that one remote shell.
echo "Distribution complete!"
6.2 Run the Script
chmod +x distribute.sh
./distribute.sh
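When only the files under etc/hadoop change later, re-copying the whole installation is unnecessary; a minimal sketch using rsync instead (assuming rsync is installed on all nodes):
for node in slave1 slave2 slave3; do
    rsync -av --delete /opt/hadoop/etc/hadoop/ "$node":/opt/hadoop/etc/hadoop/
done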
7. Starting the Hadoop Cluster
7.1 Format the NameNode
# Create the data directories (here on master; see the loop in section 5.3 for all nodes)
mkdir -p /opt/hadoop/data/namenode
mkdir -p /opt/hadoop/data/datanode
mkdir -p /opt/hadoop/tmp
# Format the NameNode (run only once, on master, before the first start)
hdfs namenode -format
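Reformatting an existing cluster generates a new clusterID on the NameNode, after which DataNodes still carrying the old ID refuse to register. If a reformat is ever unavoidable on a test cluster, the usual (destructive) workaround is to wipe the storage directories everywhere first; a hedged sketch:
# WARNING: this destroys all HDFS data -- test clusters only
for node in master slave1 slave2 slave3; do
    ssh "$node" "rm -rf /opt/hadoop/data/namenode/* /opt/hadoop/data/datanode/* /opt/hadoop/tmp/*"
done
hdfs namenode -format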
7.2 Start HDFS
# Start HDFS (run on master)
start-dfs.sh
# Check the Java processes
jps
# Expected on master: NameNode, SecondaryNameNode; on each slave: DataNode
7.3 Start YARN
# Start YARN (run on master)
start-yarn.sh
# Check the Java processes
jps
# Expected on master: ResourceManager; on each slave: NodeManager
7.4 Start the JobHistory Server
# Start the JobHistory Server (run on master)
mapred --daemon start historyserver
# Check the Java processes
jps
# Expected on master: JobHistoryServer
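Since jps only lists the JVMs on the current machine, a quick loop over all nodes gives the full picture (a minimal sketch, assuming passwordless SSH and that jps is on each remote PATH):
for node in master slave1 slave2 slave3; do
    echo "== $node =="
    ssh "$node" jps
done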
8. Verifying the Cluster Status
8.1 HDFS Status Check
# Show the HDFS report (live DataNodes, capacity, etc.)
hdfs dfsadmin -report
# Browse the file system
hdfs dfs -ls /
# Create a test directory and upload a file
hdfs dfs -mkdir /test
hdfs dfs -put /etc/passwd /test/
8.2 YARN Status Check
# List the YARN nodes
yarn node -list
# Show queue information
yarn queue -status default
8.3 Web UI Access
NameNode Web UI: http://master:9870
ResourceManager UI: http://master:8088
JobHistory Server: http://master:19888
9. Running Test Jobs
9.1 Run the Example Program
# Create an input directory
hdfs dfs -mkdir /input
# Upload a test file
hdfs dfs -put /etc/passwd /input/
# Run the WordCount example (the output directory must not already exist)
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar \
    wordcount /input /output
# View the result
hdfs dfs -cat /output/part-r-00000
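Re-running the job with the same output path fails with an "output directory already exists" error, so remove it first:
hdfs dfs -rm -r /output    # then re-run the wordcount command above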
9.2 Performance Test
# Generate test data with TeraGen (rows are 100 bytes each, so 1,000,000 rows is roughly 100 MB)
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar \
    teragen 1000000 /teragen-output
# Sort the data with TeraSort
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar \
    terasort /teragen-output /terasort-output
# Validate the sorted output
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar \
    teravalidate /terasort-output /teravalidate-output
10. Cluster Management Scripts
10.1 Start Script
#!/bin/bash
# start-cluster.sh
echo "Starting the Hadoop cluster..."
# Start HDFS
echo "Starting HDFS..."
start-dfs.sh
# Start YARN
echo "Starting YARN..."
start-yarn.sh
# Start the JobHistory Server
echo "Starting the JobHistory Server..."
mapred --daemon start historyserver
echo "Cluster startup complete."
echo "NameNode Web UI: http://master:9870"
echo "ResourceManager UI: http://master:8088"
echo "JobHistory Server: http://master:19888"
10.2 Stop Script
#!/bin/bash
# stop-cluster.sh
echo "Stopping the Hadoop cluster..."
# Stop the JobHistory Server
echo "Stopping the JobHistory Server..."
mapred --daemon stop historyserver
# Stop YARN
echo "Stopping YARN..."
stop-yarn.sh
# Stop HDFS
echo "Stopping HDFS..."
stop-dfs.sh
echo "Cluster shutdown complete."
10.3 Status Check Script
#!/bin/bash
# check-cluster.sh
echo "=== Hadoop Cluster Status Check ==="
echo -e "\n1. Java processes:"
jps
echo -e "\n2. HDFS status:"
hdfs dfsadmin -report | head -20
echo -e "\n3. YARN node status:"
yarn node -list
echo -e "\n4. Disk usage:"
df -h | grep -E "(Filesystem|/opt)"
echo -e "\n5. Memory usage:"
free -h
11. Troubleshooting
11.1 Common Issues
NameNode fails to start
# Check the log
tail -f $HADOOP_HOME/logs/hadoop-*-namenode-*.log
# Common causes:
# 1. The port is already in use
netstat -tlnp | grep 9000    # or: ss -tlnp | grep 9000
# 2. Permission problems (adjust the user to whichever account runs the daemons)
chown -R hadoop:hadoop /opt/hadoop
# 3. Not enough disk space
df -h
DataNode cannot connect to the NameNode
# Check network connectivity
telnet master 9000
# Check the firewall
sudo systemctl status firewalld
# Check the hosts file
cat /etc/hosts
11.2 Log Analysis
# Tail the logs of each component
tail -f $HADOOP_HOME/logs/hadoop-*-namenode-*.log
tail -f $HADOOP_HOME/logs/hadoop-*-datanode-*.log
tail -f $HADOOP_HOME/logs/yarn-*-resourcemanager-*.log
tail -f $HADOOP_HOME/logs/yarn-*-nodemanager-*.log
11.3 Performance Monitoring
# Monitor system resources
top
iostat -x 1
sar -u 1
# Monitor Hadoop metrics
hdfs dfsadmin -report
yarn top
Summary
This chapter walked through building a Hadoop cluster: preparing the environment, installing the software, editing the configuration files, and starting and verifying the cluster. A correctly built environment is the foundation for the MapReduce development and execution covered later.
The next chapter covers developing and debugging MapReduce programs.