Hadoop是一个由Apache基金会所开发的分布式系统基础架构,包括分布式文件系统(HDFS)及分布式计算框架(MapReduce)。Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的数据,适合那些有着超大数据集(large data set)的应用程序。HDFS放宽了(relax)POSIX的要求,可以以流的形式访问(streaming access)文件系统中的数据。Hadoop的框架最核心的设计就是:HDFS和MapReduce。HDFS为海量的数据提供了存储,则MapReduce为海量的数据提供了计算。
1、准备
用的是 VirtualBox、CentOS 7、hadoop-2.6.4、jdk1.8.0_77。ssh 的配置以及虚拟机的安装及部署就不再介绍。hadoop 有三种安装模式:单节点/伪分布式/分布式,我们使用分布式安装。
2、主节点基础环境配置
2.1、hostname 配置,各datanode 节点需要单独配置,此处以主节点配置为例
vim /etc/hostnamemaster.hadoopvim /etc/systemconfig/networkHOSTNAME=master.hadoopsudo service network restart
2.2、/etc/hosts 配置,此配置为所有服务器统一配置
127.0.0.1      localhost
::1            localhost
192.168.1.106  master.hadoop
192.168.1.110  datanode0.hadoop
192.168.1.109  datanode1.hadoop
192.168.1.113  datanode2.hadoop
192.168.1.116  datanode3.hadoop
2.3、安装java
axel http://download.oracle.com/otn-pub/java/jdk/8u91-b14/jdk-8u77-linux-x64.tar.gztar -zxvf jdk-8u77-linux-x64.tar.gzmkdir /usr/javamv jdk1.8.0_77 /usr/java
2.4、安装hadoop
axel http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.6.4/hadoop-2.6.4.tar.gztar -zxvf hadoop-2.6.4.tar.gzmv hadoop-2.6.4 /usr/local/hadoopsudo chown -R hadoop:hadoop /usr/local/hadoop
2.5、添加 hadoop 用户
添加hadoop用户useradd hadooppasswd hadoop生成无密码的 rsa 密钥ssh-keygen -t rsa -P ''设置本机无密登录sudo su - hadoopcat .ssh/id_rsa.pub >> authorized_keys测试本机无密登录ssh hadoop@master.hadoop
2.6、配置 hadoop 用户的环境变量
sudo su - hadoopvim .bash_profile#添加下面几句export JAVA_HOME=/usr/java/jdk1.8.0_77export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/binexport HADOOP_HOME=/usr/local/hadoopexport HADOOP_PREFIX=$HADOOP_HOMEexport PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
3、配置 master与 datanode 的无密登录
ssh-copy-id root@192.168.1.109ssh-copy-id root@192.168.1.110ssh-copy-id root@192.168.1.113ssh-copy-id root@192.168.1.116
4、配置 datanode 节点环境
4.1、更改hostname
ssh root@192.168.1.109 'echo datanode1.hadoop > /etc/hostname; echo HOSTNAME=datanode1.hadoop > /etc/sysconfig/network; service network restart;'ssh root@192.168.1.110 'echo datanode0.hadoop > /etc/hostname; echo HOSTNAME=datanode0.hadoop > /etc/sysconfig/network; service network restart;'ssh root@192.168.1.113 'echo datanode2.hadoop > /etc/hostname; echo HOSTNAME=datanode2.hadoop > /etc/sysconfig/network; service network restart;'ssh root@192.168.1.116 'echo datanode3.hadoop > /etc/hostname; echo HOSTNAME=datanode3.hadoop > /etc/sysconfig/network; service network restart;'
4.2、更改hosts
scp /etc/hosts root@root@192.168.1.110:/etc/hostsscp /etc/hosts root@root@192.168.1.109:/etc/hostsscp /etc/hosts root@root@192.168.1.113:/etc/hostsscp /etc/hosts root@root@192.168.1.114:/etc/hosts
4.3、安装java环境
scp -r /usr/java/jdk1.8.0_77 root@datanode0.hadoop:/usr/java/jdk1.8.0_77scp -r /usr/java/jdk1.8.0_77 root@datanode1.hadoop:/usr/java/jdk1.8.0_77scp -r /usr/java/jdk1.8.0_77 root@datanode2.hadoop:/usr/java/jdk1.8.0_77scp -r /usr/java/jdk1.8.0_77 root@datanode3.hadoop:/usr/java/jdk1.8.0_77
4.4、安装hadoop环境
scp -r /usr/local/hadoop root@datanode0.hadoop:/usr/local/hadoopscp -r /usr/local/hadoop root@datanode1.hadoop:/usr/local/hadoopscp -r /usr/local/hadoop root@datanode2.hadoop:/usr/local/hadoopscp -r /usr/local/hadoop root@datanode3.hadoop:/usr/local/hadoop
4.5、停用firewall
ssh root@datanode0.hadoop 'systemctl stop firewalld.service;systemctl disable firewalld.service;'ssh root@datanode1.hadoop 'systemctl stop firewalld.service;systemctl disable firewalld.service;'ssh root@datanode2.hadoop 'systemctl stop firewalld.service;systemctl disable firewalld.service;'ssh root@datanode3.hadoop 'systemctl stop firewalld.service;systemctl disable firewalld.service;'
4.6、新建 hadoop 用户
ssh root@192.168.1.109 'useradd hadoop; echo hadoop:hadoop | chpasswd;'ssh root@192.168.1.110 'useradd hadoop; echo hadoop:hadoop | chpasswd;'ssh root@192.168.1.113 'useradd hadoop; echo hadoop:hadoop | chpasswd;'ssh root@192.168.1.116 'useradd hadoop; echo hadoop:hadoop | chpasswd;'
4.7、hadoop用户无密登录
sudo su - hadoopssh-copy-id hadoop@192.168.1.109ssh-copy-id hadoop@192.168.1.110ssh-copy-id hadoop@192.168.1.113ssh-copy-id hadoop@192.168.1.116
4.8、hadoop用户登录环境变量
scp ~/.bash_profile hadoop@datanode0.hadoop:~/.bash_profilescp ~/.bash_profile hadoop@datanode1.hadoop:~/.bash_profilescp ~/.bash_profile hadoop@datanode2.hadoop:~/.bash_profilescp ~/.bash_profile hadoop@datanode3.hadoop:~/.bash_profile
4.9、新建hadoop数据目录
ssh root@datanode0.hadoop 'mkdir -p /hadoopdata/datanode;chown -r hadoop:hadoop /hadoopdata;'ssh root@datanode1.hadoop 'mkdir -p /hadoopdata/datanode;chown -r hadoop:hadoop /hadoopdata;'ssh root@datanode2.hadoop 'mkdir -p /hadoopdata/datanode;chown -r hadoop:hadoop /hadoopdata;'ssh root@datanode3.hadoop 'mkdir -p /hadoopdata/datanode;chown -r hadoop:hadoop /hadoopdata;'
4.10、更新hadoop环境所属用户
ssh root@datanode0.hadoop 'chown -r hadoop:hadoop /usr/local/hadoop;'ssh root@datanode1.hadoop 'chown -r hadoop:hadoop /usr/local/hadoop;'ssh root@datanode2.hadoop 'chown -r hadoop:hadoop /usr/local/hadoop;'ssh root@datanode3.hadoop 'chown -r hadoop:hadoop /usr/local/hadoop;'
5、hadoop配置
5.1、core-site.xml
fs.default.name hdfs://master.hadoop:9000 hadoop.tmp.dir hdfs:///tmp io.file.buffer.size 131072 ds.default.name hdfs://master.hadoop:54310
5.2、hdfs-site.xml
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///hadoopdata/namenode</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///hadoopdata/datanode</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>
5.3、mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master.hadoop:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master.hadoop:19888</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/mr-history/tmp</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/mr-history/done</value>
  </property>
</configuration>
5.4、yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master.hadoop</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master.hadoop:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master.hadoop:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master.hadoop:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master.hadoop:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master.hadoop:8088</value>
  </property>
</configuration>
5.5、hadoop-env.sh
# 找到下面一行export JAVA_HOME=${JAVA_HOME}#注释掉,并在下面添加一行export JAVA_HOME=/usr/java/jdk1.8.0_77
5.6、同步配置到节点服务器,脚本如下
# Push every changed config file to every datanode.
# Fixed: in the original, one scp command was broken across two lines in
# the middle of its argument list, so the yarn-site.xml copy to
# datanode1 could never run. The nested loop also removes the 20-command
# copy-paste block.
conf_dir=/usr/local/hadoop/etc/hadoop
for node in datanode0 datanode1 datanode2 datanode3; do
  for conf_file in hadoop-env.sh core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml; do
    scp "${conf_dir}/${conf_file}" "hadoop@${node}.hadoop:${conf_dir}/${conf_file}"
  done
done
6、启动
#格式化 namenode节点hadoop namenode -format#启动hadoopstart-all.sh
7、检查
# master 上执行 jps
[hadoop@master ~]$ jps
6486 ResourceManager
5675 DataNode
6120 SecondaryNameNode
5447 NameNode
16412 Jps
# datanode 上执行 jps
[hadoop@datanode0 ~]$ jps
1991 DataNode
2402 Jps
8、停止
stop-all.sh
9、hadoop开发需要配置eclipse
eclipse-hadoop插件的地址:https://github.com/winghc/hadoop2x-eclipse-plugin.git具体编译方式,这里不再详述