--------------- Master ---------------
HostName : server01.hadoop.com
IpAddress : 192.168.56.101
RAM : 2GB
HDD : 20GB
--------------- DataNode1 ---------------
HostName : server02.hadoop.com
IpAddress : 192.168.56.102
RAM : 2GB
HDD : 20GB
--------------- DataNode2 ---------------
HostName : server03.hadoop.com
IpAddress : 192.168.56.103
RAM : 2GB
HDD : 20GB
---- register all three nodes in /etc/hosts on every server
vi /etc/hosts
192.168.56.101 server01.hadoop.com server01
192.168.56.102 server02.hadoop.com server02
192.168.56.103 server03.hadoop.com server03
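---- optional sanity check: confirm the names resolve (repeat from each node)
[root@server01 ~]# ping -c 1 server02
[root@server01 ~]# ping -c 1 server03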
---- keep swapping to a minimum on Hadoop nodes
sysctl -w vm.swappiness=1
---- persist the setting across reboots
vi /etc/sysctl.conf
vm.swappiness=1
---- disable Transparent Huge Pages at every boot
vi /etc/rc.local
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
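---- note: on CentOS 7, /etc/rc.d/rc.local only runs at boot if it is executable
[root@server01 ~]# chmod +x /etc/rc.d/rc.local
---- the current THP state can be checked directly; [never] should be selected
[root@server01 ~]# cat /sys/kernel/mm/transparent_hugepage/enabled
always madvise [never]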
---- raise the open-file (nofile) and process (nproc) limits
vi /etc/security/limits.conf
root soft nofile 65536
root hard nofile 65536
* soft nofile 65536
* hard nofile 65536
root soft nproc 32768
root hard nproc 32768
* soft nproc 32768
* hard nproc 32768
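---- the limits apply to new login sessions; after logging in again, verify with ulimit
---- (files under /etc/security/limits.d/ may override these values)
[root@server01 ~]# ulimit -n
[root@server01 ~]# ulimit -u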
---- check the current hostname, set it (server02 shown; use each node's own name), and confirm
hostname
hostnamectl set-hostname server02.hadoop.com
hostname
---- first adapter: enable at boot
vi /etc/sysconfig/network-scripts/ifcfg-enp0s3
ONBOOT=yes
---- second adapter: static IP for the cluster network
vi /etc/sysconfig/network-scripts/ifcfg-enp0s8
BOOTPROTO=static
ONBOOT=yes
IPADDR=192.168.56.102
HWADDR=08:00:27:59:D9:F5
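---- apply the change by restarting the network service (assumes the CentOS 7 legacy network service)
[root@server02 ~]# systemctl restart network
[root@server02 ~]# ip addr show enp0s8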
vi /etc/hosts
192.168.56.101 server01.hadoop.com server01
192.168.56.102 server02.hadoop.com server02
192.168.56.103 server03.hadoop.com server03
1. Generate a key
[root@server01 ~]# ssh-keygen
2. Copy the key to each server
[root@server01 ~]# ssh-copy-id -i root@server01
[root@server01 ~]# ssh-copy-id -i root@server02
[root@server01 ~]# ssh-copy-id -i root@server03
3. Reboot
[root@server01 ~]# reboot
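---- after the reboot, confirm passwordless login works before moving on
[root@server01 ~]# ssh server02 hostname
server02.hadoop.com
[root@server01 ~]# ssh server03 hostname
server03.hadoop.com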
1.1 Install wget and download Hadoop
[root@server01 ~]# yum -y install wget
[root@server01 ~]# wget http://apache.mirror.cdnetworks.com/hadoop/common/current2/hadoop-2.10.1.tar.gz
[root@server01 ~]# ls
anaconda-ks.cfg hosts.txt jdk-8u231-linux-x64.tar.gz
hadoop-2.10.1.tar.gz hosts_all.txt
1.2 Extract the archive
[root@server01 ~]# tar -zxvf hadoop-2.10.1.tar.gz
1.3 Move the directory under /usr/local
[root@server01 ~]# mv hadoop-2.10.1 /usr/local
1.4 Set the environment variables
[root@server01 ~]# vi /etc/profile
------------------ add
export HADOOP_HOME=/usr/local/hadoop-2.10.1
------------------ edit
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
1.5 Apply the environment variables
[root@server01 ~]# source /etc/profile
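---- sanity check: the variable and the hadoop binary should now be visible
[root@server01 ~]# echo $HADOOP_HOME
/usr/local/hadoop-2.10.1
[root@server01 ~]# hadoop version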
/usr/local/hadoop-2.10.1/etc/hadoop/
Location of the Hadoop configuration files
hadoop-env.sh
Shell script that launches Hadoop; sets the JDK path, classpath, daemon options, etc.
slaves
Lists the DataNode servers
core-site.xml
Settings shared by HDFS and MapReduce; the part common to hdfs-site and mapred-site
hdfs-site.xml
Settings for the Hadoop file system (HDFS)
mapred-site.xml
Settings for MapReduce applications
yarn-site.xml
Settings for the ResourceManager and NodeManagers
yarn-env.sh
Shell script that launches YARN
[root@server01 ~]# cd /usr/local/hadoop-2.10.1/etc/hadoop/
[root@server01 ~]# vi hadoop-env.sh
-------------------------------------------------------------------------- edit
export JAVA_HOME=/usr/local/jdk1.8.0_231
---------------------------------------------------------------------------------
-------------------------------------------------------------------------- add
export HADOOP_HOME=/usr/local/hadoop-2.10.1
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
------- lists the DataNode servers
------- delete the existing localhost entry
[root@server01 ~]# vi slaves
----- delete localhost
-------------------------------------------------------------------------- add
server02
server03
---------------------------------------------------------------------------------
[root@server01 ~]# vi core-site.xml
<configuration>
-------------------------------------------------------------------------- add
<property>
<name>fs.default.name</name> : deprecated alias of fs.defaultFS, still accepted in 2.x
<value>hdfs://server01:9000</value> : the master (NameNode) server
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop-2.10.1/tmp</value> : temp directory
</property>
---------------------------------------------------------------------------------
</configuration>
----- the temp directory does not exist yet, so create it
[root@server01 ~]# mkdir /usr/local/hadoop-2.10.1/tmp
[root@server01 ~]# vi hdfs-site.xml
<configuration>
-------------------------------------------------------------------------- add
<property>
<name>dfs.replication</name>
---- replication factor: 1 for pseudo-distributed mode, 3 for fully distributed mode
<value>3</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>server01:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>server01:50090</value>
</property>
---------------------------------------------------------------------------------
</configuration>
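---- optional check: hdfs getconf reads the active configuration,
---- so it is an easy way to confirm a property was picked up
[root@server01 ~]# hdfs getconf -confKey dfs.replication
3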
---- mapred-site.xml needs to be edited, but the file does not exist by default
---- if mapred-site.xml is missing, copy mapred-site.xml.template and edit the copy
[root@server01 ~]# cp mapred-site.xml.template mapred-site.xml
[root@server01 ~]# vi mapred-site.xml
<configuration>
-------------------------------------------------------------------------- add
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
---------------------------------------------------------------------------------
</configuration>
[root@server01 ~]# vi yarn-site.xml
<configuration>
-------------------------------------------------------------------------- add
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>server01:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>server01:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>server01:8040</value>
</property>
---------------------------------------------------------------------------------
</configuration>
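---- a malformed XML file is a common reason daemons fail to start;
---- if xmllint is available, every edited file can be validated in one pass
[root@server01 ~]# xmllint --noout core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml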
------- distribute Hadoop and the profile with scp, then verify on each server
[root@server01 ~]# scp -r /usr/local/hadoop-2.10.1 root@server02:/usr/local
[root@server01 ~]# scp -r /usr/local/hadoop-2.10.1 root@server03:/usr/local
[root@server01 ~]# scp /etc/profile root@server02:/etc/profile
[root@server01 ~]# scp /etc/profile root@server03:/etc/profile
------- on server02 (repeat on server03)
[root@server02 ~]# cat /etc/profile
[root@server02 ~]# source /etc/profile
------------------------------------------------ 1. Format HDFS
---- the format command lives in /usr/local/hadoop-2.10.1/bin/
---- (both bin/ and sbin/ are already on PATH, but cd there first to be safe)
[root@server01 ~]# cd /usr/local/hadoop-2.10.1/bin/
-- this command still runs but prints a deprecation warning
[root@server01 ~]# hadoop namenode -format
-- use the hdfs command instead
[root@server01 ~]# hdfs namenode -format
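---- if the format succeeds, the NameNode metadata appears under hadoop.tmp.dir
---- (path assumes the core-site.xml setting above)
[root@server01 ~]# ls /usr/local/hadoop-2.10.1/tmp/dfs/name/current/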
------------------------------------------------ 2. Start Hadoop
-- start the NameNode and DataNodes
[root@server01 ~]# start-dfs.sh
-- start the ResourceManager (and NodeManagers)
[root@server01 ~]# start-yarn.sh
-- single command that runs both of the above
-- it still runs but prints a deprecation warning
[root@server01 ~]# start-all.sh
------------------------------------------------ 3. Verify Hadoop is running
[root@server01 ~]# jps
2949 SecondaryNameNode
2760 NameNode
3101 ResourceManager
3423 Jps
[root@server02 ~]# jps
2050 NodeManager
1942 DataNode
2183 Jps
1019 Bootstrap
[root@server03 ~]# jps
2064 Jps
1931 NodeManager
1823 DataNode
-- list the registered NodeManagers
[root@server01 ~]# yarn node -list
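---- the HDFS side can be checked the same way: dfsadmin -report
---- lists each live DataNode with its capacity
[root@server01 ~]# hdfs dfsadmin -report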
-- check in a web browser (50070 = NameNode UI, 50075 = DataNode UI)
http://192.168.56.101:50070/
http://192.168.56.102:50075/
http://192.168.56.103:50075/
------------------------------------------------ 4. Stop Hadoop
-- stop the DataNodes and NameNode
[root@server01 ~]# stop-dfs.sh
-- stop the ResourceManager
[root@server01 ~]# stop-yarn.sh
-- single command that runs both of the above
-- it still runs but prints a deprecation warning
[root@server01 ~]# stop-all.sh
---- copy the example program to root's home directory
[root@server01 ~]# cp /usr/local/hadoop-2.10.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.10.1.jar ~
---- create an HDFS directory to hold the input file
[root@server01 ~]# hadoop fs -mkdir /input
[root@server01 ~]# hadoop fs -ls /
---- put the input file into /input
---- there is no sample file, so use Hadoop's README.txt
[root@server01 ~]# hadoop fs -put /usr/local/hadoop-2.10.1/README.txt /input/
---- run the wordcount example
[root@server01 ~]# hadoop jar hadoop-mapreduce-examples-2.10.1.jar wordcount /input /output
[root@server01 ~]# hadoop fs -ls /
Found 3 items
drwxr-xr-x - root supergroup 0 2019-12-11 14:26 /input
drwxr-xr-x - root supergroup 0 2019-12-11 14:46 /output
drwx------ - root supergroup 0 2019-12-11 14:46 /tmp
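---- the actual counts can be printed straight from HDFS;
---- wordcount writes its result to part-r-00000
[root@server01 ~]# hadoop fs -cat /output/part-r-00000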
---- web browser
The files can also be browsed at http://192.168.56.101:50070/ -> Utilities -> Browse the file system