# This test is very comprehensive. It tests whether all hadoop services work well with each other.
# Run this when updating the Hadoop package or making significant changes to the hadoop module.
# For a more basic test, see hdfs.nix and yarn.nix
import ../make-test-python.nix (
  # `package` is supplied by the caller and is forwarded unchanged to
  # `services.hadoop.package` on every Hadoop node below.
  { package, ... }:
  {
    name = "hadoop-combined";

    nodes =
      let
        # Shared by every Hadoop node: the default filesystem is the logical
        # nameservice "ns1" (defined in hdfsSite), not a single namenode host.
        coreSite = {
          "fs.defaultFS" = "hdfs://ns1";
        };

        # Shared HDFS settings: two namenodes (nn1, nn2) behind nameservice
        # "ns1", edits shared through three journalnodes, failover via zk1.
        hdfsSite = {
          # HA Quorum Journal Manager configuration
          "dfs.nameservices" = "ns1";
          "dfs.ha.namenodes.ns1" = "nn1,nn2";
          "dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
          "dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
          "dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
          "dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
          "dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
          "dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
          "dfs.namenode.http-address.ns1.nn2" = "nn2:9870";

          # Automatic failover configuration
          "dfs.client.failover.proxy.provider.ns1" =
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
          "dfs.ha.automatic-failover.enabled.ns1" = "true";
          "dfs.ha.fencing.methods" = "shell(true)";
          "ha.zookeeper.quorum" = "zk1:2181";
        };

        # Shared YARN settings: two resourcemanagers (rm1, rm2) in HA mode,
        # coordinated through the same ZooKeeper instance as HDFS.
        yarnSite = {
          "yarn.resourcemanager.zk-address" = "zk1:2181";
          "yarn.resourcemanager.ha.enabled" = "true";
          "yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
          "yarn.resourcemanager.hostname.rm1" = "rm1";
          "yarn.resourcemanager.hostname.rm2" = "rm2";
          "yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
          "yarn.resourcemanager.cluster-id" = "cluster1";
          # yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
          # hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
          # that causes AM containers to fail otherwise.
          "yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
          "yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
        };
      in
      {
        # Single ZooKeeper instance referenced by both hdfsSite and yarnSite.
        zk1 =
          { ... }:
          {
            services.zookeeper.enable = true;
            networking.firewall.allowedTCPPorts = [ 2181 ];
          };

        # HDFS cluster
        # Namenodes; each one also runs a ZooKeeper failover controller (zkfc).
        nn1 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.namenode = {
                enable = true;
                openFirewall = true;
              };
              hdfs.zkfc.enable = true;
            };
          };
        nn2 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.namenode = {
                enable = true;
                openFirewall = true;
              };
              hdfs.zkfc.enable = true;
            };
          };

        # Journalnodes listed in dfs.namenode.shared.edits.dir.ns1.
        jn1 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.journalnode = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        jn2 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.journalnode = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        jn3 =
          { ... }:
          {
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.journalnode = {
                enable = true;
                openFirewall = true;
              };
            };
          };

        # The only datanode; it gets a larger VM disk than the default.
        dn1 =
          { ... }:
          {
            virtualisation.diskSize = 4096;
            services.hadoop = {
              inherit package coreSite hdfsSite;
              hdfs.datanode = {
                enable = true;
                openFirewall = true;
              };
            };
          };

        # YARN cluster
        rm1 =
          { ... }:
          {
            services.hadoop = {
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
              yarn.resourcemanager = {
                enable = true;
                openFirewall = true;
              };
            };
          };
        rm2 =
          { ... }:
          {
            services.hadoop = {
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
              yarn.resourcemanager = {
                enable = true;
                openFirewall = true;
              };
            };
          };

        # The only nodemanager; it gets extra VM memory and runs without cgroups.
        nm1 =
          { ... }:
          {
            virtualisation.memorySize = 2048;
            services.hadoop = {
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
              yarn.nodemanager = {
                enable = true;
                openFirewall = true;
                useCGroups = false;
              };
            };
          };

        # Gateway machine: runs no Hadoop daemon in this test; the test script
        # issues all hdfs/yarn client commands from here.
        client =
          { ... }:
          {
            services.hadoop = {
              gatewayRole.enable = true;
              inherit
                package
                coreSite
                hdfsSite
                yarnSite
                ;
            };
          };
      };

    # Python test-driver script. `systemd-cat` is used on purely informational
    # commands so that their output ends up in the machine's journal.
    testScript = ''
      start_all()

      #### HDFS tests ####

      zk1.wait_for_unit("network.target")
      jn1.wait_for_unit("network.target")
      jn2.wait_for_unit("network.target")
      jn3.wait_for_unit("network.target")
      nn1.wait_for_unit("network.target")
      nn2.wait_for_unit("network.target")
      dn1.wait_for_unit("network.target")

      zk1.wait_for_unit("zookeeper")
      jn1.wait_for_unit("hdfs-journalnode")
      jn2.wait_for_unit("hdfs-journalnode")
      jn3.wait_for_unit("hdfs-journalnode")

      # Wait on all three journalnodes, so that none of them is still starting
      # up when the namenode is formatted below.
      zk1.wait_for_open_port(2181)
      jn1.wait_for_open_port(8480)
      jn1.wait_for_open_port(8485)
      jn2.wait_for_open_port(8480)
      jn2.wait_for_open_port(8485)
      jn3.wait_for_open_port(8480)
      jn3.wait_for_open_port(8485)

      # Namenodes must be stopped before initializing the cluster
      nn1.succeed("systemctl stop hdfs-namenode")
      nn2.succeed("systemctl stop hdfs-namenode")
      nn1.succeed("systemctl stop hdfs-zkfc")
      nn2.succeed("systemctl stop hdfs-zkfc")

      # Initialize zookeeper for failover controller
      nn1.succeed("sudo -u hdfs systemd-cat hdfs zkfc -formatZK")

      # Format NN1 and start it
      nn1.succeed("sudo -u hdfs systemd-cat hadoop namenode -format")
      nn1.succeed("systemctl start hdfs-namenode")
      nn1.wait_for_open_port(9870)
      nn1.wait_for_open_port(8022)
      nn1.wait_for_open_port(8020)

      # Bootstrap NN2 from NN1 and start it
      nn2.succeed("sudo -u hdfs systemd-cat hdfs namenode -bootstrapStandby")
      nn2.succeed("systemctl start hdfs-namenode")
      nn2.wait_for_open_port(9870)
      nn2.wait_for_open_port(8022)
      nn2.wait_for_open_port(8020)
      nn1.succeed("systemd-cat netstat -tulpne")

      # Start failover controllers
      nn1.succeed("systemctl start hdfs-zkfc")
      nn2.succeed("systemctl start hdfs-zkfc")

      # DN should have started by now, but confirm anyway
      dn1.wait_for_unit("hdfs-datanode")
      # Print states of namenodes
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")
      # Wait for cluster to exit safemode
      client.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")
      # test R/W
      client.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
      assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")

      # Test NN failover
      nn1.succeed("systemctl stop hdfs-namenode")
      assert "active" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")
      assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")

      nn1.succeed("systemctl start hdfs-namenode")
      nn1.wait_for_open_port(9870)
      nn1.wait_for_open_port(8022)
      nn1.wait_for_open_port(8020)
      assert "standby" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
      client.succeed("sudo -u hdfs systemd-cat hdfs haadmin -getAllServiceState")

      #### YARN tests ####

      rm1.wait_for_unit("network.target")
      rm2.wait_for_unit("network.target")
      nm1.wait_for_unit("network.target")

      rm1.wait_for_unit("yarn-resourcemanager")
      rm1.wait_for_open_port(8088)
      rm2.wait_for_unit("yarn-resourcemanager")
      rm2.wait_for_open_port(8088)

      nm1.wait_for_unit("yarn-nodemanager")
      nm1.wait_for_open_port(8042)
      nm1.wait_for_open_port(8040)
      client.wait_until_succeeds("yarn node -list | grep Nodes:1")
      client.succeed("sudo -u yarn systemd-cat yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn systemd-cat yarn node -list")

      # Test RM failover
      rm1.succeed("systemctl stop yarn-resourcemanager")
      assert "standby" not in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn systemd-cat yarn rmadmin -getAllServiceState")
      rm1.succeed("systemctl start yarn-resourcemanager")
      rm1.wait_for_unit("yarn-resourcemanager")
      rm1.wait_for_open_port(8088)
      assert "standby" in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn systemd-cat yarn rmadmin -getAllServiceState")

      # Run the bundled MapReduce "pi" example end to end and check that YARN
      # reports the application as finished successfully.
      assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
      assert "SUCCEEDED" in client.succeed("yarn application -list -appStates FINISHED")
    '';
  }
)