mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-07-14 06:00:33 +03:00
Merge pull request #154004 from illustris/hadoop
This commit is contained in:
commit
b734f40478
12 changed files with 639 additions and 380 deletions
|
@ -1,6 +1,6 @@
|
|||
{ cfg, pkgs, lib }:
|
||||
let
|
||||
propertyXml = name: value: ''
|
||||
propertyXml = name: value: lib.optionalString (value != null) ''
|
||||
<property>
|
||||
<name>${name}</name>
|
||||
<value>${builtins.toString value}</value>
|
||||
|
@ -29,16 +29,16 @@ let
|
|||
export HADOOP_LOG_DIR=/tmp/hadoop/$USER
|
||||
'';
|
||||
in
|
||||
pkgs.runCommand "hadoop-conf" {} ''
|
||||
pkgs.runCommand "hadoop-conf" {} (with cfg; ''
|
||||
mkdir -p $out/
|
||||
cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
|
||||
cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
|
||||
cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
|
||||
cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
|
||||
cp ${siteXml "httpfs-site.xml" cfg.httpfsSite}/* $out/
|
||||
cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
|
||||
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
|
||||
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
|
||||
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
|
||||
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
|
||||
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
|
||||
cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/
|
||||
cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
|
||||
cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
|
||||
cp ${cfg.log4jProperties} $out/log4j.properties
|
||||
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
|
||||
''
|
||||
cp ${log4jProperties} $out/log4j.properties
|
||||
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs}
|
||||
'')
|
||||
|
|
|
@ -21,24 +21,50 @@ with lib;
|
|||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/>
|
||||
'';
|
||||
};
|
||||
coreSiteInternal = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
internal = true;
|
||||
description = ''
|
||||
Internal option to add configs to core-site.xml based on module options
|
||||
'';
|
||||
};
|
||||
|
||||
hdfsSite = mkOption {
|
||||
hdfsSiteDefault = mkOption {
|
||||
default = {
|
||||
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
|
||||
"dfs.namenode.http-address" = "0.0.0.0:9870";
|
||||
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
|
||||
"dfs.namenode.http-bind-host" = "0.0.0.0";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
description = ''
|
||||
Default options for hdfs-site.xml
|
||||
'';
|
||||
};
|
||||
hdfsSite = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
{
|
||||
"dfs.nameservices" = "namenode1";
|
||||
}
|
||||
'';
|
||||
description = ''
|
||||
Hadoop hdfs-site.xml definition
|
||||
Additional options and overrides for hdfs-site.xml
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/>
|
||||
'';
|
||||
};
|
||||
hdfsSiteInternal = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
internal = true;
|
||||
description = ''
|
||||
Internal option to add configs to hdfs-site.xml based on module options
|
||||
'';
|
||||
};
|
||||
|
||||
mapredSite = mkOption {
|
||||
mapredSiteDefault = mkOption {
|
||||
default = {
|
||||
"mapreduce.framework.name" = "yarn";
|
||||
"yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
|
||||
|
@ -54,18 +80,25 @@ with lib;
|
|||
}
|
||||
'';
|
||||
type = types.attrsOf types.anything;
|
||||
description = ''
|
||||
Default options for mapred-site.xml
|
||||
'';
|
||||
};
|
||||
mapredSite = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
options.services.hadoop.mapredSite.default // {
|
||||
{
|
||||
"mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
|
||||
}
|
||||
'';
|
||||
description = ''
|
||||
Hadoop mapred-site.xml definition
|
||||
Additional options and overrides for mapred-site.xml
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/>
|
||||
'';
|
||||
};
|
||||
|
||||
yarnSite = mkOption {
|
||||
yarnSiteDefault = mkOption {
|
||||
default = {
|
||||
"yarn.nodemanager.admin-env" = "PATH=$PATH";
|
||||
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
|
||||
|
@ -77,19 +110,34 @@ with lib;
|
|||
"yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
|
||||
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
|
||||
"yarn.resourcemanager.bind-host" = "0.0.0.0";
|
||||
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
|
||||
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
description = ''
|
||||
Default options for yarn-site.xml
|
||||
'';
|
||||
};
|
||||
yarnSite = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
options.services.hadoop.yarnSite.default // {
|
||||
{
|
||||
"yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
|
||||
}
|
||||
'';
|
||||
description = ''
|
||||
Hadoop yarn-site.xml definition
|
||||
Additional options and overrides for yarn-site.xml
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/>
|
||||
'';
|
||||
};
|
||||
yarnSiteInternal = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
internal = true;
|
||||
description = ''
|
||||
Internal option to add configs to yarn-site.xml based on module options
|
||||
'';
|
||||
};
|
||||
|
||||
httpfsSite = mkOption {
|
||||
default = { };
|
||||
|
@ -123,6 +171,7 @@ with lib;
|
|||
"yarn.nodemanager.linux-container-executor.group"="hadoop";
|
||||
"min.user.id"=1000;
|
||||
"feature.terminal.enabled"=1;
|
||||
"feature.mount-cgroup.enabled" = 1;
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
|
@ -148,6 +197,8 @@ with lib;
|
|||
description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
|
||||
};
|
||||
|
||||
gatewayRole.enable = mkEnableOption "gateway role for deploying hadoop configs";
|
||||
|
||||
package = mkOption {
|
||||
type = types.package;
|
||||
default = pkgs.hadoop;
|
||||
|
@ -157,20 +208,16 @@ with lib;
|
|||
};
|
||||
|
||||
|
||||
config = mkMerge [
|
||||
(mkIf (builtins.hasAttr "yarn" config.users.users ||
|
||||
builtins.hasAttr "hdfs" config.users.users ||
|
||||
builtins.hasAttr "httpfs" config.users.users) {
|
||||
users.groups.hadoop = {
|
||||
gid = config.ids.gids.hadoop;
|
||||
};
|
||||
environment = {
|
||||
systemPackages = [ cfg.package ];
|
||||
etc."hadoop-conf".source = let
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
in "${hadoopConf}";
|
||||
};
|
||||
})
|
||||
|
||||
];
|
||||
config = mkIf cfg.gatewayRole.enable {
|
||||
users.groups.hadoop = {
|
||||
gid = config.ids.gids.hadoop;
|
||||
};
|
||||
environment = {
|
||||
systemPackages = [ cfg.package ];
|
||||
etc."hadoop-conf".source = let
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
in "${hadoopConf}";
|
||||
variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/";
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,191 +1,191 @@
|
|||
{ config, lib, pkgs, ...}:
|
||||
{ config, lib, pkgs, ... }:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.services.hadoop;
|
||||
|
||||
# Config files for hadoop services
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
restartIfChanged = mkOption {
|
||||
type = types.bool;
|
||||
description = ''
|
||||
Automatically restart the service on config change.
|
||||
This can be set to false to defer restarts on clusters running critical applications.
|
||||
Please consider the security implications of inadvertently running an older version,
|
||||
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
|
||||
'';
|
||||
default = false;
|
||||
};
|
||||
|
||||
# Generator for HDFS service options
|
||||
hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
|
||||
enable = mkEnableOption serviceName;
|
||||
restartIfChanged = mkOption {
|
||||
type = types.bool;
|
||||
description = ''
|
||||
Automatically restart the service on config change.
|
||||
This can be set to false to defer restarts on clusters running critical applications.
|
||||
Please consider the security implications of inadvertently running an older version,
|
||||
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
|
||||
'';
|
||||
default = false;
|
||||
};
|
||||
extraFlags = mkOption{
|
||||
type = with types; listOf str;
|
||||
default = [];
|
||||
description = "Extra command line flags to pass to ${serviceName}";
|
||||
example = [
|
||||
"-Dcom.sun.management.jmxremote"
|
||||
"-Dcom.sun.management.jmxremote.port=8010"
|
||||
];
|
||||
};
|
||||
extraEnv = mkOption{
|
||||
type = with types; attrsOf str;
|
||||
default = {};
|
||||
description = "Extra environment variables for ${serviceName}";
|
||||
};
|
||||
} // (optionalAttrs firewallOption {
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Open firewall ports for ${serviceName}.";
|
||||
};
|
||||
}) // (optionalAttrs (extraOpts != null) extraOpts);
|
||||
|
||||
# Generator for HDFS service configs
|
||||
hadoopServiceConfig =
|
||||
{ name
|
||||
, serviceOptions ? cfg.hdfs."${toLower name}"
|
||||
, description ? "Hadoop HDFS ${name}"
|
||||
, User ? "hdfs"
|
||||
, allowedTCPPorts ? [ ]
|
||||
, preStart ? ""
|
||||
, environment ? { }
|
||||
, extraConfig ? { }
|
||||
}: (
|
||||
|
||||
mkIf serviceOptions.enable ( mkMerge [{
|
||||
systemd.services."hdfs-${toLower name}" = {
|
||||
inherit description preStart;
|
||||
environment = environment // serviceOptions.extraEnv;
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (serviceOptions) restartIfChanged;
|
||||
serviceConfig = {
|
||||
inherit User;
|
||||
SyslogIdentifier = "hdfs-${toLower name}";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
services.hadoop.gatewayRole.enable = true;
|
||||
|
||||
networking.firewall.allowedTCPPorts = mkIf
|
||||
((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
|
||||
allowedTCPPorts;
|
||||
} extraConfig])
|
||||
);
|
||||
|
||||
in
|
||||
{
|
||||
options.services.hadoop.hdfs = {
|
||||
namenode = {
|
||||
enable = mkEnableOption "Whether to run the HDFS NameNode";
|
||||
|
||||
namenode = hadoopServiceOption { serviceName = "HDFS NameNode"; } // {
|
||||
formatOnInit = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Format HDFS namenode on first start. This is useful for quickly spinning up ephemeral HDFS clusters with a single namenode.
|
||||
For HA clusters, initialization involves multiple steps across multiple nodes. Follow [this guide](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html)
|
||||
to initialize an HA cluster manually.
|
||||
'';
|
||||
};
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for namenode
|
||||
Format HDFS namenode on first start. This is useful for quickly spinning up
|
||||
ephemeral HDFS clusters with a single namenode.
|
||||
For HA clusters, initialization involves multiple steps across multiple nodes.
|
||||
Follow this guide to initialize an HA cluster manually:
|
||||
<link xlink:href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html"/>
|
||||
'';
|
||||
};
|
||||
};
|
||||
datanode = {
|
||||
enable = mkEnableOption "Whether to run the HDFS DataNode";
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for datanode
|
||||
'';
|
||||
|
||||
datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // {
|
||||
dataDirs = mkOption {
|
||||
default = null;
|
||||
description = "Tier and path definitions for datanode storage.";
|
||||
type = with types; nullOr (listOf (submodule {
|
||||
options = {
|
||||
type = mkOption {
|
||||
type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
|
||||
description = ''
|
||||
Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
|
||||
'';
|
||||
};
|
||||
path = mkOption {
|
||||
type = path;
|
||||
example = [ "/var/lib/hadoop/hdfs/dn" ];
|
||||
description = "Determines where on the local filesystem a data node should store its blocks.";
|
||||
};
|
||||
};
|
||||
}));
|
||||
};
|
||||
};
|
||||
journalnode = {
|
||||
enable = mkEnableOption "Whether to run the HDFS JournalNode";
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for journalnode
|
||||
'';
|
||||
};
|
||||
|
||||
journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };
|
||||
|
||||
zkfc = hadoopServiceOption {
|
||||
serviceName = "HDFS ZooKeeper failover controller";
|
||||
firewallOption = false;
|
||||
};
|
||||
zkfc = {
|
||||
enable = mkEnableOption "Whether to run the HDFS ZooKeeper failover controller";
|
||||
inherit restartIfChanged;
|
||||
};
|
||||
httpfs = {
|
||||
enable = mkEnableOption "Whether to run the HDFS HTTPfs server";
|
||||
|
||||
httpfs = hadoopServiceOption { serviceName = "HDFS JournalNode"; } // {
|
||||
tempPath = mkOption {
|
||||
type = types.path;
|
||||
default = "/tmp/hadoop/httpfs";
|
||||
description = ''
|
||||
HTTPFS_TEMP path used by HTTPFS
|
||||
'';
|
||||
};
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for HTTPFS
|
||||
'';
|
||||
description = "HTTPFS_TEMP path used by HTTPFS";
|
||||
};
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
config = mkMerge [
|
||||
(mkIf cfg.hdfs.namenode.enable {
|
||||
systemd.services.hdfs-namenode = {
|
||||
description = "Hadoop HDFS NameNode";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.hdfs.namenode) restartIfChanged;
|
||||
|
||||
preStart = (mkIf cfg.hdfs.namenode.formatOnInit ''
|
||||
${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
|
||||
'');
|
||||
|
||||
serviceConfig = {
|
||||
User = "hdfs";
|
||||
SyslogIdentifier = "hdfs-namenode";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.namenode.openFirewall [
|
||||
(hadoopServiceConfig {
|
||||
name = "NameNode";
|
||||
allowedTCPPorts = [
|
||||
9870 # namenode.http-address
|
||||
8020 # namenode.rpc-address
|
||||
8022 # namenode. servicerpc-address
|
||||
]);
|
||||
8022 # namenode.servicerpc-address
|
||||
8019 # dfs.ha.zkfc.port
|
||||
];
|
||||
preStart = (mkIf cfg.hdfs.namenode.formatOnInit
|
||||
"${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true"
|
||||
);
|
||||
})
|
||||
(mkIf cfg.hdfs.datanode.enable {
|
||||
systemd.services.hdfs-datanode = {
|
||||
description = "Hadoop HDFS DataNode";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.hdfs.datanode) restartIfChanged;
|
||||
|
||||
serviceConfig = {
|
||||
User = "hdfs";
|
||||
SyslogIdentifier = "hdfs-datanode";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.datanode.openFirewall [
|
||||
(hadoopServiceConfig {
|
||||
name = "DataNode";
|
||||
# port numbers for datanode changed between hadoop 2 and 3
|
||||
allowedTCPPorts = if versionAtLeast cfg.package.version "3" then [
|
||||
9864 # datanode.http.address
|
||||
9866 # datanode.address
|
||||
9867 # datanode.ipc.address
|
||||
]);
|
||||
] else [
|
||||
50075 # datanode.http.address
|
||||
50010 # datanode.address
|
||||
50020 # datanode.ipc.address
|
||||
];
|
||||
extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in
|
||||
if (d!= null) then (concatMapStringsSep "," (x: "["+x.type+"]file://"+x.path) cfg.hdfs.datanode.dataDirs) else d;
|
||||
})
|
||||
(mkIf cfg.hdfs.journalnode.enable {
|
||||
systemd.services.hdfs-journalnode = {
|
||||
description = "Hadoop HDFS JournalNode";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.hdfs.journalnode) restartIfChanged;
|
||||
|
||||
serviceConfig = {
|
||||
User = "hdfs";
|
||||
SyslogIdentifier = "hdfs-journalnode";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} journalnode";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.journalnode.openFirewall [
|
||||
(hadoopServiceConfig {
|
||||
name = "JournalNode";
|
||||
allowedTCPPorts = [
|
||||
8480 # dfs.journalnode.http-address
|
||||
8485 # dfs.journalnode.rpc-address
|
||||
]);
|
||||
];
|
||||
})
|
||||
(mkIf cfg.hdfs.zkfc.enable {
|
||||
systemd.services.hdfs-zkfc = {
|
||||
description = "Hadoop HDFS ZooKeeper failover controller";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.hdfs.zkfc) restartIfChanged;
|
||||
|
||||
serviceConfig = {
|
||||
User = "hdfs";
|
||||
SyslogIdentifier = "hdfs-zkfc";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} zkfc";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
(hadoopServiceConfig {
|
||||
name = "zkfc";
|
||||
description = "Hadoop HDFS ZooKeeper failover controller";
|
||||
})
|
||||
(mkIf cfg.hdfs.httpfs.enable {
|
||||
systemd.services.hdfs-httpfs = {
|
||||
description = "Hadoop httpfs";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.hdfs.httpfs) restartIfChanged;
|
||||
|
||||
environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
|
||||
|
||||
preStart = ''
|
||||
mkdir -p $HTTPFS_TEMP
|
||||
'';
|
||||
|
||||
serviceConfig = {
|
||||
User = "httpfs";
|
||||
SyslogIdentifier = "hdfs-httpfs";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} httpfs";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.httpfs.openFirewall [
|
||||
(hadoopServiceConfig {
|
||||
name = "HTTPFS";
|
||||
environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
|
||||
preStart = "mkdir -p $HTTPFS_TEMP";
|
||||
User = "httpfs";
|
||||
allowedTCPPorts = [
|
||||
14000 # httpfs.http.port
|
||||
]);
|
||||
];
|
||||
})
|
||||
(mkIf (
|
||||
cfg.hdfs.namenode.enable || cfg.hdfs.datanode.enable || cfg.hdfs.journalnode.enable || cfg.hdfs.zkfc.enable
|
||||
) {
|
||||
|
||||
(mkIf cfg.gatewayRole.enable {
|
||||
users.users.hdfs = {
|
||||
description = "Hadoop HDFS user";
|
||||
group = "hadoop";
|
||||
|
@ -199,5 +199,6 @@ in
|
|||
isSystemUser = true;
|
||||
};
|
||||
})
|
||||
|
||||
];
|
||||
}
|
||||
|
|
|
@ -13,23 +13,77 @@ let
|
|||
'';
|
||||
default = false;
|
||||
};
|
||||
extraFlags = mkOption{
|
||||
type = with types; listOf str;
|
||||
default = [];
|
||||
description = "Extra command line flags to pass to the service";
|
||||
example = [
|
||||
"-Dcom.sun.management.jmxremote"
|
||||
"-Dcom.sun.management.jmxremote.port=8010"
|
||||
];
|
||||
};
|
||||
extraEnv = mkOption{
|
||||
type = with types; attrsOf str;
|
||||
default = {};
|
||||
description = "Extra environment variables";
|
||||
};
|
||||
in
|
||||
{
|
||||
options.services.hadoop.yarn = {
|
||||
resourcemanager = {
|
||||
enable = mkEnableOption "Whether to run the Hadoop YARN ResourceManager";
|
||||
inherit restartIfChanged;
|
||||
enable = mkEnableOption "Hadoop YARN ResourceManager";
|
||||
inherit restartIfChanged extraFlags extraEnv;
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
default = false;
|
||||
description = ''
|
||||
Open firewall ports for resourcemanager
|
||||
'';
|
||||
};
|
||||
};
|
||||
nodemanager = {
|
||||
enable = mkEnableOption "Whether to run the Hadoop YARN NodeManager";
|
||||
inherit restartIfChanged;
|
||||
enable = mkEnableOption "Hadoop YARN NodeManager";
|
||||
inherit restartIfChanged extraFlags extraEnv;
|
||||
|
||||
resource = {
|
||||
cpuVCores = mkOption {
|
||||
description = "Number of vcores that can be allocated for containers.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
maximumAllocationVCores = mkOption {
|
||||
description = "The maximum virtual CPU cores any container can be allocated.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
memoryMB = mkOption {
|
||||
description = "Amount of physical memory, in MB, that can be allocated for containers.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
maximumAllocationMB = mkOption {
|
||||
description = "The maximum physical memory any container can be allocated.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
};
|
||||
|
||||
useCGroups = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Use cgroups to enforce resource limits on containers
|
||||
'';
|
||||
};
|
||||
|
||||
localDir = mkOption {
|
||||
description = "List of directories to store localized files in.";
|
||||
type = with types; nullOr (listOf path);
|
||||
example = [ "/var/lib/hadoop/yarn/nm" ];
|
||||
default = null;
|
||||
};
|
||||
|
||||
addBinBash = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
|
@ -39,7 +93,7 @@ in
|
|||
};
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
default = false;
|
||||
description = ''
|
||||
Open firewall ports for nodemanager.
|
||||
Because containers can listen on any ephemeral port, TCP ports 1024–65535 will be opened.
|
||||
|
@ -49,10 +103,7 @@ in
|
|||
};
|
||||
|
||||
config = mkMerge [
|
||||
(mkIf (
|
||||
cfg.yarn.resourcemanager.enable || cfg.yarn.nodemanager.enable
|
||||
) {
|
||||
|
||||
(mkIf cfg.gatewayRole.enable {
|
||||
users.users.yarn = {
|
||||
description = "Hadoop YARN user";
|
||||
group = "hadoop";
|
||||
|
@ -65,15 +116,19 @@ in
|
|||
description = "Hadoop YARN ResourceManager";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.yarn.resourcemanager) restartIfChanged;
|
||||
environment = cfg.yarn.resourcemanager.extraEnv;
|
||||
|
||||
serviceConfig = {
|
||||
User = "yarn";
|
||||
SyslogIdentifier = "yarn-resourcemanager";
|
||||
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
|
||||
" resourcemanager";
|
||||
" resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
services.hadoop.gatewayRole.enable = true;
|
||||
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
|
||||
8088 # resourcemanager.webapp.address
|
||||
8030 # resourcemanager.scheduler.address
|
||||
|
@ -94,6 +149,7 @@ in
|
|||
description = "Hadoop YARN NodeManager";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.yarn.nodemanager) restartIfChanged;
|
||||
environment = cfg.yarn.nodemanager.extraEnv;
|
||||
|
||||
preStart = ''
|
||||
# create log dir
|
||||
|
@ -101,8 +157,9 @@ in
|
|||
chown yarn:hadoop /var/log/hadoop/yarn/nodemanager
|
||||
|
||||
# set up setuid container executor binary
|
||||
umount /run/wrappers/yarn-nodemanager/cgroup/cpu || true
|
||||
rm -rf /run/wrappers/yarn-nodemanager/ || true
|
||||
mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop}
|
||||
mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop,cgroup/cpu}
|
||||
cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
|
||||
chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
|
||||
chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
|
||||
|
@ -114,11 +171,26 @@ in
|
|||
SyslogIdentifier = "yarn-nodemanager";
|
||||
PermissionsStartOnly = true;
|
||||
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
|
||||
" nodemanager";
|
||||
" nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
services.hadoop.gatewayRole.enable = true;
|
||||
|
||||
services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; {
|
||||
"yarn.nodemanager.local-dirs" = localDir;
|
||||
"yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
|
||||
"yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
|
||||
"yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
|
||||
"yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
|
||||
} // mkIf useCGroups {
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
|
||||
"yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPortRanges = [
|
||||
(mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
|
||||
];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue