mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-07-13 21:50:33 +03:00
Merge pull request #92048 from markuskowa/add-pmix
pmix: init at 3.1.5, add support to slurm, openmpi
This commit is contained in:
commit
6ce044250b
6 changed files with 134 additions and 38 deletions
|
@ -1,16 +1,52 @@
|
||||||
import ./make-test-python.nix ({ lib, ... }:
|
import ./make-test-python.nix ({ lib, pkgs, ... }:
|
||||||
let
|
let
|
||||||
mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
|
||||||
|
|
||||||
slurmconfig = {
|
slurmconfig = {
|
||||||
controlMachine = "control";
|
services.slurm = {
|
||||||
nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ];
|
controlMachine = "control";
|
||||||
partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ];
|
nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ];
|
||||||
extraConfig = ''
|
partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ];
|
||||||
AccountingStorageHost=dbd
|
extraConfig = ''
|
||||||
AccountingStorageType=accounting_storage/slurmdbd
|
AccountingStorageHost=dbd
|
||||||
'';
|
AccountingStorageType=accounting_storage/slurmdbd
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
environment.systemPackages = [ mpitest ];
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
systemd.tmpfiles.rules = [
|
||||||
|
"f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest"
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
mpitest = let
|
||||||
|
mpitestC = pkgs.writeText "mpitest.c" ''
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int rank, size, length;
|
||||||
|
char name[512];
|
||||||
|
|
||||||
|
MPI_Init (&argc, &argv);
|
||||||
|
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
|
||||||
|
MPI_Comm_size (MPI_COMM_WORLD, &size);
|
||||||
|
MPI_Get_processor_name (name, &length);
|
||||||
|
|
||||||
|
if ( rank == 0 ) printf("size=%d\n", size);
|
||||||
|
|
||||||
|
printf ("%s: hello world from process %d of %d\n", name, rank, size);
|
||||||
|
|
||||||
|
MPI_Finalize ();
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
'';
|
||||||
|
in pkgs.runCommandNoCC "mpitest" {} ''
|
||||||
|
mkdir -p $out/bin
|
||||||
|
${pkgs.openmpi}/bin/mpicc ${mpitestC} -o $out/bin/mpitest
|
||||||
|
'';
|
||||||
in {
|
in {
|
||||||
name = "slurm";
|
name = "slurm";
|
||||||
|
|
||||||
|
@ -21,37 +57,40 @@ in {
|
||||||
computeNode =
|
computeNode =
|
||||||
{ ...}:
|
{ ...}:
|
||||||
{
|
{
|
||||||
|
imports = [ slurmconfig ];
|
||||||
# TODO slurmd port and slurmctld port should be configurations and
|
# TODO slurmd port and slurmctld port should be configurations and
|
||||||
# automatically allowed by the firewall.
|
# automatically allowed by the firewall.
|
||||||
networking.firewall.enable = false;
|
|
||||||
services.slurm = {
|
services.slurm = {
|
||||||
client.enable = true;
|
client.enable = true;
|
||||||
} // slurmconfig;
|
};
|
||||||
};
|
};
|
||||||
in {
|
in {
|
||||||
|
|
||||||
control =
|
control =
|
||||||
{ ...}:
|
{ ...}:
|
||||||
{
|
{
|
||||||
networking.firewall.enable = false;
|
imports = [ slurmconfig ];
|
||||||
services.slurm = {
|
services.slurm = {
|
||||||
server.enable = true;
|
server.enable = true;
|
||||||
} // slurmconfig;
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
submit =
|
submit =
|
||||||
{ ...}:
|
{ ...}:
|
||||||
{
|
{
|
||||||
networking.firewall.enable = false;
|
imports = [ slurmconfig ];
|
||||||
services.slurm = {
|
services.slurm = {
|
||||||
enableStools = true;
|
enableStools = true;
|
||||||
} // slurmconfig;
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
dbd =
|
dbd =
|
||||||
{ pkgs, ... } :
|
{ pkgs, ... } :
|
||||||
{
|
{
|
||||||
networking.firewall.enable = false;
|
networking.firewall.enable = false;
|
||||||
|
systemd.tmpfiles.rules = [
|
||||||
|
"f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest"
|
||||||
|
];
|
||||||
services.slurm.dbdserver = {
|
services.slurm.dbdserver = {
|
||||||
enable = true;
|
enable = true;
|
||||||
storagePass = "password123";
|
storagePass = "password123";
|
||||||
|
@ -87,24 +126,7 @@ in {
|
||||||
''
|
''
|
||||||
start_all()
|
start_all()
|
||||||
|
|
||||||
# Set up authentication across the cluster
|
# Make sure DBD is up after DB initialization
|
||||||
for node in [submit, control, dbd, node1, node2, node3]:
|
|
||||||
|
|
||||||
node.wait_for_unit("default.target")
|
|
||||||
|
|
||||||
node.succeed("mkdir /etc/munge")
|
|
||||||
node.succeed(
|
|
||||||
"echo '${mungekey}' > /etc/munge/munge.key"
|
|
||||||
)
|
|
||||||
node.succeed("chmod 0400 /etc/munge/munge.key")
|
|
||||||
node.succeed("chown munge:munge /etc/munge/munge.key")
|
|
||||||
node.succeed("systemctl restart munged")
|
|
||||||
|
|
||||||
node.wait_for_unit("munged")
|
|
||||||
|
|
||||||
|
|
||||||
# Restart the services since they have probably failed due to the munge init
|
|
||||||
# failure
|
|
||||||
with subtest("can_start_slurmdbd"):
|
with subtest("can_start_slurmdbd"):
|
||||||
dbd.succeed("systemctl restart slurmdbd")
|
dbd.succeed("systemctl restart slurmdbd")
|
||||||
dbd.wait_for_unit("slurmdbd.service")
|
dbd.wait_for_unit("slurmdbd.service")
|
||||||
|
@ -137,5 +159,8 @@ in {
|
||||||
# find the srun job from above in the database
|
# find the srun job from above in the database
|
||||||
control.succeed("sleep 5")
|
control.succeed("sleep 5")
|
||||||
control.succeed("sacct | grep hostname")
|
control.succeed("sacct | grep hostname")
|
||||||
|
|
||||||
|
with subtest("run_PMIx_mpitest"):
|
||||||
|
submit.succeed("srun -N 3 --mpi=pmix mpitest | grep size=3")
|
||||||
'';
|
'';
|
||||||
})
|
})
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{ stdenv, fetchurl, fetchpatch, gfortran, perl, libnl
|
{ stdenv, fetchurl, fetchpatch, gfortran, perl, libnl
|
||||||
, rdma-core, zlib, numactl, libevent, hwloc, targetPackages, symlinkJoin
|
, rdma-core, zlib, numactl, libevent, hwloc, targetPackages, symlinkJoin
|
||||||
, libpsm2, libfabric
|
, libpsm2, libfabric, pmix
|
||||||
|
|
||||||
# Enable CUDA support
|
# Enable CUDA support
|
||||||
, cudaSupport ? false, cudatoolkit ? null
|
, cudaSupport ? false, cudatoolkit ? null
|
||||||
|
@ -46,7 +46,7 @@ in stdenv.mkDerivation rec {
|
||||||
'';
|
'';
|
||||||
|
|
||||||
buildInputs = with stdenv; [ gfortran zlib ]
|
buildInputs = with stdenv; [ gfortran zlib ]
|
||||||
++ lib.optionals isLinux [ libnl numactl ]
|
++ lib.optionals isLinux [ libnl numactl pmix ]
|
||||||
++ lib.optionals cudaSupport [ cudatoolkit ]
|
++ lib.optionals cudaSupport [ cudatoolkit ]
|
||||||
++ [ libevent hwloc ]
|
++ [ libevent hwloc ]
|
||||||
++ lib.optional (isLinux || isFreeBSD) rdma-core
|
++ lib.optional (isLinux || isFreeBSD) rdma-core
|
||||||
|
@ -55,8 +55,11 @@ in stdenv.mkDerivation rec {
|
||||||
nativeBuildInputs = [ perl ];
|
nativeBuildInputs = [ perl ];
|
||||||
|
|
||||||
configureFlags = with stdenv; lib.optional (!cudaSupport) "--disable-mca-dso"
|
configureFlags = with stdenv; lib.optional (!cudaSupport) "--disable-mca-dso"
|
||||||
++ lib.optional isLinux "--with-libnl=${libnl.dev}"
|
++ lib.optionals isLinux [
|
||||||
++ lib.optional enableSGE "--with-sge"
|
"--with-libnl=${libnl.dev}"
|
||||||
|
"--with-pmix=${pmix}"
|
||||||
|
"--with-pmix-libdir=${pmix}/lib"
|
||||||
|
] ++ lib.optional enableSGE "--with-sge"
|
||||||
++ lib.optional enablePrefix "--enable-mpirun-prefix-by-default"
|
++ lib.optional enablePrefix "--enable-mpirun-prefix-by-default"
|
||||||
# TODO: add UCX support, which is recommended to use with cuda for the most robust OpenMPI build
|
# TODO: add UCX support, which is recommended to use with cuda for the most robust OpenMPI build
|
||||||
# https://github.com/openucx/ucx
|
# https://github.com/openucx/ucx
|
||||||
|
|
48
pkgs/development/libraries/pmix/default.nix
Normal file
48
pkgs/development/libraries/pmix/default.nix
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
{ stdenv, fetchFromGitHub, perl, autoconf, automake
|
||||||
|
, libtool, flex, libevent, hwloc, munge, zlib
|
||||||
|
} :
|
||||||
|
|
||||||
|
let
|
||||||
|
version = "3.1.5";
|
||||||
|
|
||||||
|
in stdenv.mkDerivation {
|
||||||
|
pname = "pmix";
|
||||||
|
inherit version;
|
||||||
|
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
repo = "openpmix";
|
||||||
|
owner = "openpmix";
|
||||||
|
rev = "v${version}";
|
||||||
|
sha256 = "0fvfsig20amcigyn4v3gcdxc0jif44vqg37b8zzh0s8jqqj7jz5w";
|
||||||
|
};
|
||||||
|
|
||||||
|
postPatch = ''
|
||||||
|
patchShebangs ./autogen.pl
|
||||||
|
patchShebangs ./config
|
||||||
|
'';
|
||||||
|
|
||||||
|
nativeBuildInputs = [ perl autoconf automake libtool flex ];
|
||||||
|
|
||||||
|
buildInputs = [ libevent hwloc munge zlib ];
|
||||||
|
|
||||||
|
configureFlags = [
|
||||||
|
"--with-libevent=${libevent.dev}"
|
||||||
|
"--with-munge=${munge}"
|
||||||
|
"--with-hwloc=${hwloc.dev}"
|
||||||
|
];
|
||||||
|
|
||||||
|
preConfigure = ''
|
||||||
|
./autogen.pl
|
||||||
|
'';
|
||||||
|
|
||||||
|
enableParallelBuilding = true;
|
||||||
|
|
||||||
|
meta = with stdenv.lib; {
|
||||||
|
description = "Process Management Interface for HPC environments";
|
||||||
|
homepage = "https://openpmix.github.io/";
|
||||||
|
license = licenses.bsd3;
|
||||||
|
maintainers = [ maintainers.markuskowa ];
|
||||||
|
platforms = platforms.linux;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
, python, munge, perl, pam, zlib, shadow, coreutils
|
, python, munge, perl, pam, zlib, shadow, coreutils
|
||||||
, ncurses, libmysqlclient, gtk2, lua, hwloc, numactl
|
, ncurses, libmysqlclient, gtk2, lua, hwloc, numactl
|
||||||
, readline, freeipmi, xorg, lz4, rdma-core, nixosTests
|
, readline, freeipmi, xorg, lz4, rdma-core, nixosTests
|
||||||
|
, pmix
|
||||||
# enable internal X11 support via libssh2
|
# enable internal X11 support via libssh2
|
||||||
, enableX11 ? true
|
, enableX11 ? true
|
||||||
}:
|
}:
|
||||||
|
@ -26,6 +27,8 @@ stdenv.mkDerivation rec {
|
||||||
# increase string length to allow for full
|
# increase string length to allow for full
|
||||||
# path of 'echo' in nix store
|
# path of 'echo' in nix store
|
||||||
./common-env-echo.patch
|
./common-env-echo.patch
|
||||||
|
# Required for configure to pick up the right dlopen path
|
||||||
|
./pmix-configure.patch
|
||||||
];
|
];
|
||||||
|
|
||||||
prePatch = ''
|
prePatch = ''
|
||||||
|
@ -46,6 +49,7 @@ stdenv.mkDerivation rec {
|
||||||
curl python munge perl pam zlib
|
curl python munge perl pam zlib
|
||||||
libmysqlclient ncurses gtk2 lz4 rdma-core
|
libmysqlclient ncurses gtk2 lz4 rdma-core
|
||||||
lua hwloc numactl readline freeipmi shadow.su
|
lua hwloc numactl readline freeipmi shadow.su
|
||||||
|
pmix
|
||||||
] ++ stdenv.lib.optionals enableX11 [ xorg.xauth ];
|
] ++ stdenv.lib.optionals enableX11 [ xorg.xauth ];
|
||||||
|
|
||||||
configureFlags = with stdenv.lib;
|
configureFlags = with stdenv.lib;
|
||||||
|
@ -56,6 +60,7 @@ stdenv.mkDerivation rec {
|
||||||
"--with-zlib=${zlib}"
|
"--with-zlib=${zlib}"
|
||||||
"--with-ofed=${rdma-core}"
|
"--with-ofed=${rdma-core}"
|
||||||
"--sysconfdir=/etc/slurm"
|
"--sysconfdir=/etc/slurm"
|
||||||
|
"--with-pmix=${pmix}"
|
||||||
] ++ (optional (gtk2 == null) "--disable-gtktest")
|
] ++ (optional (gtk2 == null) "--disable-gtktest")
|
||||||
++ (optional (!enableX11) "--disable-x11");
|
++ (optional (!enableX11) "--disable-x11");
|
||||||
|
|
||||||
|
|
13
pkgs/servers/computing/slurm/pmix-configure.patch
Normal file
13
pkgs/servers/computing/slurm/pmix-configure.patch
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
diff --git a/configure b/configure
|
||||||
|
index 1cf53bc..ab68441 100755
|
||||||
|
--- a/configure
|
||||||
|
+++ b/configure
|
||||||
|
@@ -21207,7 +21207,7 @@ rm -f conftest.err conftest.i conftest.$ac_ext
|
||||||
|
as_fn_error $? "error processing $x_ac_cv_pmix_libdir: PMIx v3.x was already found in one of the previous paths" "$LINENO" 5
|
||||||
|
fi
|
||||||
|
_x_ac_pmix_v3_found="1"
|
||||||
|
- PMIX_V3_CPPFLAGS="-I$x_ac_cv_pmix_dir/include"
|
||||||
|
+ PMIX_V3_CPPFLAGS="-I$x_ac_cv_pmix_dir/include -DPMIXP_V3_LIBPATH=\\\"$x_ac_cv_pmix_libdir\\\""
|
||||||
|
if test "$ac_with_rpath" = "yes"; then
|
||||||
|
PMIX_V3_LDFLAGS="-Wl,-rpath -Wl,$x_ac_cv_pmix_libdir -L$x_ac_cv_pmix_libdir"
|
||||||
|
else
|
|
@ -6190,6 +6190,8 @@ in
|
||||||
|
|
||||||
pmacct = callPackage ../tools/networking/pmacct { };
|
pmacct = callPackage ../tools/networking/pmacct { };
|
||||||
|
|
||||||
|
pmix = callPackage ../development/libraries/pmix { };
|
||||||
|
|
||||||
polygraph = callPackage ../tools/networking/polygraph { };
|
polygraph = callPackage ../tools/networking/polygraph { };
|
||||||
|
|
||||||
progress = callPackage ../tools/misc/progress { };
|
progress = callPackage ../tools/misc/progress { };
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue