rocmPackages: 6.0.2 -> 6.3.3, and various ROCm build fixes and new packages (#367695)

This commit is contained in:
Pavol Rusnak 2025-03-24 21:11:47 +01:00 committed by GitHub
commit 2925f716f8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
114 changed files with 4659 additions and 3195 deletions

View file

@ -389,6 +389,10 @@
- `python3Packages.jaeger-client` was removed because it was deprecated upstream. [OpenTelemetry](https://opentelemetry.io) is the recommended replacement.
- `rocmPackages.rocm-thunk` has been removed and its functionality has been integrated with the ROCm CLR. Use `rocmPackages.clr` instead.
- `rocmPackages.clang-ocl` has been removed. [It was deprecated by AMD in 2023.](https://github.com/ROCm/clang-ocl)
- `nodePackages.meshcommander` has been removed, as the package was deprecated by Intel.
- The default version of `z3` has been updated from 4.8 to 4.13. There are still a few packages that need specific older versions; those will continue to be maintained as long as other packages depend on them but may be removed in the future.

View file

@ -152,16 +152,11 @@ effectiveStdenv.mkDerivation (finalAttrs: {
++ optionals cudaSupport [
(cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
]
++ optionals rocmSupport [
(cmakeFeature "CMAKE_C_COMPILER" "hipcc")
(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
# and select the line that matches the current nixpkgs version of rocBLAS.
# Should likely use `rocmPackages.clr.gpuTargets`.
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
]
++ optionals rocmSupport ([
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.clr.hipClangPath}/clang++")
# TODO: this should become `clr.gpuTargets` in the future.
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmPackages.rocblas.amdgpu_targets)
])
++ optionals metalSupport [
(cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
(cmakeBool "LLAMA_METAL_EMBED_LIBRARY" true)

View file

@ -70,6 +70,7 @@ let
rocmLibs = [
rocmPackages.clr
rocmPackages.hipblas-common
rocmPackages.hipblas
rocmPackages.rocblas
rocmPackages.rocsolver
@ -77,10 +78,9 @@ let
rocmPackages.rocm-device-libs
rocmPackages.rocm-smi
];
rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
rocmPath = buildEnv {
name = "rocm-path";
paths = rocmLibs ++ [ rocmClang ];
paths = rocmLibs;
};
cudaLibs = [
@ -131,6 +131,8 @@ let
goBuild =
if enableCuda then
buildGoModule.override { stdenv = cudaPackages.backendStdenv; }
else if enableRocm then
buildGoModule.override { stdenv = rocmPackages.stdenv; }
else
buildGoModule;
inherit (lib) licenses platforms maintainers;
@ -148,6 +150,14 @@ goBuild {
ROCM_PATH = rocmPath;
CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
HIP_PATH = rocmPath;
CFLAGS = "-Wno-c++17-extensions -I${rocmPath}/include";
CXXFLAGS = "-Wno-c++17-extensions -I${rocmPath}/include";
}
// lib.optionalAttrs (enableRocm && (rocmPackages.clr.localGpuTargets or false)) {
# If rocm CLR is set to build for an exact set of targets reuse that target list,
# otherwise let ollama use its builtin defaults
HIP_ARCHS = lib.concatStringsSep ";" rocmPackages.clr.localGpuTargets;
}
// lib.optionalAttrs enableCuda { CUDA_PATH = cudaPath; };

View file

@ -33,8 +33,10 @@ let
paths = rocmList;
};
# rocm build fails with gcc stdenv due to unrecognised arg parallel-jobs
stdenv' = if enableRocm then rocmPackages.stdenv else stdenv;
in
stdenv.mkDerivation rec {
stdenv'.mkDerivation rec {
pname = "ucx";
version = "1.18.0";

View file

@ -117,18 +117,11 @@ stdenv.mkDerivation {
"test"
];
patches = lib.optionals (version == "2.9.0") [
# get ROCm version directly
# https://github.com/icl-utk-edu/magma/pull/27
(fetchpatch {
url = "https://github.com/icl-utk-edu/magma/commit/10fe816b763c41099fa1c978a79d6869246671cf.patch";
hash = "sha256-qSY5ACMHyHofJdQKyPqx8sI8GbPD6IZezmCd8qOS5OM=";
})
];
# Fixup for the python test runners
postPatch =
''
# For rocm version script invoked by cmake
patchShebangs tools/
# Fixup for the python test runners
patchShebangs ./testing/run_{tests,summarize}.py
''
+ lib.optionalString (strings.versionOlder version "2.9.0") ''
@ -201,6 +194,10 @@ stdenv.mkDerivation {
(strings.cmakeFeature "MIN_ARCH" minArch) # Disarms magma's asserts
]
++ lists.optionals rocmSupport [
# Can be removed once https://github.com/icl-utk-edu/magma/pull/27 is merged
# Can't easily apply the PR as a patch because we rely on the tarball with pregenerated
# hipified files ∴ fetchpatch of the PR will apply cleanly but fail to build
(strings.cmakeFeature "ROCM_CORE" "${effectiveRocmPackages.clr}")
(strings.cmakeFeature "CMAKE_C_COMPILER" "${effectiveRocmPackages.clr}/bin/hipcc")
(strings.cmakeFeature "CMAKE_CXX_COMPILER" "${effectiveRocmPackages.clr}/bin/hipcc")
];

View file

@ -92,8 +92,9 @@ buildPythonPackage rec {
# set the environment variable, CC, which conflicts with standard environment
"test_patch_environment_key_exists"
]
++ lib.optionals (pythonAtLeast "3.13") [
++ lib.optionals ((pythonAtLeast "3.13") || (torch.rocmSupport or false)) [
# RuntimeError: Dynamo is not supported on Python 3.13+
# OR torch.compile tests broken on torch 2.5 + rocm
"test_can_unwrap_distributed_compiled_model_keep_torch_compile"
"test_can_unwrap_distributed_compiled_model_remove_torch_compile"
"test_convert_to_fp32"

View file

@ -36,6 +36,7 @@
symlinkJoin,
which,
pybind11,
pkg-config,
removeReferencesTo,
# Build inputs
@ -69,7 +70,13 @@
# (dependencies without cuda support).
# Instead we should rely on overlays and nixpkgsFun.
# (@SomeoneSerge)
_tritonEffective ? if cudaSupport then triton-cuda else triton,
_tritonEffective ?
if cudaSupport then
triton-cuda
else if rocmSupport then
rocmPackages.triton
else
triton,
triton-cuda,
# Disable MKLDNN on aarch64-darwin, it negatively impacts performance,
@ -91,7 +98,7 @@
# ROCm dependencies
rocmSupport ? config.rocmSupport,
rocmPackages_5,
rocmPackages,
gpuTargets ? [ ],
vulkanSupport ? false,
@ -111,8 +118,6 @@ let
triton = throw "python3Packages.torch: use _tritonEffective instead of triton to avoid divergence";
rocmPackages = rocmPackages_5;
setBool = v: if v then "1" else "0";
# https://github.com/pytorch/pytorch/blob/v2.4.0/torch/utils/cpp_extension.py#L1953
@ -180,7 +185,7 @@ let
clr
rccl
miopen
miopengemm
aotriton
rocrand
rocblas
rocsparse
@ -192,10 +197,12 @@ let
rocfft
rocsolver
hipfft
hiprand
hipsolver
hipblas-common
hipblas
hipblaslt
rocminfo
rocm-thunk
rocm-comgr
rocm-device-libs
rocm-runtime
@ -225,8 +232,6 @@ let
# In particular, this triggered warnings from cuda's `aliases.nix`
"Magma cudaPackages does not match cudaPackages" =
cudaSupport && (effectiveMagma.cudaPackages.cudaVersion != cudaPackages.cudaVersion);
"Rocm support is currently broken because `rocmPackages.hipblaslt` is unpackaged. (2024-06-09)" =
rocmSupport;
};
unroll-src = writeShellScript "unroll-src" ''
@ -293,6 +298,11 @@ buildPythonPackage rec {
# annotations (3.7), print_function (3.0), with_statement (2.6) are all supported
sed -i -e "/from __future__ import/d" **.py
substituteInPlace third_party/NNPACK/CMakeLists.txt \
--replace-fail "PYTHONPATH=" 'PYTHONPATH=$ENV{PYTHONPATH}:'
# flag from cmakeFlags doesn't work, not clear why
# setting it at the top of NNPACK's own CMakeLists does
sed -i '2s;^;set(PYTHON_SIX_SOURCE_DIR ${six.src})\n;' third_party/NNPACK/CMakeLists.txt
''
+ lib.optionalString rocmSupport ''
# https://github.com/facebookincubator/gloo/pull/297
@ -365,6 +375,10 @@ buildPythonPackage rec {
# We only do an imports check, so do not build tests either.
BUILD_TEST = setBool false;
# ninja hook doesn't automatically turn on ninja
# because pytorch setup.py is responsible for this
CMAKE_GENERATOR = "Ninja";
# Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
# it by default. PyTorch currently uses its own vendored version
# of oneDNN through Intel iDeep.
@ -375,14 +389,15 @@ buildPythonPackage rec {
# Also avoids pytorch exporting the headers of pybind11
USE_SYSTEM_PYBIND11 = true;
# NB technical debt: building without NNPACK as workaround for missing `six`
USE_NNPACK = 0;
# Multicore CPU convnet support
USE_NNPACK = 1;
# Explicitly enable MPS for Darwin
USE_MPS = setBool stdenv.hostPlatform.isDarwin;
cmakeFlags =
[
(lib.cmakeFeature "PYTHON_SIX_SOURCE_DIR" "${six.src}")
# (lib.cmakeBool "CMAKE_FIND_DEBUG_MODE" true)
(lib.cmakeFeature "CUDAToolkit_VERSION" cudaPackages.cudaVersion)
]
@ -440,6 +455,9 @@ buildPythonPackage rec {
}
// lib.optionalAttrs vulkanSupport {
VULKAN_SDK = shaderc.bin;
}
// lib.optionalAttrs rocmSupport {
AOTRITON_INSTALLED_PREFIX = "${rocmPackages.aotriton}";
};
nativeBuildInputs =
@ -448,6 +466,7 @@ buildPythonPackage rec {
which
ninja
pybind11
pkg-config
removeReferencesTo
]
++ lib.optionals cudaSupport (
@ -501,7 +520,10 @@ buildPythonPackage rec {
]
++ lib.optionals tritonSupport [ _tritonEffective ]
++ lib.optionals MPISupport [ mpi ]
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
++ lib.optionals rocmSupport [
rocmtoolkit_joined
rocmPackages.clr # Added separately so setup hook applies
];
pythonRelaxDeps = [
"sympy"

View file

@ -36,7 +36,6 @@ let
clr
rccl
miopen
miopengemm
rocrand
rocblas
rocsparse
@ -49,9 +48,9 @@ let
rocsolver
hipfft
hipsolver
hipblas-common
hipblas
rocminfo
rocm-thunk
rocm-comgr
rocm-device-libs
rocm-runtime

View file

@ -0,0 +1,89 @@
{
lib,
stdenv,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
cmake,
pkg-config,
libdrm,
wrapPython,
autoPatchelfHook,
}:
let
esmi_ib_src = fetchFromGitHub {
owner = "amd";
repo = "esmi_ib_library";
rev = "esmi_pkg_ver-3.0.3";
hash = "sha256-q0w5c5c+CpXkklmSyfzc+sbkt4cHNxscGJA3AXwvHxQ=";
};
in
stdenv.mkDerivation (finalAttrs: {
pname = "amdsmi";
version = "6.3.3";
src = fetchFromGitHub {
owner = "rocm";
repo = "amdsmi";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-hrPqd4ZWqzTG7JRfVwc1SZx6TNS0Q/LFg8yDxrM3mPo=";
};
postPatch = ''
substituteInPlace goamdsmi_shim/CMakeLists.txt \
--replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \
--replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#'
# Manually unpack esmi_ib_src and add amd_hsmp.h so execute-process git clone doesn't run
cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library
mkdir -p ./esmi_ib_library/include/asm
cp ./include/amd_smi/impl/amd_hsmp.h ./esmi_ib_library/include/asm/amd_hsmp.h
'';
patches = [
# Fix ld.lld undefined reference: drmGetVersion
(fetchpatch {
url = "https://github.com/ROCm/amdsmi/commit/c3864bf6171970d86dc50fd23f06377736823997.patch";
hash = "sha256-zRG1tBD8sIQCWdKfCbXC/Z/6d6NTrRYvRpddPWdM4j8=";
})
];
nativeBuildInputs = [
cmake
pkg-config
wrapPython
autoPatchelfHook
];
buildInputs = [
libdrm
];
cmakeFlags = [
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
];
postInstall = ''
wrapPythonProgramsIn $out
rm $out/bin/amd-smi
ln -sf $out/libexec/amdsmi_cli/amdsmi_cli.py $out/bin/amd-smi
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "System management interface for AMD GPUs supported by ROCm";
homepage = "https://github.com/ROCm/rocm_smi_lib";
license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = [ "x86_64-linux" ];
};
})

View file

@ -0,0 +1,226 @@
{
lib,
stdenv,
fetchFromGitHub,
cmake,
rocm-cmake,
clr,
rocblas,
rocsolver,
gtest,
msgpack,
libxml2,
python3,
python3Packages,
openmp,
hipblas-common,
hipblas,
nlohmann_json,
triton-llvm,
rocmlir,
lapack-reference,
ninja,
ncurses,
libffi,
zlib,
zstd,
xz,
pkg-config,
buildTests ? false,
buildBenchmarks ? false,
buildSamples ? false,
gpuTargets ? [
# aotriton GPU support list:
# https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
"gfx90a"
"gfx942"
"gfx1100"
"gfx1101"
],
}:
stdenv.mkDerivation (
finalAttrs:
let
py = python3.withPackages (ps: [
ps.pyyaml
ps.distutils
ps.setuptools
ps.packaging
ps.numpy
ps.wheel
ps.filelock
ps.iniconfig
ps.pluggy
ps.pybind11
]);
gpuTargets' = lib.concatStringsSep ";" gpuTargets;
compiler = "amdclang++";
cFlags = "-O3 -DNDEBUG";
triton-llvm' = triton-llvm;
in
{
pname = "aotriton";
version = "0.8.2b";
src = fetchFromGitHub {
owner = "ROCm";
repo = "aotriton";
rev = "${finalAttrs.version}";
hash = "sha256-gSzGYWfyUNLyzqpu3BM8rjFFL7cRVZ+w9L5pnh9QGz4=";
fetchSubmodules = true;
};
env.CXX = compiler;
env.ROCM_PATH = "${clr}";
requiredSystemFeatures = [ "big-parallel" ];
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
]
++ lib.optionals buildSamples [
"sample"
];
# Need an empty cuda.h for this to compile
# Better than pulling in unfree cuda headers
postPatch = ''
touch third_party/triton/third_party/nvidia/include/cuda.h
'';
doCheck = false;
doInstallCheck = false;
nativeBuildInputs = [
cmake
rocm-cmake
pkg-config
py
clr
ninja
];
buildInputs =
[
rocblas
rocsolver
hipblas-common
hipblas
openmp
libffi
ncurses
xz
nlohmann_json
rocmlir
msgpack
libxml2
python3Packages.msgpack
zlib
zstd
]
++ lib.optionals buildTests [
gtest
]
++ lib.optionals (buildTests || buildBenchmarks) [
lapack-reference
];
env.TRITON_OFFLINE_BUILD = 1;
env.LLVM_SYSPATH = "${triton-llvm'}";
env.JSON_SYSPATH = nlohmann_json;
env.MLIR_DIR = "${triton-llvm'}/lib/cmake/mlir";
env.CXXFLAGS = "-I/build/source/third_party/triton/third_party/nvidia/backend/include";
# Fix up header issues in triton: https://github.com/triton-lang/triton/pull/3985/files
preConfigure = ''
mkdir third_party/triton/third_party/nvidia/backend/include/
touch third_party/triton/third_party/nvidia/backend/include/cuda.h
find third_party/triton -type f -exec sed -i 's|[<]cupti.h[>]|"cupti.h"|g' {} +
find third_party/triton -type f -exec sed -i 's|[<]cuda.h[>]|"cuda.h"|g' {} +
sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' CMakeLists.txt
sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' CMakeLists.txt
sed -i '2s;^;set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
sed -i '2s;^;set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")\n;' third_party/triton/CMakeLists.txt
substituteInPlace third_party/triton/python/setup.py \
--replace-fail "from distutils.command.clean import clean" "import setuptools;from distutils.command.clean import clean" \
--replace-fail 'system == "Linux"' 'False'
cmakeFlagsArray+=(
'-DCMAKE_C_FLAGS_RELEASE=${cFlags}'
'-DCMAKE_CXX_FLAGS_RELEASE=${cFlags}'
)
prependToVar cmakeFlags "-GNinja"
mkdir -p /build/tmp-home
export HOME=/build/tmp-home
'';
# Excerpt from README:
# Note: do not run ninja separately, due to the limit of the current build system,
# ninja install will run the whole build process unconditionally.
dontBuild = true;
installPhase = ''
runHook preInstall
ninja -v install
runHook postInstall
'';
cmakeFlags =
[
"-Wno-dev"
"-DAOTRITON_NOIMAGE_MODE=ON" # FIXME: Should be able to build with object code but generate_shim is failing
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
"-DCMAKE_CXX_COMPILER=${compiler}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DAMDGPU_TARGETS=${gpuTargets'}"
"-DGPU_TARGETS=${gpuTargets'}"
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
]
++ lib.optionals buildBenchmarks [
"-DBUILD_CLIENTS_BENCHMARKS=ON"
]
++ lib.optionals buildSamples [
"-DBUILD_CLIENTS_SAMPLES=ON"
];
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/hipblas-test $test/bin
''
+ lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/hipblas-bench $benchmark/bin
''
+ lib.optionalString buildSamples ''
mkdir -p $sample/bin
mv $out/bin/example-* $sample/bin
''
+ lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
rmdir $out/bin
'';
meta = with lib; {
description = "ROCm Ahead of Time (AOT) Triton Math Library ";
homepage = "https://github.com/ROCm/aotriton";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
};
}
)

View file

@ -1,45 +0,0 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocm-device-libs,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "clang-ocl";
version = "6.0.2";
src = fetchFromGitHub {
owner = "ROCm";
repo = "clang-ocl";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-uMSvcVJj+me2E+7FsXZ4l4hTcK6uKEegXpkHGcuist0=";
};
nativeBuildInputs = [
cmake
rocm-cmake
];
buildInputs = [ rocm-device-libs ];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
meta = with lib; {
description = "OpenCL compilation with clang compiler";
homepage = "https://github.com/ROCm/clang-ocl";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,40 @@
diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake
index 3f233b72f..67bdc62ee 100644
--- a/rocclr/cmake/ROCclr.cmake
+++ b/rocclr/cmake/ROCclr.cmake
@@ -44,6 +44,19 @@ find_package(Threads REQUIRED)
find_package(AMD_OPENCL)
+# Find X11 package
+find_package(X11 REQUIRED)
+if(NOT X11_FOUND)
+ message(FATAL_ERROR "X11 libraries not found")
+endif()
+
+# Find OpenGL package
+find_package(OpenGL REQUIRED)
+if(NOT OpenGL_FOUND)
+ message(FATAL_ERROR "OpenGL not found")
+endif()
+
+
add_library(rocclr STATIC)
include(ROCclrCompilerOptions)
@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC
${ROCCLR_SRC_DIR}/device
${ROCCLR_SRC_DIR}/elf
${ROCCLR_SRC_DIR}/include
+ ${X11_INCLUDE_DIR}
+ ${OPENGL_INCLUDE_DIR}
${AMD_OPENCL_INCLUDE_DIRS})
-target_link_libraries(rocclr PUBLIC Threads::Threads)
+target_link_libraries(rocclr PUBLIC
+ Threads::Threads
+ ${X11_LIBRARIES}
+ ${OPENGL_LIBRARIES})
# IPC on Windows is not supported
if(UNIX)
target_link_libraries(rocclr PUBLIC rt)

View file

@ -4,52 +4,58 @@
callPackage,
fetchFromGitHub,
fetchpatch,
fetchurl,
rocmUpdateScript,
makeWrapper,
cmake,
perl,
clang,
hip-common,
hipcc,
rocm-device-libs,
rocm-comgr,
rocm-runtime,
rocm-core,
roctracer,
rocminfo,
rocm-smi,
numactl,
libffi,
zstd,
zlib,
libGL,
libxml2,
libX11,
python3Packages,
rocm-merged-llvm,
khronos-ocl-icd-loader,
gcc-unwrapped,
writeShellScriptBin,
localGpuTargets ? null,
}:
let
inherit (rocm-core) ROCM_LIBPATCH_VERSION;
hipClang = rocm-merged-llvm;
hipClangPath = "${hipClang}/bin";
wrapperArgs = [
"--prefix PATH : $out/bin"
"--prefix LD_LIBRARY_PATH : ${rocm-runtime}"
"--set HIP_PLATFORM amd"
"--set HIP_PATH $out"
"--set HIP_CLANG_PATH ${clang}/bin"
"--set HIP_CLANG_PATH ${hipClangPath}"
"--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode"
"--set HSA_PATH ${rocm-runtime}"
"--set ROCM_PATH $out"
];
# https://github.com/NixOS/nixpkgs/issues/305641
# Not needed when 3.29.2 is in unstable
cmake' = cmake.overrideAttrs (old: rec {
version = "3.29.2";
src = fetchurl {
url = "https://cmake.org/files/v${lib.versions.majorMinor version}/cmake-${version}.tar.gz";
hash = "sha256-NttLaSaqt0G6bksuotmckZMiITIwi03IJNQSPLcwNS4=";
};
});
amdclang = writeShellScriptBin "amdclang" ''
exec clang "$@"
'';
amdclangxx = writeShellScriptBin "amdclang++" ''
exec clang++ "$@"
'';
in
stdenv.mkDerivation (finalAttrs: {
pname = "clr";
version = "6.0.2";
version = "6.3.3";
outputs = [
"out"
@ -60,15 +66,17 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "clr";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-ZMpA7vCW2CcpGdBLZfPimMHcgjhN1PHuewJiYwZMgGY=";
hash = "sha256-4qjfnn0kto2sNaSumXxHRHFrf3a3RZILOdhVSxkEs1I=";
};
nativeBuildInputs = [
makeWrapper
cmake'
cmake
perl
python3Packages.python
python3Packages.cppheaderparser
amdclang
amdclangxx
];
buildInputs = [
@ -76,9 +84,15 @@ stdenv.mkDerivation (finalAttrs: {
libGL
libxml2
libX11
khronos-ocl-icd-loader
hipClang
libffi
zstd
zlib
];
propagatedBuildInputs = [
rocm-core
rocm-device-libs
rocm-comgr
rocm-runtime
@ -86,6 +100,7 @@ stdenv.mkDerivation (finalAttrs: {
];
cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries
"-DCLR_BUILD_HIP=ON"
"-DCLR_BUILD_OCL=ON"
@ -94,6 +109,9 @@ stdenv.mkDerivation (finalAttrs: {
"-DHIP_PLATFORM=amd"
"-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext"
"-DROCM_PATH=${rocminfo}"
"-DBUILD_ICD=ON"
"-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds?
"-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}"
# Temporarily set variables to work around upstream CMakeLists issue
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
@ -102,26 +120,36 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_LIBDIR=lib"
];
env.LLVM_DIR = "";
patches = [
./cmake-find-x11-libgl.patch
(fetchpatch {
name = "add-missing-operators.patch";
url = "https://github.com/ROCm/clr/commit/86bd518981b364c138f9901b28a529899d8654f3.patch";
hash = "sha256-lbswri+zKLxif0hPp4aeJDeVfadhWZz4z+m+G2XcCPI=";
# Fix handling of old fatbin version https://github.com/ROCm/clr/issues/99
sha256 = "sha256-CK/QwgWJQEruiG4DqetF9YM0VEWpSiUMxAf1gGdJkuA=";
url = "https://src.fedoraproject.org/rpms/rocclr/raw/rawhide/f/0001-handle-v1-of-compressed-fatbins.patch";
})
(fetchpatch {
name = "static-functions.patch";
url = "https://github.com/ROCm/clr/commit/77c581a3ebd47b5e2908973b70adea66891159ee.patch";
hash = "sha256-auBedbd7rghlKav7A9V6l64J7VmtE9GizIdi5gWj+fs=";
# improve rocclr isa compatibility check
sha256 = "sha256-wUrhpYN68AbEXeFU5f366C6peqHyq25kujJXY/bBJMs=";
url = "https://github.com/GZGavinZhao/clr/commit/22c17a0ac09c6b77866febf366591f669a1ed133.patch";
})
(fetchpatch {
name = "extend-hip-isa-compatibility-check.patch";
url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0026-extend-hip-isa-compatibility-check.patch";
hash = "sha256-eG0ALZZQLRzD7zJueJFhi2emontmYy6xx8Rsm346nQI=";
# [PATCH] Improve hipamd compat check
sha256 = "sha256-uZQ8rMrWH61CCbxwLqQGggDmXFmYTi6x8OcgYPrZRC8=";
url = "https://github.com/GZGavinZhao/clr/commit/63c6ee630966744d4199fdfb854e98d2da9e1122.patch";
})
(fetchpatch {
name = "improve-rocclr-isa-compatibility-check.patch";
url = "https://salsa.debian.org/rocm-team/rocm-hipamd/-/raw/d6d20142c37e1dff820950b16ff8f0523241d935/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch";
hash = "sha256-8eowuRiOAdd9ucKv4Eg9FPU7c6367H3eP3fRAGfXc6Y=";
# [PATCH] SWDEV-504340 - Move cast of cl_mem inside the condition
# Fixes crash due to UB in KernelBlitManager::setArgument
sha256 = "sha256-nL4CZ7EOXqsTVUtYhuu9DLOMpnMeMRUhkhylEQLTg9I=";
url = "https://github.com/ROCm/clr/commit/fa63919a6339ea2a61111981ba2362c97fbdf743.patch";
})
(fetchpatch {
# [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues
sha256 = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4=";
url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch";
})
];
@ -131,83 +159,121 @@ stdenv.mkDerivation (finalAttrs: {
# We're not on Windows so these are never installed to hipcc...
substituteInPlace hipamd/CMakeLists.txt \
--replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \
--replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""
--replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \
--replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""
substituteInPlace hipamd/src/hip_embed_pch.sh \
--replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang"
# https://lists.debian.org/debian-ai/2024/02/msg00178.html
substituteInPlace rocclr/utils/flags.hpp \
--replace-fail "HIP_USE_RUNTIME_UNBUNDLER, false" "HIP_USE_RUNTIME_UNBUNDLER, true"
--replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang"
substituteInPlace opencl/khronos/icd/loader/icd_platform.h \
--replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \
'#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";'
# new unbundler has better error messages, defaulting it on
substituteInPlace rocclr/utils/flags.hpp \
--replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true"
'';
postInstall = ''
chmod +x $out/bin/*
patchShebangs $out/bin
# hipcc.bin and hipconfig.bin is mysteriously never installed
cp -a ${hipcc}/bin/{hipcc.bin,hipconfig.bin} $out/bin
cp ${amdclang}/bin/* $out/bin/
cp ${amdclangxx}/bin/* $out/bin/
wrapProgram $out/bin/hipcc.bin ${lib.concatStringsSep " " wrapperArgs}
wrapProgram $out/bin/hipconfig.bin ${lib.concatStringsSep " " wrapperArgs}
wrapProgram $out/bin/hipcc.pl ${lib.concatStringsSep " " wrapperArgs}
wrapProgram $out/bin/hipconfig.pl ${lib.concatStringsSep " " wrapperArgs}
for prog in hip{cc,config}{,.pl}; do
wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs}
done
mkdir -p $out/nix-support/
echo '
export HIP_PATH="${placeholder "out"}"
export HIP_PLATFORM=amd
export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
export NIX_CC_USE_RESPONSE_FILE=0
export HIP_CLANG_PATH="${hipClangPath}"
export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}"
export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook
# Just link rocminfo, it's easier
ln -s ${rocminfo}/bin/* $out/bin
ln -s ${rocm-core}/include/* $out/include/
# Replace rocm-opencl-icd functionality
mkdir -p $icd/etc/OpenCL/vendors
echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd
# add version info to output (downstream rocmPackages look for this)
mkdir $out/.info
echo "${finalAttrs.version}" > $out/.info/version
ln -s ${rocm-core}/.info/ $out/.info
ln -s ${hipClang} $out/llvm
'';
passthru = {
# All known and valid general GPU targets
# We cannot use this for each ROCm library, as each defines their own supported targets
# See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
gpuTargets = lib.forEach [
"803"
"900"
"906"
"908"
"90a"
"940"
"941"
"942"
"1010"
"1012"
"1030"
"1100"
"1101"
"1102"
] (target: "gfx${target}");
disallowedRequisites = [
gcc-unwrapped
];
updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
page = "tags?per_page=1";
filter = ".[0].name | split(\"-\") | .[1]";
};
passthru =
{
# All known and valid general GPU targets
# We cannot use this for each ROCm library, as each defines their own supported targets
# See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
# Generic targets are not yet available in rocm-6.3.1 llvm
gpuTargets = lib.forEach [
# "9-generic"
"900" # MI25, Vega 56/64
"906" # MI50/60, Radeon VII
"908" # MI100
"90a" # MI210 / MI250
# "9-4-generic"
# 940/1 - never released publicly, maybe HPE cray specific MI3xx?
"942" # MI300
# "10-1-generic"
"1010"
"1012"
# "10-3-generic"
"1030" # W6800, various Radeon cards
# "11-generic"
"1100"
"1101"
"1102"
] (target: "gfx${target}");
impureTests = {
rocm-smi = callPackage ./test-rocm-smi.nix {
inherit rocm-smi;
clr = finalAttrs.finalPackage;
inherit hipClangPath;
updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
page = "tags?per_page=4";
};
opencl-example = callPackage ./test-opencl-example.nix {
clr = finalAttrs.finalPackage;
impureTests = {
rocm-smi = callPackage ./test-rocm-smi.nix {
inherit rocm-smi;
clr = finalAttrs.finalPackage;
};
opencl-example = callPackage ./test-opencl-example.nix {
clr = finalAttrs.finalPackage;
};
};
selectGpuTargets =
{
supported ? [ ],
}:
supported;
gpuArchSuffix = "";
}
// lib.optionalAttrs (localGpuTargets != null) {
inherit localGpuTargets;
gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets);
selectGpuTargets =
{
supported ? [ ],
}:
if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported;
};
};
meta = with lib; {
description = "AMD Common Language Runtime for hipamd, opencl, and rocclr";
@ -215,8 +281,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,169 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocm-merged-llvm,
clr,
rocminfo,
hipify,
gitMinimal,
gtest,
zstd,
buildTests ? false,
buildExamples ? false,
gpuTargets ? (
clr.localGpuTargets or [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
]
),
}:
stdenv.mkDerivation (finalAttrs: {
preBuild = ''
echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
exit 1
'';
pname = "composable_kernel_base";
# Picked this version over 6.3 because much easier to get to build
# and it matches the version torch 2.6 wants
version = "6.4.0-unstable-20241220";
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildExamples [
"example"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "composable_kernel";
rev = "07339c738396ebeae57374771ded4dcf11bddf1e";
hash = "sha256-EvEBxlOpQ71BF57VW79WBo/cdxAwTKFXFMiYKyGyyEs=";
};
nativeBuildInputs = [
# Deliberately not using ninja
# because we're jankily composing build outputs from multiple drvs
# ninja won't believe they're up to date
gitMinimal
cmake
rocminfo
clr
hipify
zstd
];
buildInputs = [
rocm-cmake
clr
zstd
];
strictDeps = true;
enableParallelBuilding = true;
env.ROCM_PATH = clr;
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
cmakeFlags =
[
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
# "-DDL_KERNELS=ON" # Not needed, slow to build
# CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
# but migraphx requires an incompatible fork of CK and fails anyway
# "-DCK_USE_CODEGEN=ON"
# It might be worth skipping fp64 in future with this:
# "-DDTYPES=fp32;fp16;fp8;bf16;int8"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DBUILD_DEV=OFF"
"-DROCM_PATH=${clr}"
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
# FP8 can build for 908/90a but very slow build
# and produces unusably slow kernels that are huge
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
]
++ lib.optionals (gpuTargets != [ ]) [
# We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
# per readme this is required if archs are dissimilar
# In rocm-6.3.x not setting any arch flag worked
# but setting dissimilar arches always failed
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
];
# No flags to build selectively it seems...
postPatch =
# Reduce configure time by preventing thousands of clang-tidy targets being added
# We will never call them
# Never build profiler
''
substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
--replace-fail clang_tidy_check '#clang_tidy_check'
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(profiler)" ""
''
# Optionally remove tests
+ lib.optionalString (!buildTests) ''
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(test)" ""
substituteInPlace codegen/CMakeLists.txt \
--replace-fail "include(ROCMTest)" ""
''
# Optionally remove examples
+ lib.optionalString (!buildExamples) ''
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(example)" ""
'';
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
''
+ lib.optionalString buildExamples ''
mkdir -p $example/bin
mv $out/bin/example_* $example/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
passthru.anyGfx9Target = lib.lists.any (lib.strings.hasPrefix "gfx9") gpuTargets;
meta = with lib; {
description = "Performance portable programming model for machine learning tensor operators";
homepage = "https://github.com/ROCm/composable_kernel";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = true;
};
})

View file

@ -0,0 +1,43 @@
{
buildPythonPackage,
python,
composable_kernel,
lib,
setuptools,
setuptools-scm,
rocm-merged-llvm,
}:
buildPythonPackage {
pyproject = true;
pname = "ck4inductor";
build-system = [
setuptools
setuptools-scm
];
version = "6.4.0";
inherit (composable_kernel) src;
pythonImportsCheck = [
"ck4inductor"
"ck4inductor.universal_gemm.gen_instances"
"ck4inductor.universal_gemm.gen_instances"
"ck4inductor.universal_gemm.op"
];
propagatedBuildInputs = [
# At runtime will fail to compile anything with ck4inductor without this
# can't easily use in checks phase because most of the compiler machinery is in torch
rocm-merged-llvm
];
checkPhase = ''
if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then
echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor"
exit 1
fi
'';
meta = with lib; {
description = "pytorch inductor backend which uses composable_kernel universal GEMM implementations";
homepage = "https://github.com/ROCm/composable_kernel";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
};
}

View file

@ -1,112 +1,241 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
clr,
openmp,
clang-tools-extra,
git,
gtest,
zstd,
buildTests ? false,
buildExamples ? false,
gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx1030" ... ]
composable_kernel_base,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "composable_kernel";
version = "6.0.2";
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildExamples [
"example"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "composable_kernel";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-NCqMganmNyQfz3X+KQOrfrimnrgd3HbAGK5DeC4+J+o=";
let
parts = {
_mha = {
# mha takes ~3hrs on 64 cores on an EPYC milan system at ~2.5GHz
# big-parallel builders are one gen newer and clocked ~30% higher but only 24 cores
# Should be <10h timeout but might be cutting it close
# TODO: work out how to split this into smaller chunks instead of all 3k mha instances together
# mha_0,1,2, search ninja target file for the individual instances, split by the index?
# TODO: can we prune the generated instances down to only what in practice are used with popular models
# when using flash-attention + MHA kernels?
targets = [
"device_mha_instance"
];
requiredSystemFeatures = [ "big-parallel" ];
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
};
gemm_multiply_multiply = {
targets = [
"device_gemm_multiply_multiply_instance"
];
requiredSystemFeatures = [ "big-parallel" ];
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
};
grouped_conv = {
targets = [
"device_grouped_conv1d_bwd_weight_instance"
"device_grouped_conv2d_bwd_data_instance"
"device_grouped_conv2d_bwd_weight_instance"
"device_grouped_conv1d_fwd_instance"
"device_grouped_conv2d_fwd_instance"
"device_grouped_conv2d_fwd_dynamic_op_instance"
];
};
grouped_conv_bwd_3d = {
targets = [
"device_grouped_conv3d_bwd_data_instance"
"device_grouped_conv3d_bwd_data_bilinear_instance"
"device_grouped_conv3d_bwd_data_scale_instance"
"device_grouped_conv3d_bwd_weight_instance"
"device_grouped_conv3d_bwd_weight_bilinear_instance"
"device_grouped_conv3d_bwd_weight_scale_instance"
];
};
grouped_conv_fwd_3d = {
targets = [
"device_grouped_conv3d_fwd_instance"
"device_grouped_conv3d_fwd_bilinear_instance"
"device_grouped_conv3d_fwd_convinvscale_instance"
"device_grouped_conv3d_fwd_convscale_instance"
"device_grouped_conv3d_fwd_convscale_add_instance"
"device_grouped_conv3d_fwd_convscale_relu_instance"
"device_grouped_conv3d_fwd_dynamic_op_instance"
"device_grouped_conv3d_fwd_scale_instance"
"device_grouped_conv3d_fwd_scaleadd_ab_instance"
"device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance"
];
};
batched_gemm = {
targets = [
"device_batched_gemm_instance"
"device_batched_gemm_add_relu_gemm_add_instance"
"device_batched_gemm_bias_permute_instance"
"device_batched_gemm_gemm_instance"
"device_batched_gemm_reduce_instance"
"device_batched_gemm_softmax_gemm_instance"
"device_batched_gemm_softmax_gemm_permute_instance"
"device_grouped_gemm_instance"
"device_grouped_gemm_bias_instance"
"device_grouped_gemm_fastgelu_instance"
"device_grouped_gemm_fixed_nk_instance"
"device_grouped_gemm_fixed_nk_multi_abd_instance"
"device_grouped_gemm_tile_loop_instance"
];
};
gemm_universal = {
targets = [
"device_gemm_universal_instance"
"device_gemm_universal_batched_instance"
"device_gemm_universal_reduce_instance"
"device_gemm_universal_streamk_instance"
];
requiredSystemFeatures = [ "big-parallel" ];
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
};
gemm_other = {
targets = [
"device_gemm_instance"
"device_gemm_ab_scale_instance"
"device_gemm_add_instance"
"device_gemm_add_add_fastgelu_instance"
"device_gemm_add_fastgelu_instance"
"device_gemm_add_multiply_instance"
"device_gemm_add_relu_instance"
"device_gemm_add_relu_add_layernorm_instance"
"device_gemm_add_silu_instance"
"device_gemm_bias_add_reduce_instance"
"device_gemm_bilinear_instance"
"device_gemm_fastgelu_instance"
"device_gemm_multi_abd_instance"
"device_gemm_multiply_add_instance"
"device_gemm_reduce_instance"
"device_gemm_splitk_instance"
"device_gemm_streamk_instance"
];
};
conv = {
targets = [
"device_conv1d_bwd_data_instance"
"device_conv2d_bwd_data_instance"
"device_conv2d_fwd_instance"
"device_conv2d_fwd_bias_relu_instance"
"device_conv2d_fwd_bias_relu_add_instance"
"device_conv3d_bwd_data_instance"
];
};
pool = {
targets = [
"device_avg_pool2d_bwd_instance"
"device_avg_pool3d_bwd_instance"
"device_pool2d_fwd_instance"
"device_pool3d_fwd_instance"
"device_max_pool_bwd_instance"
];
};
other1 = {
targets = [
"device_batchnorm_instance"
"device_contraction_bilinear_instance"
"device_contraction_scale_instance"
"device_elementwise_instance"
"device_elementwise_normalization_instance"
"device_normalization_bwd_data_instance"
"device_normalization_bwd_gamma_beta_instance"
"device_normalization_fwd_instance"
];
};
other2 = {
targets = [
"device_column_to_image_instance"
"device_image_to_column_instance"
"device_permute_scale_instance"
"device_quantization_instance"
"device_reduce_instance"
"device_softmax_instance"
"device_transpose_instance"
];
};
};
tensorOpBuilder =
{
part,
targets,
extraCmakeFlags ? [ ],
requiredSystemFeatures ? [ ],
}:
composable_kernel_base.overrideAttrs (old: {
inherit requiredSystemFeatures;
pname = "composable_kernel${clr.gpuArchSuffix}-${part}";
makeTargets = targets;
preBuild = ''
echo "Building ${part}"
makeFlagsArray+=($makeTargets)
substituteInPlace Makefile \
--replace-fail '.NOTPARALLEL:' ""
'';
nativeBuildInputs = [
git
cmake
rocm-cmake
clr
clang-tools-extra
zstd
];
# Compile parallelism adjusted based on available RAM
# Never uses less than NIX_BUILD_CORES/4, never uses more than NIX_BUILD_CORES
# CK uses an unusually high amount of memory per core in the build step
# Nix/nixpkgs doesn't really have any infra to tell it that this build is unusually memory hungry
# So, bodge. Otherwise you end up having to build all of ROCm with a low core limit when
# it's only this package that has trouble.
preConfigure =
old.preConfigure or ""
+ ''
MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE))
MAX_CORES=$((1 + APPX_GB/3))
MAX_CORES=$((MAX_CORES < NIX_BUILD_CORES/3 ? NIX_BUILD_CORES/3 : MAX_CORES))
export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? MAX_CORES : NIX_BUILD_CORES))"
echo "Picked new core limit NIX_BUILD_CORES=$NIX_BUILD_CORES based on available mem: $APPX_GB GB"
cmakeFlagsArray+=(
"-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES"
)
'';
cmakeFlags = old.cmakeFlags ++ extraCmakeFlags;
# Early exit after build phase with success, skips fixups etc
# Will get copied back into /build of the final CK
postBuild = ''
find . -name "*.o" -type f | while read -r file; do
mkdir -p "$out/$(dirname "$file")"
cp --reflink=auto "$file" "$out/$file"
done
exit 0
'';
meta = old.meta // {
broken = false;
};
});
composable_kernel_parts = builtins.mapAttrs (
part: targets: tensorOpBuilder (targets // { inherit part; })
) parts;
in
buildInputs = [ openmp ];
composable_kernel_base.overrideAttrs (
finalAttrs: old: {
pname = "composable_kernel${clr.gpuArchSuffix}";
parts_dirs = builtins.attrValues composable_kernel_parts;
disallowedReferences = builtins.attrValues composable_kernel_parts;
preBuild = ''
for dir in $parts_dirs; do
find "$dir" -type f -name "*.o" | while read -r file; do
# Extract the relative path by removing the output directory prefix
rel_path="''${file#"$dir/"}"
cmakeFlags =
[
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
]
++ lib.optionals (gpuTargets != [ ]) [
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
];
# Create parent directory if it doesn't exist
mkdir -p "$(dirname "$rel_path")"
# No flags to build selectively it seems...
postPatch =
lib.optionalString (!buildTests) ''
substituteInPlace CMakeLists.txt \
--replace "add_subdirectory(test)" ""
''
+ lib.optionalString (!buildExamples) ''
substituteInPlace CMakeLists.txt \
--replace "add_subdirectory(example)" ""
''
+ ''
substituteInPlace CMakeLists.txt \
--replace "add_subdirectory(profiler)" ""
# Copy the file back to its original location, give it a future timestamp
# so make treats it as up to date
cp --reflink=auto --no-preserve=all "$file" "$rel_path"
touch -d "now +10 hours" "$rel_path"
done
done
'';
postInstall =
''
zstd --rm $out/lib/libdevice_operations.a
''
+ lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
''
+ lib.optionalString buildExamples ''
mkdir -p $example/bin
mv $out/bin/example_* $example/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
# Times out otherwise
requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; {
description = "Performance portable programming model for machine learning tensor operators";
homepage = "https://github.com/ROCm/composable_kernel";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})
passthru = old.passthru // {
parts = composable_kernel_parts;
};
meta = old.meta // {
# Builds which don't don't target any gfx9 cause cmake errors in dependent projects
broken = !finalAttrs.passthru.anyGfx9Target;
};
}
)

View file

@ -1,20 +0,0 @@
{
runCommandLocal,
composable_kernel_build,
zstd,
}:
let
ck = composable_kernel_build;
in
runCommandLocal "unpack-${ck.name}"
{
nativeBuildInputs = [ zstd ];
meta = ck.meta;
}
''
mkdir -p $out
cp -r --no-preserve=mode ${ck}/* $out
zstd -dv --rm $out/lib/libdevice_operations.a.zst -o $out/lib/libdevice_operations.a
substituteInPlace $out/lib/cmake/composable_kernel/*.cmake \
--replace "${ck}" "$out"
''

View file

@ -1,530 +1,507 @@
{ stdenv
, lib
, config
, callPackage
, recurseIntoAttrs
, symlinkJoin
, fetchFromGitHub
, cudaPackages
, python3Packages
, elfutils
, boost179
, opencv
, ffmpeg_4
, libjpeg_turbo
{
lib,
config,
callPackage,
newScope,
recurseIntoAttrs,
symlinkJoin,
fetchFromGitHub,
ffmpeg_4,
boost179,
opencv,
libjpeg_turbo,
python3Packages,
triton-llvm,
openmpi,
rocmGpuArches ? [ ],
}:
let
rocmUpdateScript = callPackage ./update.nix { };
in rec {
## ROCm ##
llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript rocm-device-libs rocm-runtime rocm-thunk clr; });
rocm-core = callPackage ./rocm-core {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
rocm-cmake = callPackage ./rocm-cmake {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
rocm-thunk = callPackage ./rocm-thunk {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
rocm-smi = python3Packages.callPackage ./rocm-smi {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
# Eventually will be in the LLVM repo
rocm-device-libs = callPackage ./rocm-device-libs {
inherit rocmUpdateScript rocm-cmake;
stdenv = llvm.rocmClangStdenv;
};
rocm-runtime = callPackage ./rocm-runtime {
inherit rocmUpdateScript rocm-device-libs rocm-thunk;
stdenv = llvm.rocmClangStdenv;
};
# Eventually will be in the LLVM repo
rocm-comgr = callPackage ./rocm-comgr {
inherit rocmUpdateScript rocm-cmake rocm-device-libs;
stdenv = llvm.rocmClangStdenv;
};
rocminfo = callPackage ./rocminfo {
inherit rocmUpdateScript rocm-cmake rocm-runtime;
stdenv = llvm.rocmClangStdenv;
};
clang-ocl = callPackage ./clang-ocl {
inherit rocmUpdateScript rocm-cmake rocm-device-libs;
stdenv = llvm.rocmClangStdenv;
};
# Unfree
hsa-amd-aqlprofile-bin = callPackage ./hsa-amd-aqlprofile-bin {
stdenv = llvm.rocmClangStdenv;
};
# Broken, too many errors
rdc = callPackage ./rdc {
inherit rocmUpdateScript rocm-smi rocm-runtime stdenv;
# stdenv = llvm.rocmClangStdenv;
};
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { inherit stdenv; };
hip-common = callPackage ./hip-common {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
# Eventually will be in the LLVM repo
hipcc = callPackage ./hipcc {
inherit rocmUpdateScript;
stdenv = llvm.rocmClangStdenv;
};
# Replaces hip, opencl-runtime, and rocclr
clr = callPackage ./clr {
inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo rocm-smi;
inherit (llvm) clang;
stdenv = llvm.rocmClangStdenv;
};
hipify = callPackage ./hipify {
inherit rocmUpdateScript;
inherit (llvm) clang;
stdenv = llvm.rocmClangStdenv;
};
# Needs GCC
rocprofiler = callPackage ./rocprofiler {
inherit rocmUpdateScript clr rocm-core rocm-thunk rocm-device-libs roctracer rocdbgapi rocm-smi hsa-amd-aqlprofile-bin stdenv;
inherit (llvm) clang;
};
# Needs GCC
roctracer = callPackage ./roctracer {
inherit rocmUpdateScript rocm-device-libs rocm-runtime clr stdenv;
};
rocgdb = callPackage ./rocgdb {
inherit rocmUpdateScript rocdbgapi;
stdenv = llvm.rocmClangStdenv;
};
rocdbgapi = callPackage ./rocdbgapi {
inherit rocmUpdateScript rocm-cmake rocm-comgr rocm-runtime;
stdenv = llvm.rocmClangStdenv;
};
rocr-debug-agent = callPackage ./rocr-debug-agent {
inherit rocmUpdateScript clr rocdbgapi;
stdenv = llvm.rocmClangStdenv;
};
rocprim = callPackage ./rocprim {
inherit rocmUpdateScript rocm-cmake clr;
stdenv = llvm.rocmClangStdenv;
};
rocsparse = callPackage ./rocsparse {
inherit rocmUpdateScript rocm-cmake rocprim clr;
stdenv = llvm.rocmClangStdenv;
};
rocthrust = callPackage ./rocthrust {
inherit rocmUpdateScript rocm-cmake rocprim clr;
stdenv = llvm.rocmClangStdenv;
};
rocrand = callPackage ./rocrand {
inherit rocmUpdateScript rocm-cmake clr;
stdenv = llvm.rocmClangStdenv;
};
hiprand = callPackage ./hiprand {
inherit rocmUpdateScript rocm-cmake clr rocrand;
stdenv = llvm.rocmClangStdenv;
};
rocfft = callPackage ./rocfft {
inherit rocmUpdateScript rocm-cmake rocrand rocfft clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rccl = callPackage ./rccl {
inherit rocmUpdateScript rocm-cmake rocm-smi clr hipify;
stdenv = llvm.rocmClangStdenv;
};
hipcub = callPackage ./hipcub {
inherit rocmUpdateScript rocm-cmake rocprim clr;
stdenv = llvm.rocmClangStdenv;
};
hipsparse = callPackage ./hipsparse {
inherit rocmUpdateScript rocm-cmake rocsparse clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
hipfort = callPackage ./hipfort {
inherit rocmUpdateScript rocm-cmake;
stdenv = llvm.rocmClangStdenv;
};
hipfft = callPackage ./hipfft {
inherit rocmUpdateScript rocm-cmake rocfft clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
tensile = python3Packages.callPackage ./tensile {
inherit rocmUpdateScript rocminfo;
stdenv = llvm.rocmClangStdenv;
};
rocblas = callPackage ./rocblas {
inherit rocmUpdateScript rocm-cmake clr tensile;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rocsolver = callPackage ./rocsolver {
inherit rocmUpdateScript rocm-cmake rocblas rocsparse clr;
stdenv = llvm.rocmClangStdenv;
};
rocwmma = callPackage ./rocwmma {
inherit rocmUpdateScript rocm-cmake rocm-smi rocblas clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rocalution = callPackage ./rocalution {
inherit rocmUpdateScript rocm-cmake rocprim rocsparse rocrand rocblas clr;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rocmlir = callPackage ./rocmlir {
inherit rocmUpdateScript rocm-cmake rocminfo clr;
stdenv = llvm.rocmClangStdenv;
};
rocmlir-rock = rocmlir.override {
buildRockCompiler = true;
};
hipsolver = callPackage ./hipsolver {
inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr;
stdenv = llvm.rocmClangStdenv;
};
hipblas = callPackage ./hipblas {
inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr;
stdenv = llvm.rocmClangStdenv;
};
# hipBlasLt - Very broken with Tensile at the moment, only supports GFX9
# hipTensor - Only supports GFX9
composable_kernel = callPackage ./composable_kernel/unpack.nix {
composable_kernel_build = callPackage ./composable_kernel {
inherit rocmUpdateScript rocm-cmake clr;
inherit (llvm) openmp clang-tools-extra;
outer = lib.makeScope newScope (
self:
let
inherit (self) llvm;
pyPackages = python3Packages;
openmpi-orig = openmpi;
in
{
inherit rocmGpuArches;
buildTests = false;
buildBenchmarks = false;
stdenv = llvm.rocmClangStdenv;
};
};
half = callPackage ./half {
inherit rocmUpdateScript rocm-cmake;
stdenv = llvm.rocmClangStdenv;
};
rocmPath = self.callPackage ./rocm-path { };
rocmUpdateScript = self.callPackage ./update.nix { };
miopen = callPackage ./miopen {
inherit rocmUpdateScript rocm-cmake rocblas clang-ocl composable_kernel rocm-comgr clr rocm-docs-core half roctracer;
inherit (llvm) clang-tools-extra;
stdenv = llvm.rocmClangStdenv;
rocmlir = rocmlir-rock;
boost = boost179.override { enableStatic = true; };
};
## ROCm ##
llvm = recurseIntoAttrs (
callPackage ./llvm/default.nix {
inherit (self) rocm-device-libs rocm-runtime;
}
);
inherit (self.llvm) rocm-merged-llvm clang openmp;
miopen-hip = miopen;
migraphx = callPackage ./migraphx {
inherit rocmUpdateScript rocm-cmake rocblas composable_kernel miopen clr half rocm-device-libs;
inherit (llvm) openmp clang-tools-extra;
stdenv = llvm.rocmClangStdenv;
rocmlir = rocmlir-rock;
};
rpp = callPackage ./rpp {
inherit rocmUpdateScript rocm-cmake rocm-docs-core clr half;
inherit (llvm) openmp;
stdenv = llvm.rocmClangStdenv;
};
rpp-hip = rpp.override {
useOpenCL = false;
useCPU = false;
};
rpp-opencl = rpp.override {
useOpenCL = true;
useCPU = false;
};
rpp-cpu = rpp.override {
useOpenCL = false;
useCPU = true;
};
mivisionx = callPackage ./mivisionx {
inherit rocmUpdateScript rocm-cmake rocm-device-libs clr rpp rocblas miopen migraphx half rocm-docs-core;
inherit (llvm) clang openmp;
opencv = opencv.override { enablePython = true; };
ffmpeg = ffmpeg_4;
stdenv = llvm.rocmClangStdenv;
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
# See: https://github.com/ROCm/MIVisionX/issues/1051
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
version = "2.0.6.1";
src = fetchFromGitHub {
owner = "rrawther";
repo = "libjpeg-turbo";
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
rocm-core = self.callPackage ./rocm-core { };
amdsmi = pyPackages.callPackage ./amdsmi {
inherit (self) rocmUpdateScript;
};
# overwrite all patches, since patches for newer version do not apply
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
};
};
rocm-cmake = self.callPackage ./rocm-cmake { };
mivisionx-hip = mivisionx.override {
rpp = rpp-hip;
useOpenCL = false;
useCPU = false;
};
rocm-smi = pyPackages.callPackage ./rocm-smi {
inherit (self) rocmUpdateScript;
};
mivisionx-cpu = mivisionx.override {
rpp = rpp-cpu;
useOpenCL = false;
useCPU = true;
};
rocm-device-libs = self.callPackage ./rocm-device-libs {
inherit (llvm) rocm-merged-llvm;
};
## Meta ##
# Emulate common ROCm meta layout
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
meta = rec {
rocm-developer-tools = symlinkJoin {
name = "rocm-developer-tools-meta";
rocm-runtime = self.callPackage ./rocm-runtime {
inherit (llvm) rocm-merged-llvm;
};
paths = [
hsa-amd-aqlprofile-bin
rocm-core
rocr-debug-agent
roctracer
rocdbgapi
rocprofiler
rocgdb
rocm-language-runtime
];
};
rocm-comgr = self.callPackage ./rocm-comgr {
inherit (llvm) rocm-merged-llvm;
};
rocm-ml-sdk = symlinkJoin {
name = "rocm-ml-sdk-meta";
rocminfo = self.callPackage ./rocminfo { };
paths = [
rocm-core
miopen-hip
rocm-hip-sdk
rocm-ml-libraries
];
};
# Unfree
hsa-amd-aqlprofile-bin = self.callPackage ./hsa-amd-aqlprofile-bin { };
rocm-ml-libraries = symlinkJoin {
name = "rocm-ml-libraries-meta";
rdc = self.callPackage ./rdc { };
paths = [
llvm.clang
llvm.mlir
llvm.openmp
rocm-core
miopen-hip
rocm-hip-libraries
];
};
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { };
rocm-hip-sdk = symlinkJoin {
name = "rocm-hip-sdk-meta";
hip-common = self.callPackage ./hip-common { };
paths = [
rocprim
rocalution
hipfft
rocm-core
hipcub
hipblas
rocrand
rocfft
rocsparse
rccl
rocthrust
rocblas
hipsparse
hipfort
rocwmma
hipsolver
rocsolver
rocm-hip-libraries
rocm-hip-runtime-devel
];
};
# Eventually will be in the LLVM repo
hipcc = self.callPackage ./hipcc {
inherit (llvm) rocm-merged-llvm;
};
rocm-hip-libraries = symlinkJoin {
name = "rocm-hip-libraries-meta";
# Replaces hip, opencl-runtime, and rocclr
clr = self.callPackage ./clr { };
paths = [
rocblas
hipfort
rocm-core
rocsolver
rocalution
rocrand
hipblas
rocfft
hipfft
rccl
rocsparse
hipsparse
hipsolver
rocm-hip-runtime
];
};
aotriton = self.callPackage ./aotriton { };
rocm-openmp-sdk = symlinkJoin {
name = "rocm-openmp-sdk-meta";
hipify = self.callPackage ./hipify {
inherit (llvm)
clang
rocm-merged-llvm
;
};
paths = [
rocm-core
llvm.clang
llvm.mlir
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
rocm-language-runtime
];
};
# hsakmt was merged into rocm-runtime
hsakmt = self.rocm-runtime;
rocm-opencl-sdk = symlinkJoin {
name = "rocm-opencl-sdk-meta";
rocprofiler = self.callPackage ./rocprofiler {
inherit (llvm) clang;
};
rocprofiler-register = self.callPackage ./rocprofiler-register {
inherit (llvm) clang;
};
paths = [
rocm-core
rocm-runtime
clr
clr.icd
rocm-thunk
rocm-opencl-runtime
];
};
# Needs GCC
roctracer = self.callPackage ./roctracer { };
rocm-opencl-runtime = symlinkJoin {
name = "rocm-opencl-runtime-meta";
rocgdb = self.callPackage ./rocgdb { };
paths = [
rocm-core
clr
clr.icd
rocm-language-runtime
];
};
rocdbgapi = self.callPackage ./rocdbgapi { };
rocm-hip-runtime-devel = symlinkJoin {
name = "rocm-hip-runtime-devel-meta";
rocr-debug-agent = self.callPackage ./rocr-debug-agent { };
paths = [
clr
rocm-core
hipify
rocm-cmake
llvm.clang
llvm.mlir
llvm.openmp
rocm-thunk
rocm-runtime
rocm-hip-runtime
];
};
rocprim = self.callPackage ./rocprim { };
rocm-hip-runtime = symlinkJoin {
name = "rocm-hip-runtime-meta";
rocsparse = self.callPackage ./rocsparse { };
paths = [
rocm-core
rocminfo
clr
rocm-language-runtime
];
};
rocthrust = self.callPackage ./rocthrust { };
rocm-language-runtime = symlinkJoin {
name = "rocm-language-runtime-meta";
rocrand = self.callPackage ./rocrand { };
paths = [
rocm-runtime
rocm-core
rocm-comgr
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
];
};
hiprand = self.callPackage ./hiprand { };
rocm-all = symlinkJoin {
name = "rocm-all-meta";
rocfft = self.callPackage ./rocfft { };
paths = [
rocm-developer-tools
rocm-ml-sdk
rocm-ml-libraries
rocm-hip-sdk
rocm-hip-libraries
rocm-openmp-sdk
rocm-opencl-sdk
rocm-opencl-runtime
rocm-hip-runtime-devel
rocm-hip-runtime
rocm-language-runtime
];
};
};
} // lib.optionalAttrs config.allowAliases {
miopengemm= throw ''
'miopengemm' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
mscclpp = self.callPackage ./mscclpp { };
miopen-opencl= throw ''
'miopen-opencl' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
rccl = self.callPackage ./rccl { };
mivisionx-opencl = throw ''
'mivisionx-opencl' has been deprecated.
Other versions of mivisionx are still available.
It is also still available for some time as part of rocmPackages_5.
''; # Added 2024-3-24
# RCCL with sanitizers and tests
# Can't have with sanitizer build as dep of other packages without
# runtime crashes due to ASAN not loading first
rccl-tests = self.callPackage ./rccl {
buildTests = true;
};
hipcub = self.callPackage ./hipcub { };
hipsparse = self.callPackage ./hipsparse { };
hipfort = self.callPackage ./hipfort { };
hipfft = self.callPackage ./hipfft { };
tensile = pyPackages.callPackage ./tensile {
inherit (self)
rocmUpdateScript
clr
;
};
rocblas = self.callPackage ./rocblas {
buildTests = true;
buildBenchmarks = true;
};
rocsolver = self.callPackage ./rocsolver { };
rocwmma = self.callPackage ./rocwmma { };
rocalution = self.callPackage ./rocalution { };
rocmlir-rock = self.callPackage ./rocmlir {
buildRockCompiler = true;
};
rocmlir = self.rocmlir-rock;
hipsolver = self.callPackage ./hipsolver { };
hipblas-common = self.callPackage ./hipblas-common { };
hipblas = self.callPackage ./hipblas { };
hipblaslt = self.callPackage ./hipblaslt { };
# hipTensor - Only supports GFX9
composable_kernel_base = self.callPackage ./composable_kernel/base.nix { };
composable_kernel = self.callPackage ./composable_kernel { };
ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix {
inherit (self) composable_kernel;
inherit (llvm) rocm-merged-llvm;
};
half = self.callPackage ./half { };
miopen = self.callPackage ./miopen {
boost = boost179.override { enableStatic = true; };
};
miopen-hip = self.miopen;
migraphx = self.callPackage ./migraphx { };
rpp = self.callPackage ./rpp { };
rpp-hip = self.rpp.override {
useOpenCL = false;
useCPU = false;
};
rpp-opencl = self.rpp.override {
useOpenCL = true;
useCPU = false;
};
rpp-cpu = self.rpp.override {
useOpenCL = false;
useCPU = true;
};
mivisionx = self.callPackage ./mivisionx {
opencv = opencv.override { enablePython = true; };
# TODO: Remove this pin in ROCm 6.4+
# FFMPEG support was improved in https://github.com/ROCm/MIVisionX/pull/1460
ffmpeg = ffmpeg_4;
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
# See: https://github.com/ROCm/MIVisionX/issues/1051
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
version = "2.0.6.1";
src = fetchFromGitHub {
owner = "rrawther";
repo = "libjpeg-turbo";
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
};
# overwrite all patches, since patches for newer version do not apply
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
};
};
mivisionx-hip = self.mivisionx.override {
rpp = self.rpp-hip;
useOpenCL = false;
useCPU = false;
};
mivisionx-cpu = self.mivisionx.override {
rpp = self.rpp-cpu;
useOpenCL = false;
useCPU = true;
};
# Even if config.rocmSupport is false we need rocmSupport true
# version of ucc/ucx in openmpi in this package set
openmpi = openmpi-orig.override (
prev:
let
ucx = prev.ucx.override {
enableCuda = false;
enableRocm = true;
};
in
{
inherit ucx;
ucc = prev.ucc.override {
enableCuda = false;
inherit ucx;
};
}
);
mpi = self.openmpi;
triton-llvm = triton-llvm.overrideAttrs {
src = fetchFromGitHub {
owner = "llvm";
repo = "llvm-project";
# make sure this matches triton llvm rel branch hash for now
# https://github.com/triton-lang/triton/blob/release/3.2.x/cmake/llvm-hash.txt
rev = "86b69c31642e98f8357df62c09d118ad1da4e16a";
hash = "sha256-W/mQwaLGx6/rIBjdzUTIbWrvGjdh7m4s15f70fQ1/hE=";
};
pname = "triton-llvm-rocm";
patches = [ ]; # FIXME: https://github.com/llvm/llvm-project//commit/84837e3cc1cf17ed71580e3ea38299ed2bfaa5f6.patch doesn't apply, may need to rebase
};
triton = pyPackages.callPackage ./triton { rocmPackages = self; };
## Meta ##
# Emulate common ROCm meta layout
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
meta = with self; rec {
rocm-developer-tools = symlinkJoin {
name = "rocm-developer-tools-meta";
paths = [
hsa-amd-aqlprofile-bin
rocm-core
rocr-debug-agent
roctracer
rocdbgapi
rocprofiler
rocgdb
rocm-language-runtime
];
};
rocm-ml-sdk = symlinkJoin {
name = "rocm-ml-sdk-meta";
paths = [
rocm-core
miopen-hip
rocm-hip-sdk
rocm-ml-libraries
];
};
rocm-ml-libraries = symlinkJoin {
name = "rocm-ml-libraries-meta";
paths = [
llvm.clang
llvm.mlir
llvm.openmp
rocm-core
miopen-hip
rocm-hip-libraries
];
};
rocm-hip-sdk = symlinkJoin {
name = "rocm-hip-sdk-meta";
paths = [
rocprim
rocalution
hipfft
rocm-core
hipcub
hipblas
hipblaslt
rocrand
rocfft
rocsparse
rccl
rocthrust
rocblas
hipsparse
hipfort
rocwmma
hipsolver
rocsolver
rocm-hip-libraries
rocm-hip-runtime-devel
];
};
rocm-hip-libraries = symlinkJoin {
name = "rocm-hip-libraries-meta";
paths = [
rocblas
hipfort
rocm-core
rocsolver
rocalution
rocrand
hipblas
hipblaslt
rocfft
hipfft
rccl
rocsparse
hipsparse
hipsolver
rocm-hip-runtime
];
};
rocm-openmp-sdk = symlinkJoin {
name = "rocm-openmp-sdk-meta";
paths = [
rocm-core
llvm.clang
llvm.mlir
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
rocm-language-runtime
];
};
rocm-opencl-sdk = symlinkJoin {
name = "rocm-opencl-sdk-meta";
paths = [
rocm-core
rocm-runtime
clr
clr.icd
rocm-opencl-runtime
];
};
rocm-opencl-runtime = symlinkJoin {
name = "rocm-opencl-runtime-meta";
paths = [
rocm-core
clr
clr.icd
rocm-language-runtime
];
};
rocm-hip-runtime-devel = symlinkJoin {
name = "rocm-hip-runtime-devel-meta";
paths = [
clr
rocm-core
hipify
rocm-cmake
llvm.clang
llvm.mlir
llvm.openmp
rocm-runtime
rocm-hip-runtime
];
};
rocm-hip-runtime = symlinkJoin {
name = "rocm-hip-runtime-meta";
paths = [
rocm-core
rocminfo
clr
rocm-language-runtime
];
};
rocm-language-runtime = symlinkJoin {
name = "rocm-language-runtime-meta";
paths = [
rocm-runtime
rocm-core
rocm-comgr
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
];
};
rocm-all = symlinkJoin {
name = "rocm-all-meta";
paths = [
rocm-developer-tools
rocm-ml-sdk
rocm-ml-libraries
rocm-hip-sdk
rocm-hip-libraries
rocm-openmp-sdk
rocm-opencl-sdk
rocm-opencl-runtime
rocm-hip-runtime-devel
rocm-hip-runtime
rocm-language-runtime
];
};
};
rocm-tests = self.callPackage ./rocm-tests {
rocmPackages = self;
};
}
// lib.optionalAttrs config.allowAliases {
rocm-thunk = throw ''
'rocm-thunk' has been removed. It's now part of the ROCm runtime.
''; # Added 2025-3-16
clang-ocl = throw ''
'clang-ocl' has been deprecated upstream. Use ROCm's clang directly.
''; # Added 2025-3-16
miopengemm = throw ''
'miopengemm' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
miopen-opencl = throw ''
'miopen-opencl' has been deprecated.
It is still available for some time as part of rocmPackages_5.
''; # Added 2024-3-3
mivisionx-opencl = throw ''
'mivisionx-opencl' has been deprecated.
Other versions of mivisionx are still available.
It is also still available for some time as part of rocmPackages_5.
''; # Added 2024-3-24
}
);
scopeForArches =
arches:
outer.overrideScope (
_final: prev: {
clr = prev.clr.override {
localGpuTargets = arches;
};
}
);
in
outer
// builtins.listToAttrs (
builtins.map (arch: {
name = arch;
value = scopeForArches [ arch ];
}) outer.clr.gpuTargets
)
// {
gfx9 = scopeForArches [
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
];
gfx10 = scopeForArches [
"gfx1010"
"gfx1030"
];
gfx11 = scopeForArches [
"gfx1100"
"gfx1101"
"gfx1102"
];
}

View file

@ -1,20 +1,21 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, rocm-cmake
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "half";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "half";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-wvl8ny7pbY9hUGGtJ70R7/4YIsahgI7qcVzUnxmUfZM=";
hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc=";
};
nativeBuildInputs = [
@ -24,8 +25,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -34,6 +35,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.unix;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,18 +1,19 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hip-common";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "HIP";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-51u3By0R4LKoWiklNacFP6HILL845jxpN6FD7rQB+zQ=";
hash = "sha256-eKDbuG57KCg/nZpy+RcDsjyw0pP68qO9K/3ZpSPuMNw=";
};
dontConfigure = true;
@ -29,8 +30,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -39,6 +40,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,37 @@
{
lib,
stdenv,
cmake,
fetchFromGitHub,
rocm-cmake,
rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hipblas-common";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "hipBLAS-common";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-tvNz4ymQ1y3YSUQxAtNu2who79QzSKR+3JEevr+GDWo=";
};
nativeBuildInputs = [
cmake
rocm-cmake
];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Common files shared by hipBLAS and hipBLASLt";
homepage = "https://github.com/ROCm/hipBLASlt";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
};
})

View file

@ -2,13 +2,17 @@
lib,
stdenv,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
cmake,
rocm-cmake,
clr,
gfortran,
hipblas-common,
rocblas,
rocsolver,
rocsparse,
rocprim,
gtest,
lapack-reference,
buildTests ? false,
@ -19,7 +23,7 @@
# Can also use cuBLAS
stdenv.mkDerivation (finalAttrs: {
pname = "hipblas";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -39,9 +43,23 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "hipBLAS";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-Fq7o2sMmHlHIv9UKJw+u/h9K/ZhKVJWwosYTdYIsscA=";
hash = "sha256-Rz1KAhBUbvErHTF2PM1AkVhqo4OHldfSNMSpp5Tx9yk=";
};
patches = [
# https://github.com/ROCm/hipBLAS/pull/952
(fetchpatch {
name = "transitively-depend-hipblas-common.patch";
url = "https://github.com/ROCm/hipBLAS/commit/54220fdaebf0fb4fd0921ee9e418ace5b143ec8f.patch";
hash = "sha256-MFEhv8Bkrd2zD0FFIDg9oJzO7ztdyMAF+R9oYA0rmwQ=";
})
];
postPatch = ''
substituteInPlace library/CMakeLists.txt \
--replace-fail "find_package(Git REQUIRED)" ""
'';
nativeBuildInputs = [
cmake
rocm-cmake
@ -49,9 +67,13 @@ stdenv.mkDerivation (finalAttrs: {
gfortran
];
propagatedBuildInputs = [ hipblas-common ];
buildInputs =
[
rocblas
rocprim
rocsparse
rocsolver
]
++ lib.optionals buildTests [
@ -63,13 +85,16 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_CXX_COMPILER=${lib.getExe' clr "hipcc"}"
# Upstream is migrating to amdclang++, it is likely this will be correct in next version bump
#"-DCMAKE_CXX_COMPILER=${lib.getBin clr}/bin/amdclang++"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}"
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
@ -100,8 +125,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -110,8 +135,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,235 @@
{
lib,
stdenv,
fetchpatch,
fetchFromGitHub,
cmake,
rocm-cmake,
clr,
gfortran,
gtest,
msgpack,
libxml2,
python3,
python3Packages,
openmp,
hipblas-common,
tensile,
lapack-reference,
ncurses,
libffi,
zlib,
zstd,
rocmUpdateScript,
buildTests ? false,
buildBenchmarks ? false,
buildSamples ? false,
# hipblaslt supports only devices with MFMA or WMMA
# WMMA on gfx1100 may be broken
# MFMA on MI100 may be broken
# MI200/MI300 known to work
gpuTargets ? (
clr.localGpuTargets or [
# "gfx908" FIXME: confirm MFMA on MI100 works
"gfx90a"
"gfx942"
# "gfx1100" FIXME: confirm WMMA targets work
]
),
}:
stdenv.mkDerivation (
finalAttrs:
let
supportsTargetArches =
(builtins.any (lib.strings.hasPrefix "gfx9") gpuTargets)
|| (builtins.any (lib.strings.hasPrefix "gfx11") gpuTargets);
tensile' = (tensile.override { isTensileLite = true; }).overrideAttrs {
inherit (finalAttrs) src;
sourceRoot = "${finalAttrs.src.name}/tensilelite";
};
py = python3.withPackages (ps: [
ps.pyyaml
ps.setuptools
ps.packaging
]);
gpuTargets' = lib.optionalString supportsTargetArches (lib.concatStringsSep ";" gpuTargets);
compiler = "amdclang++";
cFlags = "-O3 -I${msgpack}/include"; # FIXME: cmake files need patched to include this properly
in
{
pname = "hipblaslt${clr.gpuArchSuffix}";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "hipBLASLt";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-ozfHwsxcczzYXN9SIkyfRvdtaCqlDN4bh3UHZNS2oVQ=";
};
env.CXX = compiler;
env.CFLAGS = cFlags;
env.CXXFLAGS = cFlags;
env.ROCM_PATH = "${clr}";
env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
# Some tensile scripts look for this as an env var rather than a cmake flag
env.CMAKE_CXX_COMPILER = lib.getExe' clr "amdclang++";
requiredSystemFeatures = [ "big-parallel" ];
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
]
++ lib.optionals buildSamples [
"sample"
];
postPatch = ''
mkdir -p build/Tensile/library
# git isn't needed and we have no .git
substituteInPlace cmake/Dependencies.cmake \
--replace-fail "find_package(Git REQUIRED)" ""
substituteInPlace CMakeLists.txt \
--replace-fail "include(virtualenv)" "" \
--replace-fail "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" \
--replace-fail "virtualenv_install(\''${CMAKE_SOURCE_DIR}/tensilelite)" "" \
--replace-fail 'find_package(Tensile 4.33.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "''${INSTALLED_TENSILE_PATH}")' "find_package(Tensile)" \
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
# FIXME: TensileCreateExtOpLibraries build failure due to unsupported null operand
# Working around for now by disabling the ExtOp libs
substituteInPlace library/src/amd_detail/rocblaslt/src/CMakeLists.txt \
--replace-fail 'TensileCreateExtOpLibraries("' '# skipping TensileCreateExtOpLibraries'
substituteInPlace library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh \
--replace-fail '${"\${rocm_path}"}/bin/' ""
'';
# Apply patches to allow building without a target arch if we need to do that
patches = lib.optionals (!supportsTargetArches) [
# Add ability to build without specitying any arch.
(fetchpatch {
sha256 = "sha256-VW3bPzmQvfo8+iKsVfpn4sbqAe41fLzCEUfBh9JxVyk=";
url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.1.1-no-arch.patch";
})
# Followup to above patch for 6.3.x
(fetchpatch {
sha256 = "sha256-GCsrne6BiWzwj8TMAfFuaYz1Pij97hoCc6E3qJhWb10=";
url = "https://raw.githubusercontent.com/gentoo/gentoo/refs/heads/master/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch";
})
];
doCheck = false;
doInstallCheck = false;
nativeBuildInputs = [
cmake
rocm-cmake
py
clr
gfortran
# need make to get streaming console output so nix knows build is still running
# so deliberately not using ninja
];
buildInputs =
[
hipblas-common
tensile'
openmp
libffi
ncurses
# Tensile deps - not optional, building without tensile isn't actually supported
msgpack # FIXME: not included in cmake!
libxml2
python3Packages.msgpack
python3Packages.joblib
zlib
zstd
]
++ lib.optionals buildTests [
gtest
]
++ lib.optionals (buildTests || buildBenchmarks) [
lapack-reference
];
cmakeFlags =
[
"-Wno-dev"
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DVIRTUALENV_PYTHON_EXENAME=${lib.getExe py}"
"-DTENSILE_USE_HIP=ON"
"-DTENSILE_BUILD_CLIENT=OFF"
"-DTENSILE_USE_FLOAT16_BUILTIN=ON"
"-DCMAKE_CXX_COMPILER=${compiler}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DHIPBLASLT_ENABLE_MARKER=Off"
# FIXME what are the implications of hardcoding this?
"-DTensile_CODE_OBJECT_VERSION=V5"
"-DTensile_COMPILER=${compiler}"
"-DAMDGPU_TARGETS=${gpuTargets'}"
"-DGPU_TARGETS=${gpuTargets'}"
"-DTensile_LIBRARY_FORMAT=msgpack"
]
++ lib.optionals (!supportsTargetArches) [
"-DBUILD_WITH_TENSILE=OFF"
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
]
++ lib.optionals buildBenchmarks [
"-DBUILD_CLIENTS_BENCHMARKS=ON"
]
++ lib.optionals buildSamples [
"-DBUILD_CLIENTS_SAMPLES=ON"
];
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/hipblas-test $test/bin
''
+ lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/hipblas-bench $benchmark/bin
''
+ lib.optionalString buildSamples ''
mkdir -p $sample/bin
mv $out/bin/example-* $sample/bin
''
+ lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
rmdir $out/bin
'';
# If this is false there are no kernels in the output lib
# and it's useless at runtime
# so if it's an optional dep it's best to not depend on it
# Some packages like torch need hipblaslt to compile
# and are fine ignoring it at runtime if it's not supported
# so we have to support building an empty hipblaslt
passthru.supportsTargetArches = supportsTargetArches;
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner repo;
};
passthru.tensilelite = tensile';
meta = with lib; {
description = "hipBLASLt is a library that provides general matrix-matrix operations with a flexible API";
homepage = "https://github.com/ROCm/hipBLASlt";
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
};
}
)

View file

@ -0,0 +1,39 @@
From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Fri, 11 Oct 2024 02:56:22 -0700
Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived
platforms
Fixes UB when hipBinUtilPtr_ is used.
---
amd/hipcc/src/hipBin_amd.h | 1 -
amd/hipcc/src/hipBin_nvidia.h | 1 -
2 files changed, 2 deletions(-)
diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h
index 0a782d1beab9..36cd625ae8bc 100644
--- a/src/hipBin_amd.h
+++ b/src/hipBin_amd.h
@@ -42,7 +42,6 @@ THE SOFTWARE.
class HipBinAmd : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string hipClangPath_ = "";
string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_;
PlatformInfo platformInfoAMD_;
diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h
index ff142cc1cea2..09b7b80979c7 100644
--- a/src/hipBin_nvidia.h
+++ b/src/hipBin_nvidia.h
@@ -31,7 +31,6 @@ THE SOFTWARE.
class HipBinNvidia : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string cudaPath_ = "";
PlatformInfo platformInfoNV_;
string hipCFlags_, hipCXXFlags_, hipLdFlags_;
--
2.46.0

View file

@ -1,49 +1,46 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
rocm-merged-llvm,
cmake,
lsb-release,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hipcc";
version = "6.0.2";
src = fetchFromGitHub {
owner = "ROCm";
repo = "HIPCC";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-/LRQN+RSMBPk2jS/tdp3psUL/B0RJZQhRri7e67KsG4=";
};
# In-tree with ROCm LLVM
inherit (rocm-merged-llvm) version;
src = rocm-merged-llvm.llvm-src;
sourceRoot = "${finalAttrs.src.name}/amd/hipcc";
nativeBuildInputs = [ cmake ];
buildInputs = [ rocm-merged-llvm ];
patches = [
# https://github.com/ROCm/llvm-project/pull/183
# Fixes always-invoked UB in hipcc
./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch
];
postPatch = ''
substituteInPlace src/hipBin_amd.h \
--replace "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
--replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
'';
cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
];
postInstall = ''
rm -r $out/hip/bin
ln -s $out/bin $out/hip/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
meta = with lib; {
description = "Compiler driver utility that calls clang or nvcc";
homepage = "https://github.com/ROCm/HIPCC";
license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,36 +1,40 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, rocm-cmake
, rocprim
, clr
, gtest
, gbenchmark
, buildTests ? false
, buildBenchmarks ? false
, gpuTargets ? [ ]
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocprim,
clr,
gtest,
gbenchmark,
buildTests ? false,
buildBenchmarks ? false,
gpuTargets ? [ ],
}:
# CUB can also be used as a backend instead of rocPRIM.
stdenv.mkDerivation (finalAttrs: {
pname = "hipcub";
version = "6.0.2";
version = "6.3.3";
outputs = [
"out"
] ++ lib.optionals buildTests [
"test"
] ++ lib.optionals buildBenchmarks [
"benchmark"
];
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "hipCUB";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-8QzVgj0JSb86zEG3sj5AAt9pG3frw+xrjEOTo7xCIrc=";
hash = "sha256-uECOQWG9C64tg5YZdm9/3+fZXaZVGslu8vElK3m23GY=";
};
nativeBuildInputs = [
@ -39,44 +43,53 @@ stdenv.mkDerivation (finalAttrs: {
clr
];
buildInputs = [
rocprim
] ++ lib.optionals buildTests [
gtest
] ++ lib.optionals buildBenchmarks [
gbenchmark
];
buildInputs =
[
rocprim
]
++ lib.optionals buildTests [
gtest
]
++ lib.optionals buildBenchmarks [
gbenchmark
];
cmakeFlags = [
"-DCMAKE_CXX_COMPILER=hipcc"
"-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
] ++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
] ++ lib.optionals buildTests [
"-DBUILD_TEST=ON"
] ++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
cmakeFlags =
[
"-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
]
++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_TEST=ON"
]
++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
postInstall = lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
'' + lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/benchmark_* $benchmark/bin
'' + lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
''
+ lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/benchmark_* $benchmark/bin
''
+ lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -85,6 +98,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ bsd3 ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -22,7 +22,7 @@
# Can also use cuFFT
stdenv.mkDerivation (finalAttrs: {
pname = "hipfft";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -42,7 +42,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "hipFFT";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-DjjNQryJdl7RmaMQRQPWkleweEWMIwH/xXU84GGjoC0=";
hash = "sha256-Jq/YHEtOo7a0/Ki7gxZATKmSqPU6cyLf5gx3A4MAZNw=";
fetchSubmodules = true;
};
@ -111,8 +111,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -121,8 +121,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,21 +1,22 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, rocm-cmake
, gfortran
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
gfortran,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hipfort";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "hipfort";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-3PIqSDyDlY0oVSEx20EPlKGYNkc9xPZtIG3Sbw69esE=";
hash = "sha256-V5XDNM0bYHKnpkcnaDyxIS1zwsgaByJj+znFxJ6VxR0=";
};
nativeBuildInputs = [
@ -51,8 +52,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -61,6 +62,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; # mitx11
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,39 +1,57 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, clang
, libxml2
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
clang,
libxml2,
rocm-merged-llvm,
zlib,
zstd,
perl,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hipify";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "HIPIFY";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-nNyWrPPhUwT7FyASzc3kf5NCTzeqvHybVOc+6hBzkA4=";
hash = "sha256-o/1LNsNtAyQcSug1gf7ujGNRRbvC33kwldrJKZi2LA0=";
};
nativeBuildInputs = [ cmake ];
buildInputs = [ libxml2 ];
nativeBuildInputs = [
cmake
];
buildInputs = [
libxml2
rocm-merged-llvm
zlib
zstd
perl
];
postPatch = ''
substituteInPlace CMakeLists.txt \
--replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang"
chmod +x bin/*
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
postInstall = ''
patchShebangs $out/bin
chmod +x $out/bin/*
chmod +x $out/libexec/*
patchShebangs $out/bin/
patchShebangs $out/libexec/
'';
meta = with lib; {
@ -42,6 +60,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -14,7 +14,7 @@
stdenv.mkDerivation (finalAttrs: {
pname = "hiprand";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -28,7 +28,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "hipRAND";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-uGHzOhUX5JEknVFwhHhWFdPmwLS/TuaXYMeItS7tXIg=";
hash = "sha256-TVc+qFwRiS5tAo1OKI1Wu5hadlwPZmSVZ9SvVvH1w7Y=";
};
nativeBuildInputs = [
@ -41,8 +41,6 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
@ -67,8 +65,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -77,8 +75,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -9,6 +9,8 @@
gfortran,
rocblas,
rocsolver,
rocsparse,
suitesparse,
gtest,
lapack-reference,
buildTests ? false,
@ -19,7 +21,7 @@
# Can also use cuSOLVER
stdenv.mkDerivation (finalAttrs: {
pname = "hipsolver";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -39,7 +41,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "hipSOLVER";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-iMfaOv4TdTkmaRHCZOuqUfjO081J6on71+s8nIwwV00=";
hash = "sha256-ZQUKU3L4DgZ5zM7pCYEix0ulRkl78x/5wJnyCndTAwk=";
};
nativeBuildInputs = [
@ -53,6 +55,8 @@ stdenv.mkDerivation (finalAttrs: {
[
rocblas
rocsolver
rocsparse
suitesparse
]
++ lib.optionals buildTests [
gtest
@ -63,13 +67,13 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
@ -101,8 +105,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -111,8 +115,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -12,6 +12,7 @@
gtest,
openmp,
buildTests ? false,
buildBenchmarks ? false,
buildSamples ? false,
gpuTargets ? [ ],
}:
@ -19,7 +20,7 @@
# This can also use cuSPARSE as a backend instead of rocSPARSE
stdenv.mkDerivation (finalAttrs: {
pname = "hipsparse";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -36,7 +37,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "hipSPARSE";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-fi5b0IF++OiezpM3JuUkhwpmW2apeFH4r5g6CcFseNY=";
hash = "sha256-3a7fKpYyiqG3aGOg7YrTHmKoH4rgTVLD16DvrZ3YY1g=";
};
nativeBuildInputs = [
@ -51,7 +52,7 @@ stdenv.mkDerivation (finalAttrs: {
rocsparse
git
]
++ lib.optionals buildTests [
++ lib.optionals (buildTests || buildBenchmarks) [
gtest
]
++ lib.optionals (buildTests || buildSamples) [
@ -60,20 +61,17 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DBUILD_CLIENTS_SAMPLES=${if buildSamples then "ON" else "OFF"}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
(lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
(lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
(lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples)
]
++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
];
# We have to manually generate the matrices
@ -140,8 +138,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -150,8 +148,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -4,22 +4,25 @@
fetchurl,
callPackage,
dpkg,
rocm-core,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "hsa-amd-aqlprofile-bin";
version = "6.0.2";
version = "6.3.3";
src =
let
version = finalAttrs.version;
dotless = builtins.replaceStrings [ "." ] [ "0" ] version;
incremental = "115";
inherit (finalAttrs) version;
patch = rocm-core.ROCM_LIBPATCH_VERSION;
majorMinor = lib.versions.majorMinor version;
poolVersion = if majorMinor + ".0" == version then majorMinor else version;
incremental = "74";
osRelease = "22.04";
in
fetchurl {
url = "https://repo.radeon.com/rocm/apt/${version}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${dotless}.${dotless}-${incremental}~${osRelease}_amd64.deb";
hash = "sha256-0XeKUKaof5pSMS/UgLwumBDBYgyH/pCex9jViUKENXY=";
url = "https://repo.radeon.com/rocm/apt/${poolVersion}/pool/main/h/hsa-amd-aqlprofile/hsa-amd-aqlprofile_1.0.0.${patch}-${incremental}~${osRelease}_amd64.deb";
hash = "sha256-Lo6gU9ywkujtsKvnOAwL3L8qQNPwjjm0Pm4OyzoUYao=";
};
nativeBuildInputs = [ dpkg ];
@ -31,7 +34,7 @@ stdenv.mkDerivation (finalAttrs: {
runHook preInstall
mkdir -p $out
cp -a opt/rocm-${finalAttrs.version}/* $out
cp -a opt/rocm-${finalAttrs.version}*/* $out
chmod +x $out/lib/libhsa-amd-aqlprofile64.so.1.*
chmod +x $out/lib/hsa-amd-aqlprofile/librocprofv2_att.so
@ -46,8 +49,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ unfree ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,216 +0,0 @@
{
lib,
stdenv,
gcc12Stdenv,
fetchFromGitHub,
rocmUpdateScript,
pkg-config,
cmake,
ninja,
git,
doxygen,
sphinx,
lit,
libxml2,
libxcrypt,
libedit,
libffi,
mpfr,
zlib,
ncurses,
python3Packages,
buildDocs ? true,
buildMan ? true,
buildTests ? true,
targetName ? "llvm",
targetDir ? "llvm",
targetProjects ? [ ],
targetRuntimes ? [ ],
llvmTargetsToBuild ? [ "NATIVE" ], # "NATIVE" resolves into x86 or aarch64 depending on stdenv
extraPatches ? [ ],
extraNativeBuildInputs ? [ ],
extraBuildInputs ? [ ],
extraCMakeFlags ? [ ],
extraPostPatch ? "",
checkTargets ? [
(lib.optionalString buildTests (if targetDir == "runtimes" then "check-runtimes" else "check-all"))
],
extraPostInstall ? "",
hardeningDisable ? [ ],
requiredSystemFeatures ? [ ],
extraLicenses ? [ ],
isBroken ? false,
}:
let
stdenv' = stdenv;
in
let
stdenv =
if stdenv'.cc.cc.isGNU or false && lib.versionAtLeast stdenv'.cc.cc.version "13.0" then
gcc12Stdenv
else
stdenv';
in
let
llvmNativeTarget =
if stdenv.hostPlatform.isx86_64 then
"X86"
else if stdenv.hostPlatform.isAarch64 then
"AArch64"
else
throw "Unsupported ROCm LLVM platform";
inferNativeTarget = t: if t == "NATIVE" then llvmNativeTarget else t;
llvmTargetsToBuild' = [ "AMDGPU" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild;
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-llvm-${targetName}";
version = "6.0.2";
outputs =
[
"out"
]
++ lib.optionals buildDocs [
"doc"
]
++ lib.optionals buildMan [
"man"
"info" # Avoid `attribute 'info' missing` when using with wrapCC
];
patches = extraPatches;
src = fetchFromGitHub {
owner = "ROCm";
repo = "llvm-project";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-uGxalrwMNCOSqSFVrYUBi3ijkMEFFTrzFImmvZKQf6I=";
};
nativeBuildInputs =
[
pkg-config
cmake
ninja
git
(python3Packages.python.withPackages (p: [ p.setuptools ]))
]
++ lib.optionals (buildDocs || buildMan) [
doxygen
sphinx
python3Packages.recommonmark
]
++ lib.optionals (buildTests && !finalAttrs.passthru.isLLVM) [
lit
]
++ extraNativeBuildInputs;
buildInputs = [
libxml2
libxcrypt
libedit
libffi
mpfr
] ++ extraBuildInputs;
propagatedBuildInputs = lib.optionals finalAttrs.passthru.isLLVM [
zlib
ncurses
];
sourceRoot = "${finalAttrs.src.name}/${targetDir}";
cmakeFlags =
[
"-DLLVM_TARGETS_TO_BUILD=${builtins.concatStringsSep ";" llvmTargetsToBuild'}"
]
++ lib.optionals (finalAttrs.passthru.isLLVM && targetProjects != [ ]) [
"-DLLVM_ENABLE_PROJECTS=${lib.concatStringsSep ";" targetProjects}"
]
++
lib.optionals ((finalAttrs.passthru.isLLVM || targetDir == "runtimes") && targetRuntimes != [ ])
[
"-DLLVM_ENABLE_RUNTIMES=${lib.concatStringsSep ";" targetRuntimes}"
]
++ lib.optionals finalAttrs.passthru.isLLVM [
"-DLLVM_INSTALL_UTILS=ON"
"-DLLVM_INSTALL_GTEST=ON"
]
++ lib.optionals (buildDocs || buildMan) [
"-DLLVM_INCLUDE_DOCS=ON"
"-DLLVM_BUILD_DOCS=ON"
# "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core
"-DLLVM_ENABLE_SPHINX=ON"
"-DSPHINX_OUTPUT_HTML=ON"
"-DSPHINX_OUTPUT_MAN=ON"
"-DSPHINX_WARNINGS_AS_ERRORS=OFF"
]
++ lib.optionals buildTests [
"-DLLVM_INCLUDE_TESTS=ON"
"-DLLVM_BUILD_TESTS=ON"
"-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped"
]
++ extraCMakeFlags;
prePatch = ''
cd ../
chmod -R u+w .
'';
postPatch =
''
cd ${targetDir}
''
+ lib.optionalString finalAttrs.passthru.isLLVM ''
patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
''
+ lib.optionalString (buildTests && finalAttrs.passthru.isLLVM) ''
# FileSystem permissions tests fail with various special bits
rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test
rm unittests/Support/Path.cpp
substituteInPlace unittests/Support/CMakeLists.txt \
--replace-fail "Path.cpp" ""
''
+ extraPostPatch;
doCheck = buildTests;
checkTarget = lib.concatStringsSep " " checkTargets;
postInstall =
lib.optionalString buildMan ''
mkdir -p $info
''
+ extraPostInstall;
passthru = {
isLLVM = targetDir == "llvm";
isClang = targetDir == "clang" || builtins.elem "clang" targetProjects;
isROCm = true;
updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
};
inherit hardeningDisable requiredSystemFeatures;
meta = with lib; {
description = "ROCm fork of the LLVM compiler infrastructure";
homepage = "https://github.com/ROCm/llvm-project";
license = with licenses; [ ncsa ] ++ extraLicenses;
maintainers =
with maintainers;
[
acowley
lovesegfault
]
++ teams.rocm.members;
platforms = platforms.linux;
broken = isBroken || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,23 @@
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
index 57368104c914..71c57f72078e 100644
--- a/lib/Driver/ToolChains/Linux.cpp
+++ b/lib/Driver/ToolChains/Linux.cpp
@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
return;
// LOCAL_INCLUDE_DIR
+ if (!SysRoot.empty())
addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include"));
// TOOL_INCLUDE_DIR
AddMultilibIncludeArgs(DriverArgs, CC1Args);
@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
// Add an include of '/include' directly. This isn't provided by default by
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
// add even when Clang is acting as-if it were a system compiler.
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include"));
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include"));
if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl())

View file

@ -0,0 +1,40 @@
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 06f5e7e7e335..8407d664886a 100644
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -340,6 +340,9 @@ private:
void Compilation::ExecuteJobs(const JobList &Jobs,
FailingCommandList &FailingCommands,
bool LogOnly) const {
+ // If >1 job, log as each job finishes so can see progress while building many offloads
+ const bool logJobs = Jobs.size() > 1;
+ auto start_time = std::chrono::steady_clock::now();
// According to UNIX standard, driver need to continue compiling all the
// inputs on the command line even one of them failed.
// In all but CLMode, execute all the jobs unless the necessary inputs for the
@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs,
JS.setJobState(Next, JobScheduler::JS_RUN);
auto Work = [&, Next]() {
+ auto job_start_time = std::chrono::steady_clock::now();
const Command *FailingCommand = nullptr;
if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) {
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
JS.setJobState(Next, JobScheduler::JS_FAIL);
} else {
+ if (logJobs && Next) {
+ auto now = std::chrono::steady_clock::now();
+ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count();
+ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
+ if (duration > 10 && job_duration > 0) {
+ if (Next->getOutputFilenames().empty())
+ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n";
+ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true);
+ else
+ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n";
+ }
+ }
+
JS.setJobState(Next, JobScheduler::JS_DONE);
}
};

View file

@ -1,142 +1,520 @@
{
# stdenv FIXME: Try changing back to this with a new ROCm release https://github.com/NixOS/nixpkgs/issues/271943
gcc12Stdenv,
callPackage,
rocmUpdateScript,
wrapBintoolsWith,
lib,
stdenv,
llvmPackages_18,
overrideCC,
rocm-device-libs,
rocm-runtime,
rocm-thunk,
clr,
fetchFromGitHub,
runCommand,
symlinkJoin,
rdfind,
wrapBintoolsWith,
emptyDirectory,
zstd,
zlib,
gcc-unwrapped,
glibc,
substituteAll,
libffi,
libxml2,
removeReferencesTo,
fetchpatch,
# Build compilers and stdenv suitable for profiling
# compressed line tables (-g1 -gz) and
# frame pointers for sampling profilers (-fno-omit-frame-pointer -momit-leaf-frame-pointer)
# TODO: Should also apply to downstream packages which use rocmClangStdenv
profilableStdenv ? false,
}:
let
## Stage 1 ##
# Projects
llvm = callPackage ./stage-1/llvm.nix {
inherit rocmUpdateScript;
stdenv = gcc12Stdenv;
};
clang-unwrapped = callPackage ./stage-1/clang-unwrapped.nix {
inherit rocmUpdateScript llvm;
stdenv = gcc12Stdenv;
};
lld = callPackage ./stage-1/lld.nix {
inherit rocmUpdateScript llvm;
stdenv = gcc12Stdenv;
llvmPackagesNoBintools = llvmPackages_18.override {
bootBintools = null;
bootBintoolsNoLibc = null;
};
useLibcxx = false; # whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++
# Runtimes
runtimes = callPackage ./stage-1/runtimes.nix {
inherit rocmUpdateScript llvm;
stdenv = gcc12Stdenv;
};
llvmStdenv = overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM;
llvmLibstdcxxStdenv = overrideCC llvmPackagesNoBintools.stdenv (
llvmPackagesNoBintools.libstdcxxClang.override {
inherit (llvmPackages_18) bintools;
}
);
stdenvToBuildRocmLlvm = if useLibcxx then llvmStdenv else llvmLibstdcxxStdenv;
gcc-include = runCommand "gcc-include" { } ''
mkdir -p $out
ln -s ${gcc-unwrapped}/include/ $out/
ln -s ${gcc-unwrapped}/lib/ $out/
'';
## Stage 2 ##
# Helpers
bintools-unwrapped = callPackage ./stage-2/bintools-unwrapped.nix { inherit llvm lld; };
bintools = wrapBintoolsWith { bintools = bintools-unwrapped; };
rStdenv = callPackage ./stage-2/rstdenv.nix {
inherit
llvm
clang-unwrapped
lld
runtimes
bintools
;
stdenv = gcc12Stdenv;
# A prefix for use as the GCC prefix when building rocmcxx
disallowedRefsForToolchain = [
stdenv.cc
stdenv.cc.cc
stdenv.cc.bintools
gcc-unwrapped
stdenvToBuildRocmLlvm
];
gcc-prefix =
let
gccPrefixPaths = [
gcc-unwrapped
gcc-unwrapped.lib
glibc.dev
];
in
symlinkJoin {
name = "gcc-prefix";
paths = gccPrefixPaths ++ [
glibc
];
disallowedRequisites = gccPrefixPaths;
postBuild = ''
rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h
mkdir /build/tmpout
mv $out/* /build/tmpout
cp -Lr --no-preserve=mode /build/tmpout/* $out/
set -x
versionedIncludePath="$(echo $out/include/c++/*/)"
mv $versionedIncludePath/* $out/include/c++/
rm -rf $versionedIncludePath/
find $out/lib -type f -exec ${removeReferencesTo}/bin/remove-references-to -t ${gcc-unwrapped.lib} {} +
ln -s $out $out/x86_64-unknown-linux-gnu
'';
};
version = "6.3.1";
# major version of this should be the clang version ROCm forked from
rocmLlvmVersion = "18.0.0-${llvmSrc.rev}";
usefulOutputs =
drv:
builtins.filter (x: x != null) [
drv
(drv.lib or null)
(drv.dev or null)
];
listUsefulOutputs = builtins.concatMap usefulOutputs;
llvmSrc = fetchFromGitHub {
# Performance improvements cherry-picked on top of rocm-6.3.x
# most importantly, amdgpu-early-alwaysinline memory usage fix
owner = "LunNova";
repo = "llvm-project-rocm";
rev = "4182046534deb851753f0d962146e5176f648893";
hash = "sha256-sPmYi1WiiAqnRnHVNba2nPUxGflBC01FWCTNLPlYF9c=";
};
llvmSrcFixed = llvmSrc;
llvmMajorVersion = lib.versions.major rocmLlvmVersion;
# An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree
# optionally using LLVM libcxx
llvmPackagesRocm = llvmPackages_18.override (_old: {
stdenv = stdenvToBuildRocmLlvm; # old.stdenv #llvmPackagesNoBintools.libcxxStdenv;
# not setting gitRelease = because that causes patch selection logic to use git patches
# ROCm LLVM is closer to 18 official
# gitRelease = {}; officialRelease = null;
officialRelease = { }; # Set but empty because we're overriding everything from it.
version = rocmLlvmVersion;
src = llvmSrcFixed;
monorepoSrc = llvmSrcFixed;
doCheck = false;
});
sysrootCompiler =
cc: name: paths:
let
linked = symlinkJoin { inherit name paths; };
in
runCommand name { } ''
set -x
mkdir -p $out/
cp --reflink=auto -rL ${linked}/* $out/
chmod -R +rw $out
mkdir -p $out/usr
ln -s $out/ $out/usr/local
mkdir -p $out/nix-support/
rm -rf $out/lib64 # we don't need mixed 32 bit
echo 'export CC=clang' >> $out/nix-support/setup-hook
echo 'export CXX=clang++' >> $out/nix-support/setup-hook
mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/
ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/
find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} +
find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} +
# our /include now has more than clang expects, so this specific dir still needs to point to cc.dev
# FIXME: could copy into a different subdir?
sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake
${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space
'';
findClangNostdlibincPatch =
x:
(
(lib.strings.hasSuffix "add-nostdlibinc-flag.patch" (builtins.baseNameOf x))
|| (lib.strings.hasSuffix "clang-at-least-16-LLVMgold-path.patch" (builtins.baseNameOf x))
);
llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${
{
"x86_64" = "X86";
"aarch64" = "AArch64";
}
.${llvmStdenv.targetPlatform.parsed.cpu.name}
}";
# -ffat-lto-objects = emit LTO object files that are compatible with non-LTO-supporting builds too
# FatLTO objects are a special type of fat object file that contain LTO compatible IR in addition to generated object code,
# instead of containing object code for multiple target architectures. This allows users to defer the choice of whether to
# use LTO or not to link-time, and has been a feature available in other compilers, like GCC, for some time.
tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x);
addGccLtoCmakeFlags = !llvmPackagesRocm.stdenv.cc.isClang;
llvmExtraCflags =
"-O3 -DNDEBUG -march=skylake -mtune=znver3"
+ (lib.optionalString addGccLtoCmakeFlags " -D_GLIBCXX_USE_CXX11_ABI=0 -flto -ffat-lto-objects -flto-compression-level=19 -Wl,-flto")
+ (lib.optionalString llvmPackagesRocm.stdenv.cc.isClang " -flto=thin -ffat-lto-objects")
+ (lib.optionalString profilableStdenv " -fno-omit-frame-pointer -momit-leaf-frame-pointer -gz -g1");
in
rec {
inherit
llvm
clang-unwrapped
lld
bintools
;
# Runtimes
libc = callPackage ./stage-2/libc.nix {
inherit rocmUpdateScript;
stdenv = rStdenv;
inherit (llvmPackagesRocm) libunwind;
inherit (llvmPackagesRocm) libcxx;
# Pass through original attrs for debugging where non-overridden llvm/clang is getting used
# llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig
# clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig
llvm = (llvmPackagesRocm.llvm.override { ninja = emptyDirectory; }).overrideAttrs (old: {
dontStrip = profilableStdenv;
nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ];
buildInputs = old.buildInputs ++ [
zstd
zlib
];
env.NIX_BUILD_ID_STYLE = "fast";
postPatch = ''
${old.postPatch or ""}
patchShebangs lib/OffloadArch/make_generated_offload_arch_h.sh
'';
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
cmakeFlags =
(builtins.filter tablegenUsage old.cmakeFlags)
++ [
llvmTargetsFlag
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
"-DLLVM_ENABLE_THREADS=ON"
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_USE_LINKER=lld"
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
]
++ lib.optionals addGccLtoCmakeFlags [
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
]
++ lib.optionals useLibcxx [
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_USE_LINKER=lld"
"-DLLVM_ENABLE_LIBCXX=ON"
];
preConfigure = ''
${old.preConfigure or ""}
cmakeFlagsArray+=(
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
)
'';
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
postFixup = ''
${old.postFixup or ""}
remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a"
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
});
lld =
(llvmPackagesRocm.lld.override {
libllvm = llvm;
ninja = emptyDirectory;
}).overrideAttrs
(old: {
patches = builtins.filter (
x: !(lib.strings.hasSuffix "more-openbsd-program-headers.patch" (builtins.baseNameOf x))
) old.patches;
dontStrip = profilableStdenv;
nativeBuildInputs = old.nativeBuildInputs ++ [
llvmPackagesNoBintools.lld
removeReferencesTo
];
buildInputs = old.buildInputs ++ [
zstd
zlib
];
env.NIX_BUILD_ID_STYLE = "fast";
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
cmakeFlags =
(builtins.filter tablegenUsage old.cmakeFlags)
++ [
llvmTargetsFlag
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
"-DLLVM_ENABLE_THREADS=ON"
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_USE_LINKER=lld"
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
]
++ lib.optionals addGccLtoCmakeFlags [
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
]
++ lib.optionals useLibcxx [
"-DLLVM_ENABLE_LIBCXX=ON"
];
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
postFixup = ''
${old.postFixup or ""}
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
preConfigure = ''
${old.preConfigure or ""}
cmakeFlagsArray+=(
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
)
'';
});
clang-unwrapped =
(
(llvmPackagesRocm.clang-unwrapped.override {
libllvm = llvm;
ninja = emptyDirectory;
}).overrideAttrs
(
old:
let
filteredPatches = builtins.filter (x: !(findClangNostdlibincPatch x)) old.patches;
in
{
meta.platforms = [
"x86_64-linux"
];
pname = "${old.pname}-rocm";
patches = filteredPatches ++ [
# Never add FHS include paths
./clang-bodge-ignore-systemwide-incls.diff
# Prevents builds timing out if a single compiler invocation is very slow but
# per-arch jobs are completing by ensuring there's terminal output
./clang-log-jobs.diff
(fetchpatch {
# [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler
sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8=";
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch";
relative = "clang";
})
# FIXME: Needed due to https://github.com/NixOS/nixpkgs/issues/375431
# Once we can switch to overrideScope this can be removed
(substituteAll {
src = ./../../../compilers/llvm/common/clang/clang-at-least-16-LLVMgold-path.patch;
libllvmLibdir = "${llvm.lib}/lib";
})
];
nativeBuildInputs = old.nativeBuildInputs ++ [
llvmPackagesNoBintools.lld
removeReferencesTo
];
buildInputs = old.buildInputs ++ [
zstd
zlib
];
dontStrip = profilableStdenv;
LDFLAGS = "-Wl,--build-id=sha1,--icf=all,--compress-debug-sections=zlib";
env = (old.env or { }) // {
NIX_BUILD_ID_STYLE = "fast";
};
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
requiredSystemFeatures = (old.requiredSystemFeatures or [ ]) ++ [ "big-parallel" ];
# https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11
cmakeFlags =
(builtins.filter tablegenUsage old.cmakeFlags)
++ [
llvmTargetsFlag
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
"-DLLVM_ENABLE_THREADS=ON"
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_USE_LINKER=lld"
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" useLibcxx)
"-DCLANG_DEFAULT_CXX_STDLIB=${if useLibcxx then "libc++" else "libstdc++"}"
]
++ lib.optionals addGccLtoCmakeFlags [
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
"-DCMAKE_NM=${gcc-unwrapped}/bin/gcc-nm"
]
++ lib.optionals useLibcxx [
"-DLLVM_ENABLE_LTO=Thin"
"-DLLVM_ENABLE_LIBCXX=ON"
"-DLLVM_USE_LINKER=lld"
"-DCLANG_DEFAULT_RTLIB=compiler-rt"
]
++ lib.optionals (!useLibcxx) [
# FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX?
"-DGCC_INSTALL_PREFIX=${gcc-prefix}"
];
postFixup =
(old.postFixup or "")
+ ''
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
preConfigure =
(old.preConfigure or "")
+ ''
cmakeFlagsArray+=(
'-DCMAKE_C_FLAGS_RELEASE=${llvmExtraCflags}'
'-DCMAKE_CXX_FLAGS_RELEASE=${llvmExtraCflags}'
)
'';
}
)
)
// {
libllvm = llvm;
};
# A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path
# in the right order
# and expects its libc to be in the sysroot
rocmcxx =
(sysrootCompiler clang-unwrapped "rocmcxx" (
listUsefulOutputs (
[
clang-unwrapped
bintools
compiler-rt
]
++ (lib.optionals useLibcxx [
libcxx
])
++ (lib.optionals (!useLibcxx) [
gcc-include
glibc
glibc.dev
])
)
))
// {
version = llvmMajorVersion;
cc = rocmcxx;
libllvm = llvm;
isClang = true;
isGNU = false;
};
clang-tools = llvmPackagesRocm.clang-tools.override {
inherit clang-unwrapped clang;
};
libunwind = callPackage ./stage-2/libunwind.nix {
inherit rocmUpdateScript;
stdenv = rStdenv;
};
libcxxabi = callPackage ./stage-2/libcxxabi.nix {
inherit rocmUpdateScript;
stdenv = rStdenv;
};
libcxx = callPackage ./stage-2/libcxx.nix {
inherit rocmUpdateScript;
stdenv = rStdenv;
};
compiler-rt = callPackage ./stage-2/compiler-rt.nix {
inherit rocmUpdateScript llvm;
stdenv = rStdenv;
compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: {
patches = old.patches ++ [
(fetchpatch {
name = "Fix-missing-main-function-in-float16-bfloat16-support-checks.patch";
url = "https://github.com/ROCm/llvm-project/commit/68d8b3846ab1e6550910f2a9a685690eee558af2.patch";
hash = "sha256-Db+L1HFMWVj4CrofsGbn5lnMoCzEcU+7q12KKFb17/g=";
relative = "compiler-rt";
})
];
});
compiler-rt = compiler-rt-libc;
bintools = wrapBintoolsWith {
bintools = llvmPackagesRocm.bintools-unwrapped.override {
inherit lld llvm;
};
};
## Stage 3 ##
# Helpers
clang = callPackage ./stage-3/clang.nix {
inherit
llvm
lld
clang-unwrapped
bintools
libc
libunwind
libcxxabi
libcxx
compiler-rt
;
stdenv = gcc12Stdenv;
clang = rocmcxx;
# Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects
rocm-merged-llvm = symlinkJoin {
name = "rocm-llvm-merge";
paths =
[
llvm
llvm.dev
lld
lld.lib
lld.dev
libunwind
libunwind.dev
compiler-rt
compiler-rt.dev
rocmcxx
]
++ lib.optionals useLibcxx [
libcxx
libcxx.out
libcxx.dev
];
postBuild = builtins.unsafeDiscardStringContext ''
found_files=$(find $out -name '*.cmake')
if [ -z "$found_files" ]; then
>&2 echo "Error: No CMake files found in $out"
exit 1
fi
for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do
if grep "$target" $found_files; then
>&2 echo "Unexpected ref to $target (clang-unwrapped) found"
# exit 1
# # FIXME: enable this to reduce closure size
fi
done
'';
inherit version;
llvm-src = llvmSrc;
};
rocmClangStdenv = overrideCC gcc12Stdenv clang;
rocmClangStdenv = overrideCC (
if useLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv
) clang;
# Projects
clang-tools-extra = callPackage ./stage-3/clang-tools-extra.nix {
inherit rocmUpdateScript llvm clang-unwrapped;
stdenv = rocmClangStdenv;
};
libclc = callPackage ./stage-3/libclc.nix {
inherit rocmUpdateScript llvm clang;
stdenv = rocmClangStdenv;
};
lldb = callPackage ./stage-3/lldb.nix {
inherit rocmUpdateScript clang;
stdenv = rocmClangStdenv;
};
mlir = callPackage ./stage-3/mlir.nix {
inherit rocmUpdateScript clr;
stdenv = rocmClangStdenv;
};
polly = callPackage ./stage-3/polly.nix {
inherit rocmUpdateScript;
stdenv = rocmClangStdenv;
};
flang = callPackage ./stage-3/flang.nix {
inherit rocmUpdateScript clang-unwrapped mlir;
stdenv = rocmClangStdenv;
};
openmp = callPackage ./stage-3/openmp.nix {
inherit
rocmUpdateScript
llvm
clang-unwrapped
clang
rocm-device-libs
rocm-runtime
rocm-thunk
;
stdenv = rocmClangStdenv;
};
# Runtimes
pstl = callPackage ./stage-3/pstl.nix {
inherit rocmUpdateScript;
stdenv = rocmClangStdenv;
};
openmp =
(llvmPackagesRocm.openmp.override {
stdenv = rocmClangStdenv;
llvm = rocm-merged-llvm;
targetLlvm = rocm-merged-llvm;
clang-unwrapped = clang;
}).overrideAttrs
(old: {
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ removeReferencesTo ];
cmakeFlags =
old.cmakeFlags
++ [
"-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
# OMPD support is broken in ROCm 6.3. Haven't investigated why.
"-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE"
"-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE"
]
++ lib.optionals addGccLtoCmakeFlags [
"-DCMAKE_AR=${gcc-unwrapped}/bin/gcc-ar"
"-DCMAKE_RANLIB=${gcc-unwrapped}/bin/gcc-ranlib"
];
env.LLVM = "${rocm-merged-llvm}";
env.LLVM_DIR = "${rocm-merged-llvm}";
buildInputs = old.buildInputs ++ [
rocm-device-libs
rocm-runtime
zlib
zstd
libxml2
libffi
];
});
}

View file

@ -1,48 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
llvm,
}:
callPackage ../base.nix {
inherit stdenv rocmUpdateScript;
targetName = "clang-unwrapped";
targetDir = "clang";
extraBuildInputs = [ llvm ];
extraCMakeFlags = [
"-DCLANG_INCLUDE_DOCS=ON"
"-DCLANG_INCLUDE_TESTS=ON"
];
extraPostPatch = ''
# Looks like they forgot to add finding libedit to the standalone build
ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules
substituteInPlace CMakeLists.txt \
--replace-fail "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)"
# `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'`
rm test/Analysis/scan-build/*.test
rm test/Analysis/scan-build/rebuild_index/rebuild_index.test
# `does not depend on a module exporting 'baz.h'`
rm test/Modules/header-attribs.cpp
# We do not have HIP or the ROCm stack available yet
rm test/Driver/hip-options.hip
# ???? `ld: cannot find crti.o: No such file or directory` linker issue?
rm test/Interpreter/dynamic-library.cpp
# `fatal error: 'stdio.h' file not found`
rm test/OpenMP/amdgcn_emit_llvm.c
'';
extraPostInstall = ''
mv bin/clang-tblgen $out/bin
'';
requiredSystemFeatures = [ "big-parallel" ];
}

View file

@ -1,15 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
llvm,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildMan = false; # No man pages to build
targetName = "lld";
targetDir = targetName;
extraBuildInputs = [ llvm ];
checkTargets = [ "check-${targetName}" ];
}

View file

@ -1,11 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix {
inherit stdenv rocmUpdateScript;
requiredSystemFeatures = [ "big-parallel" ];
isBroken = stdenv.hostPlatform.isAarch64; # https://github.com/ROCm/ROCm/issues/1831#issuecomment-1278205344
}

View file

@ -1,32 +0,0 @@
{
lib,
stdenv,
callPackage,
rocmUpdateScript,
llvm,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false;
buildMan = false;
buildTests = false;
targetName = "runtimes";
targetDir = targetName;
targetRuntimes = [
"libunwind"
"libcxxabi"
"libcxx"
"compiler-rt"
];
extraBuildInputs = [ llvm ];
extraCMakeFlags = [
"-DLIBCXX_INCLUDE_BENCHMARKS=OFF"
"-DLIBCXX_CXX_ABI=libcxxabi"
];
extraLicenses = [ lib.licenses.mit ];
}

View file

@ -1,176 +0,0 @@
../libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp
../libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp
../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp
../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/construct.cxx2a.pass.cpp
../libcxx/test/libcxx/input.output/filesystems/class.directory_entry/directory_entry.mods/last_write_time.pass.cpp
../libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp
../libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
../libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/no_allocation.pass.cpp
../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_underaligned_buffer.pass.cpp
../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
../libcxx/test/std/containers/associative/map/map.access/index_key.pass.cpp
../libcxx/test/std/containers/associative/map/map.access/index_rv_key.pass.cpp
../libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp
../libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp
../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp
../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp
../libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp
../libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
../libcxx/test/std/containers/unord/unord.map/unord.map.elem/index.pass.cpp
../libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp
../libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp
../libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
../libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
../libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/path.decompose.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_normal.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_relative_and_proximate.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/clear.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/make_preferred.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/remove_filename.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_extension.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_filename.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/swap.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.factory.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp
../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/swap.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_large.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_symlink/copy_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory_symlink/create_directory_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_symlink/create_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.proximate/proximate.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/remove_all.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove/remove.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.temp_dir_path/temp_directory_path.pass.cpp
../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
../libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp
../libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp
../libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp
../libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp
../libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp
../libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/default.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp
../libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp
../libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp
../libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.alg/swap.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_assign.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_assign.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp
../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp
../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_throw.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp
../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.ctor/without_buffer.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_deallocate.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_initial_buffer.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_zero_sized_buffer.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.ctor/ctor_does_not_allocate.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_overaligned_request.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_reuse_blocks.pass.cpp
../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate.pass.cpp
../libcxx/test/std/language.support/support.dynamic/hardware_inference_size.compile.pass.cpp
../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array14.pass.cpp
../libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete14.pass.cpp
../libcxx/test/libcxx/selftest/sh.cpp/empty.sh.cpp
../libcxx/test/libcxx/transitive_includes.sh.cpp

View file

@ -1,29 +0,0 @@
{
runCommand,
llvm,
lld,
}:
runCommand "rocm-llvm-binutils-${llvm.version}" { preferLocalBuild = true; } ''
mkdir -p $out/bin
for prog in ${lld}/bin/*; do
ln -s $prog $out/bin/$(basename $prog)
done
for prog in ${llvm}/bin/*; do
ln -sf $prog $out/bin/$(basename $prog)
done
ln -s ${llvm}/bin/llvm-ar $out/bin/ar
ln -s ${llvm}/bin/llvm-as $out/bin/as
ln -s ${llvm}/bin/llvm-dwp $out/bin/dwp
ln -s ${llvm}/bin/llvm-nm $out/bin/nm
ln -s ${llvm}/bin/llvm-objcopy $out/bin/objcopy
ln -s ${llvm}/bin/llvm-objdump $out/bin/objdump
ln -s ${llvm}/bin/llvm-ranlib $out/bin/ranlib
ln -s ${llvm}/bin/llvm-readelf $out/bin/readelf
ln -s ${llvm}/bin/llvm-size $out/bin/size
ln -s ${llvm}/bin/llvm-strip $out/bin/strip
ln -s ${lld}/bin/lld $out/bin/ld
''

View file

@ -1,64 +0,0 @@
{
lib,
stdenv,
callPackage,
rocmUpdateScript,
llvm,
glibc,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No documentation to build
buildMan = false; # No man pages to build
targetName = "compiler-rt";
targetDir = "runtimes";
targetRuntimes = [
"libunwind"
"libcxxabi"
"libcxx"
targetName
];
extraCMakeFlags = [
"-DCOMPILER_RT_INCLUDE_TESTS=ON"
"-DCOMPILER_RT_USE_LLVM_UNWINDER=ON"
"-DCOMPILER_RT_CXX_LIBRARY=libcxx"
"-DCOMPILER_RT_CAN_EXECUTE_TESTS=OFF" # We can't run most of these
# Workaround having to build combined
"-DLIBUNWIND_INCLUDE_DOCS=OFF"
"-DLIBUNWIND_INCLUDE_TESTS=OFF"
"-DLIBUNWIND_USE_COMPILER_RT=ON"
"-DLIBUNWIND_INSTALL_LIBRARY=OFF"
"-DLIBUNWIND_INSTALL_HEADERS=OFF"
"-DLIBCXXABI_INCLUDE_TESTS=OFF"
"-DLIBCXXABI_USE_LLVM_UNWINDER=ON"
"-DLIBCXXABI_USE_COMPILER_RT=ON"
"-DLIBCXXABI_INSTALL_LIBRARY=OFF"
"-DLIBCXXABI_INSTALL_HEADERS=OFF"
"-DLIBCXX_INCLUDE_DOCS=OFF"
"-DLIBCXX_INCLUDE_TESTS=OFF"
"-DLIBCXX_USE_COMPILER_RT=ON"
"-DLIBCXX_CXX_ABI=libcxxabi"
"-DLIBCXX_INSTALL_LIBRARY=OFF"
"-DLIBCXX_INSTALL_HEADERS=OFF"
];
extraPostPatch = ''
# `No such file or directory: 'ldd'`
substituteInPlace ../compiler-rt/test/lit.common.cfg.py \
--replace "'ldd'," "'${glibc.bin}/bin/ldd',"
# We can run these
substituteInPlace ../compiler-rt/test/CMakeLists.txt \
--replace "endfunction()" "endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)"
# Could not launch llvm-config in /build/source/runtimes/build/bin
mkdir -p build/bin
ln -s ${llvm}/bin/llvm-config build/bin
'';
extraLicenses = [ lib.licenses.mit ];
}

View file

@ -1,27 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildMan = false; # No man pages to build
targetName = "libc";
targetDir = "runtimes";
targetRuntimes = [ targetName ];
extraPostPatch = ''
# `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:`
# We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working...
substituteInPlace ../libc/test/src/math/log10_test.cpp \
--replace-fail "i < N" "i < 0" \
--replace-fail "test(mpfr::RoundingMode::Nearest);" "" \
--replace-fail "test(mpfr::RoundingMode::Downward);" "" \
--replace-fail "test(mpfr::RoundingMode::Upward);" "" \
--replace-fail "test(mpfr::RoundingMode::TowardZero);" ""
'';
checkTargets = [ "check-${targetName}" ];
hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"`
}

View file

@ -1,43 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildMan = false; # No man pages to build
targetName = "libcxx";
targetDir = "runtimes";
targetRuntimes = [
"libunwind"
"libcxxabi"
targetName
];
extraCMakeFlags = [
"-DLIBCXX_INCLUDE_DOCS=ON"
"-DLIBCXX_INCLUDE_TESTS=ON"
"-DLIBCXX_USE_COMPILER_RT=ON"
"-DLIBCXX_CXX_ABI=libcxxabi"
# Workaround having to build combined
"-DLIBUNWIND_INCLUDE_DOCS=OFF"
"-DLIBUNWIND_INCLUDE_TESTS=OFF"
"-DLIBUNWIND_USE_COMPILER_RT=ON"
"-DLIBUNWIND_INSTALL_LIBRARY=OFF"
"-DLIBUNWIND_INSTALL_HEADERS=OFF"
"-DLIBCXXABI_INCLUDE_TESTS=OFF"
"-DLIBCXXABI_USE_LLVM_UNWINDER=ON"
"-DLIBCXXABI_USE_COMPILER_RT=ON"
"-DLIBCXXABI_INSTALL_LIBRARY=OFF"
"-DLIBCXXABI_INSTALL_HEADERS=OFF"
];
# Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered
extraPostPatch = ''
chmod +w -R ../libcxx/test/{libcxx,std}
cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm
'';
}

View file

@ -1,38 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No documentation to build
buildMan = false; # No man pages to build
targetName = "libcxxabi";
targetDir = "runtimes";
targetRuntimes = [
"libunwind"
targetName
"libcxx"
];
extraCMakeFlags = [
"-DLIBCXXABI_INCLUDE_TESTS=ON"
"-DLIBCXXABI_USE_LLVM_UNWINDER=ON"
"-DLIBCXXABI_USE_COMPILER_RT=ON"
# Workaround having to build combined
"-DLIBUNWIND_INCLUDE_DOCS=OFF"
"-DLIBUNWIND_INCLUDE_TESTS=OFF"
"-DLIBUNWIND_USE_COMPILER_RT=ON"
"-DLIBUNWIND_INSTALL_LIBRARY=OFF"
"-DLIBUNWIND_INSTALL_HEADERS=OFF"
"-DLIBCXX_INCLUDE_DOCS=OFF"
"-DLIBCXX_INCLUDE_TESTS=OFF"
"-DLIBCXX_USE_COMPILER_RT=ON"
"-DLIBCXX_CXX_ABI=libcxxabi"
"-DLIBCXX_INSTALL_LIBRARY=OFF"
"-DLIBCXX_INSTALL_HEADERS=OFF"
];
}

View file

@ -1,27 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildMan = false; # No man pages to build
targetName = "libunwind";
targetDir = "runtimes";
targetRuntimes = [ targetName ];
extraCMakeFlags = [
"-DLIBUNWIND_INCLUDE_DOCS=ON"
"-DLIBUNWIND_INCLUDE_TESTS=ON"
"-DLIBUNWIND_USE_COMPILER_RT=ON"
];
extraPostPatch = ''
# `command had no output on stdout or stderr` (Says these unsupported tests)
chmod +w -R ../libunwind/test
rm ../libunwind/test/floatregister.pass.cpp
rm ../libunwind/test/unwind_leaffunction.pass.cpp
rm ../libunwind/test/libunwind_02.pass.cpp
'';
}

View file

@ -1,37 +0,0 @@
{
stdenv,
overrideCC,
wrapCCWith,
llvm,
clang-unwrapped,
lld,
runtimes,
bintools,
}:
overrideCC stdenv (wrapCCWith rec {
inherit bintools;
libcxx = runtimes;
cc = clang-unwrapped;
gccForLibs = stdenv.cc.cc;
extraPackages = [
llvm
lld
];
nixSupport.cc-cflags = [
"-resource-dir=$out/resource-root"
"-fuse-ld=lld"
"-rtlib=compiler-rt"
"-unwindlib=libunwind"
"-Wno-unused-command-line-argument"
];
extraBuildCommands = ''
clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
mkdir -p $out/resource-root
ln -s ${cc}/lib/clang/$clang_version/include $out/resource-root
ln -s ${runtimes}/lib $out/resource-root
'';
})

View file

@ -1,122 +0,0 @@
runtime/test/tasking/hidden_helper_task/gtid.cpp
runtime/test/ompt/parallel/parallel_if0.c
runtime/test/ompt/parallel/serialized.c
runtime/test/ompt/teams/parallel_team.c
runtime/test/ompt/teams/serial_teams.c
runtime/test/ompt/teams/serialized.c
runtime/test/ompt/teams/team.c
libomptarget/test/api/assert.c
libomptarget/test/api/omp_device_managed_memory.c
libomptarget/test/api/omp_device_memory.c
libomptarget/test/api/omp_get_device_num.c
libomptarget/test/api/omp_host_pinned_memory.c
libomptarget/test/api/omp_host_pinned_memory_alloc.c
libomptarget/test/api/omp_target_memcpy_async1.c
libomptarget/test/api/omp_target_memcpy_async2.c
libomptarget/test/api/omp_target_memcpy_rect_async1.c
libomptarget/test/api/omp_target_memcpy_rect_async2.c
libomptarget/test/mapping/array_section_implicit_capture.c
libomptarget/test/mapping/data_absent_at_exit.c
libomptarget/test/mapping/data_member_ref.cpp
libomptarget/test/mapping/declare_mapper_api.cpp
libomptarget/test/mapping/declare_mapper_target.cpp
libomptarget/test/mapping/declare_mapper_target_data.cpp
libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
libomptarget/test/mapping/firstprivate_aligned.cpp
libomptarget/test/mapping/has_device_addr.cpp
libomptarget/test/mapping/implicit_device_ptr.c
libomptarget/test/mapping/is_device_ptr.cpp
libomptarget/test/mapping/lambda_mapping.cpp
libomptarget/test/mapping/low_alignment.c
libomptarget/test/mapping/map_back_race.cpp
libomptarget/test/mapping/power_of_two_alignment.c
libomptarget/test/mapping/pr38704.c
libomptarget/test/mapping/prelock.cpp
libomptarget/test/mapping/present/target_data_at_exit.c
libomptarget/test/mapping/private_mapping.c
libomptarget/test/mapping/ptr_and_obj_motion.c
libomptarget/test/mapping/reduction_implicit_map.cpp
libomptarget/test/mapping/target_derefence_array_pointrs.cpp
libomptarget/test/mapping/target_map_for_member_data.cpp
libomptarget/test/mapping/target_update_array_extension.c
libomptarget/test/mapping/target_use_device_addr.c
libomptarget/test/offloading/atomic-compare-signedness.c
libomptarget/test/offloading/bug47654.cpp
libomptarget/test/offloading/bug49021.cpp
libomptarget/test/offloading/bug49779.cpp
libomptarget/test/offloading/bug50022.cpp
libomptarget/test/offloading/bug51781.c
libomptarget/test/offloading/bug51982.c
libomptarget/test/offloading/bug53727.cpp
libomptarget/test/offloading/complex_reduction.cpp
libomptarget/test/offloading/cuda_no_devices.c
libomptarget/test/offloading/d2d_memcpy.c
libomptarget/test/offloading/dynamic_module.c
libomptarget/test/offloading/dynamic_module_load.c
libomptarget/test/offloading/global_constructor.cpp
libomptarget/test/offloading/lone_target_exit_data.c
libomptarget/test/offloading/memory_manager.cpp
libomptarget/test/offloading/parallel_offloading_map.cpp
libomptarget/test/offloading/static_linking.c
libomptarget/test/offloading/std_complex_arithmetic.cpp
libomptarget/test/offloading/target-teams-atomic.c
libomptarget/test/offloading/target_constexpr_mapping.cpp
libomptarget/test/offloading/target_critical_region.cpp
libomptarget/test/offloading/target_depend_nowait.cpp
libomptarget/test/offloading/target_nowait_target.cpp
libomptarget/test/offloading/taskloop_offload_nowait.cpp
libomptarget/test/offloading/test_libc.cpp
libomptarget/test/ompt/veccopy.c
libomptarget/test/ompt/veccopy_disallow_both.c
libomptarget/test/ompt/veccopy_emi.c
libomptarget/test/ompt/veccopy_emi_map.c
libomptarget/test/ompt/veccopy_map.c
libomptarget/test/ompt/veccopy_no_device_init.c
libomptarget/test/ompt/veccopy_wrong_return.c
libomptarget/test/api/is_initial_device.c
libomptarget/test/mapping/declare_mapper_nested_default_mappers_array_subscript.cpp
libomptarget/test/mapping/declare_mapper_nested_default_mappers_ptr_subscript.cpp
libomptarget/test/mapping/declare_mapper_nested_default_mappers_var.cpp
libomptarget/test/mapping/target_pointers_members_map.cpp
libomptarget/test/api/omp_dynamic_shared_memory_mixed.c
libomptarget/test/api/omp_env_vars.c
libomptarget/test/api/omp_get_mapped_ptr.c
libomptarget/test/api/omp_get_num_devices.c
libomptarget/test/api/omp_get_num_devices_with_empty_target.c
libomptarget/test/mapping/alloc_fail.c
libomptarget/test/mapping/array_section_use_device_ptr.c
libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
libomptarget/test/mapping/declare_mapper_target_update.cpp
libomptarget/test/mapping/delete_inf_refcount.c
libomptarget/test/mapping/lambda_by_value.cpp
libomptarget/test/mapping/ompx_hold/omp_target_disassociate_ptr.c
libomptarget/test/mapping/ompx_hold/struct.c
libomptarget/test/mapping/ompx_hold/target-data.c
libomptarget/test/mapping/ompx_hold/target.c
libomptarget/test/mapping/present/target.c
libomptarget/test/mapping/present/target_array_extension.c
libomptarget/test/mapping/present/target_data.c
libomptarget/test/mapping/present/target_data_array_extension.c
libomptarget/test/mapping/present/target_enter_data.c
libomptarget/test/mapping/present/target_exit_data_delete.c
libomptarget/test/mapping/present/target_exit_data_release.c
libomptarget/test/mapping/present/target_update.c
libomptarget/test/mapping/present/target_update_array_extension.c
libomptarget/test/mapping/present/zero_length_array_section.c
libomptarget/test/mapping/present/zero_length_array_section_exit.c
libomptarget/test/mapping/target_data_array_extension_at_exit.c
libomptarget/test/mapping/target_has_device_addr.c
libomptarget/test/mapping/target_implicit_partial_map.c
libomptarget/test/mapping/target_wrong_use_device_addr.c
libomptarget/test/offloading/host_as_target.c
libomptarget/test/offloading/info.c
libomptarget/test/offloading/offloading_success.c
libomptarget/test/offloading/offloading_success.cpp
libomptarget/test/offloading/wtime.c
libomptarget/test/unified_shared_memory/api.c
libomptarget/test/unified_shared_memory/associate_ptr.c
libomptarget/test/unified_shared_memory/close_enter_exit.c
libomptarget/test/unified_shared_memory/close_manual.c
libomptarget/test/unified_shared_memory/close_member.c
libomptarget/test/unified_shared_memory/close_modifier.c

View file

@ -1,11 +0,0 @@
./test/Target/LLVMIR/openmp-llvm.mlir
./test/mlir-spirv-cpu-runner/double.mlir
./test/mlir-spirv-cpu-runner/simple_add.mlir
./test/mlir-vulkan-runner/addf.mlir
./test/mlir-vulkan-runner/addi.mlir
./test/mlir-vulkan-runner/addi8.mlir
./test/mlir-vulkan-runner/mulf.mlir
./test/mlir-vulkan-runner/smul_extended.mlir
./test/mlir-vulkan-runner/subf.mlir
./test/mlir-vulkan-runner/time.mlir
./test/mlir-vulkan-runner/umul_extended.mlir

View file

@ -1,43 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
llvm,
clang-unwrapped,
gtest,
}:
callPackage ../base.nix {
inherit stdenv rocmUpdateScript;
buildTests = false; # `invalid operands to binary expression ('std::basic_stringstream<char>' and 'const llvm::StringRef')`
targetName = "clang-tools-extra";
targetProjects = [
"clang"
"clang-tools-extra"
];
extraBuildInputs = [ gtest ];
extraCMakeFlags = [
"-DLLVM_INCLUDE_DOCS=OFF"
"-DLLVM_INCLUDE_TESTS=OFF"
"-DCLANG_INCLUDE_DOCS=OFF"
"-DCLANG_INCLUDE_TESTS=ON"
"-DCLANG_TOOLS_EXTRA_INCLUDE_DOCS=ON"
];
extraPostInstall = ''
# Remove LLVM and Clang
for path in `find ${llvm} ${clang-unwrapped}`; do
if [ $path != ${llvm} ] && [ $path != ${clang-unwrapped} ]; then
rm -f $out''${path#${llvm}} $out''${path#${clang-unwrapped}} || true
fi
done
# Cleanup empty directories
find $out -type d -empty -delete
'';
requiredSystemFeatures = [ "big-parallel" ];
}

View file

@ -1,77 +0,0 @@
{
stdenv,
wrapCCWith,
llvm,
lld,
clang-unwrapped,
bintools,
libc,
libunwind,
libcxxabi,
libcxx,
compiler-rt,
}:
wrapCCWith rec {
inherit libcxx bintools;
# We do this to avoid HIP pathing problems, and mimic a monolithic install
cc = stdenv.mkDerivation (finalAttrs: {
inherit (clang-unwrapped) version;
pname = "rocm-llvm-clang";
dontUnpack = true;
installPhase = ''
runHook preInstall
clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share}
for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do
cp -as $path/* $out
chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}}
rm -f $out/lib/libc++.so
done
ln -s $out/lib/* $out/lib/clang/$clang_version/lib
ln -sf $out/include/* $out/lib/clang/$clang_version/include
runHook postInstall
'';
passthru.isClang = true;
passthru.isROCm = true;
});
gccForLibs = stdenv.cc.cc;
extraPackages = [
llvm
lld
libc
libunwind
libcxxabi
compiler-rt
];
nixSupport.cc-cflags = [
"-resource-dir=$out/resource-root"
"-fuse-ld=lld"
"-rtlib=compiler-rt"
"-unwindlib=libunwind"
"-Wno-unused-command-line-argument"
];
extraBuildCommands = ''
clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
mkdir -p $out/resource-root
ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root
# Not sure why, but hardening seems to make things break
echo "" > $out/nix-support/add-hardening.sh
# GPU compilation uses builtin `lld`
substituteInPlace $out/bin/{clang,clang++} \
--replace-fail "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;"
'';
}

View file

@ -1,32 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
clang-unwrapped,
mlir,
graphviz,
python3Packages,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
targetName = "flang";
targetDir = targetName;
extraNativeBuildInputs = [
graphviz
python3Packages.sphinx-markdown-tables
];
extraBuildInputs = [ mlir ];
extraCMakeFlags = [
"-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang"
"-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen"
"-DCLANG_TABLEGEN_EXE=${clang-unwrapped}/bin/clang-tblgen"
"-DFLANG_INCLUDE_TESTS=OFF" # `The dependency target "Bye" of target ...`
];
# `flang/lib/Semantics/check-omp-structure.cpp:1905:1: error: no member named 'v' in 'Fortran::parser::OmpClause::OmpxDynCgroupMem'`
isBroken = true;
}

View file

@ -1,38 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
llvm,
clang,
spirv-llvm-translator,
}:
let
spirv = (spirv-llvm-translator.override { inherit llvm; });
in
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No documentation to build
buildMan = false; # No man pages to build
targetName = "libclc";
targetDir = targetName;
extraBuildInputs = [ spirv ];
# `spirv-mesa3d` isn't compiling with LLVM 15.0.0, it does with LLVM 14.0.0
# Try removing the `spirv-mesa3d` and `clspv` patches next update
# `clspv` tests fail, unresolved calls
extraPostPatch = ''
substituteInPlace CMakeLists.txt \
--replace-fail "find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \
"find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \
--replace-fail "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \
"find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \
--replace-fail " spirv-mesa3d-" "" \
--replace-fail " spirv64-mesa3d-" "" \
--replace-fail "NOT \''${t} MATCHES" \
"NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES"
'';
checkTargets = [ ];
isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...?
}

View file

@ -1,40 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
clang,
xz,
swig,
lua5_3,
graphviz,
gtest,
python3Packages,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely
targetName = "lldb";
targetDir = targetName;
extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ];
extraBuildInputs = [
xz
swig
lua5_3
graphviz
gtest
];
extraCMakeFlags = [
"-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version"
"-DLLDB_INCLUDE_TESTS=ON"
"-DLLDB_INCLUDE_UNITTESTS=ON"
];
extraPostPatch = ''
export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"`
'';
checkTargets = [ "check-${targetName}" ];
}

View file

@ -1,61 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
clr,
vulkan-headers,
vulkan-loader,
glslang,
shaderc,
fetchpatch,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No decent way to hack this to work
buildMan = false; # No man pages to build
targetName = "mlir";
targetDir = targetName;
# Fix `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'`
extraPatches = [
(fetchpatch {
url = "https://github.com/ROCm/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch";
hash = "sha256-3c91A9InMKxm+JcnWxoUeOU68y5I6w1AAXx6T9UByqI=";
})
];
extraNativeBuildInputs = [ clr ];
extraBuildInputs = [
vulkan-headers
vulkan-loader
glslang
shaderc
];
extraCMakeFlags = [
"-DMLIR_INCLUDE_DOCS=ON"
"-DMLIR_INCLUDE_TESTS=ON"
"-DMLIR_ENABLE_ROCM_RUNNER=ON"
"-DMLIR_ENABLE_SPIRV_CPU_RUNNER=ON"
"-DMLIR_ENABLE_VULKAN_RUNNER=ON"
"-DROCM_TEST_CHIPSET=gfx000" # CPU runner
];
extraPostPatch = ''
# `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists`
substituteInPlace CMakeLists.txt \
--replace-fail "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE"
# Mainly `No such file or directory`
cat ${./1001-mlir-failing-tests.list} | xargs -d \\n rm
'';
extraPostInstall = ''
mkdir -p $out/bin
mv bin/mlir-tblgen $out/bin
'';
checkTargets = [ "check-${targetName}" ];
requiredSystemFeatures = [ "big-parallel" ];
}

View file

@ -1,55 +0,0 @@
{
lib,
stdenv,
callPackage,
rocmUpdateScript,
llvm,
clang,
clang-unwrapped,
rocm-device-libs,
rocm-runtime,
rocm-thunk,
perl,
elfutils,
libdrm,
numactl,
lit,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
targetName = "openmp";
targetDir = targetName;
extraNativeBuildInputs = [ perl ];
extraBuildInputs = [
rocm-device-libs
rocm-runtime
rocm-thunk
elfutils
libdrm
numactl
];
extraCMakeFlags = [
"-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs
"-DCLANG_TOOL=${clang}/bin/clang"
"-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler"
"-DPACKAGER_TOOL=${clang-unwrapped}/bin/clang-offload-packager"
"-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin"
"-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped"
"-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
];
extraPostPatch = ''
# We can't build this target at the moment
substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \
--replace "gfx1010" ""
# No idea what's going on here...
cat ${./1000-openmp-failing-tests.list} | xargs -d \\n rm
'';
checkTargets = [ "check-${targetName}" ];
extraLicenses = [ lib.licenses.mit ];
}

View file

@ -1,19 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
targetName = "polly";
targetDir = targetName;
extraPostPatch = ''
# `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists`
substituteInPlace CMakeLists.txt \
--replace-fail "NOT TARGET gtest" "FALSE"
'';
checkTargets = [ "check-${targetName}" ];
}

View file

@ -1,16 +0,0 @@
{
stdenv,
callPackage,
rocmUpdateScript,
}:
callPackage ../base.nix rec {
inherit stdenv rocmUpdateScript;
buildDocs = false; # No documentation to build
buildMan = false; # No man pages to build
buildTests = false; # Too many errors
targetName = "pstl";
targetDir = "runtimes";
targetRuntimes = [ targetName ];
checkTargets = [ "check-${targetName}" ];
}

View file

@ -7,21 +7,21 @@
cmake,
rocm-cmake,
clr,
clang-tools-extra,
openmp,
rocblas,
hipblas-common,
hipblas,
hipblaslt,
rocmlir,
composable_kernel,
miopen,
protobuf,
abseil-cpp,
half,
nlohmann_json,
msgpack,
sqlite,
oneDNN_2,
blaze,
cppcheck,
rocm-device-libs,
texliveSmall,
doxygen,
sphinx,
@ -54,7 +54,7 @@ let
in
stdenv.mkDerivation (finalAttrs: {
pname = "migraphx";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -71,7 +71,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "AMDMIGraphX";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-VDYUSpWYAdJ63SKVCO26DVAC3RtZM7otqN0sYUA6DBQ=";
hash = "sha256-h9cTbrMwHeRGVJS/uHQnCXplNcrBqxbhwz2AcAEso0M=";
};
nativeBuildInputs =
@ -80,7 +80,6 @@ stdenv.mkDerivation (finalAttrs: {
cmake
rocm-cmake
clr
clang-tools-extra
python3Packages.python
]
++ lib.optionals buildDocs [
@ -96,8 +95,10 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs = [
openmp
rocblas
hipblas-common
hipblas
hipblaslt
rocmlir
composable_kernel
miopen
protobuf
half
@ -106,16 +107,31 @@ stdenv.mkDerivation (finalAttrs: {
sqlite
oneDNN_2
blaze
cppcheck
python3Packages.pybind11
python3Packages.onnx
];
LDFLAGS = "-Wl,--allow-shlib-undefined";
cmakeFlags = [
"-DMIGRAPHX_ENABLE_GPU=ON"
"-DMIGRAPHX_ENABLE_CPU=ON"
"-DMIGRAPHX_ENABLE_FPGA=ON"
"-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch?
"-DCMAKE_C_COMPILER=amdclang"
"-DCMAKE_CXX_COMPILER=amdclang++"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DEMBED_USE=CArrays" # Fixes error with lld
"-DDMIGRAPHX_ENABLE_PYTHON=ON"
"-DROCM_PATH=${clr}"
"-DHIP_ROOT_DIR=${clr}"
# migraphx relies on an incompatible fork of composable_kernel
# migraphxs relies on miopen which relies on current composable_kernel
# impossible to build with this ON; we can't link both of them even if we package both
"-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF"
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
@ -126,20 +142,12 @@ stdenv.mkDerivation (finalAttrs: {
postPatch =
''
# We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
export CXXFLAGS+="--rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
export CXXFLAGS+=" -w -isystem${rocmlir}/include/rocmlir -I${half}/include -I${abseil-cpp}/include -I${hipblas-common}/include"
patchShebangs tools
# `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]`
substituteInPlace CMakeLists.txt \
--replace "set(MIGRAPHX_TIDY_ERRORS ALL)" ""
# JIT library was removed from composable_kernel...
# https://github.com/ROCm/composable_kernel/issues/782
substituteInPlace src/targets/gpu/CMakeLists.txt \
--replace " COMPONENTS jit_library" "" \
--replace " composable_kernel::jit_library" "" \
--replace "if(WIN32)" "if(TRUE)"
''
+ lib.optionalString (!buildDocs) ''
substituteInPlace CMakeLists.txt \
@ -172,8 +180,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -182,6 +190,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = true;
};
})

View file

@ -10,9 +10,13 @@
rocm-cmake,
rocblas,
rocmlir,
rocrand,
rocm-runtime,
rocm-merged-llvm,
hipblas-common,
hipblas,
hipblaslt,
clr,
clang-tools-extra,
clang-ocl,
composable_kernel,
frugally-deep,
rocm-docs-core,
@ -30,43 +34,53 @@
rocm-comgr,
roctracer,
python3Packages,
# FIXME: should be able to use all clr targets
gpuTargets ? [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
], # clr.gpuTargets
buildDocs ? false, # Needs internet because of rocm-docs-core
buildTests ? false,
withComposableKernel ? composable_kernel.anyGfx9Target,
}:
let
version = "6.0.2";
# FIXME: cmake files need patched to include this properly
cFlags = "-O3 -DNDEBUG -Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "MIOpen";
rev = "rocm-${version}";
hash = "sha256-mbOdlSb0ESKi9hMkq3amv70Xkp/YKnZYre24d/y5TD0=";
hash = "sha256-rX+BE6wBDMnLyc6eai3bDVvmfahomDO0s10n6HhWu7c=";
fetchLFS = true;
fetchSubmodules = true;
# WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
leaveDotGit = true;
# If you're reading this, it's gonna take a bit of time.
# fetchSubModules doesn't work with postFetch???
# fetchLFS isn't actually fetching the LFS files...
postFetch = ''
export HOME=$(mktemp -d)
cd $out
# We need more history to fetch LFS files
set -x
git remote add origin $url
git fetch origin
git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version}
git clean -fdx
git checkout rocm-${version}
# We need to do this manually since using leaveDotGit and fetchSubmodules errors
git submodule update --init
# Fetch the LFS files
git switch -c rocm-${version} refs/tags/rocm-${version}
git config lfs.fetchexclude "none"
rm .lfsconfig
git lfs install
git lfs fetch --all
git lfs track "*.kdb.bz2"
GIT_TRACE=1 git lfs fetch --include="src/kernels/**"
GIT_TRACE=1 git lfs pull --include="src/kernels/**"
git lfs checkout
# Remove the defunct .git folder
rm -rf .git
'';
};
@ -112,8 +126,13 @@ stdenv.mkDerivation (finalAttrs: {
inherit version src;
pname = "miopen";
env.CFLAGS = cFlags;
env.CXXFLAGS = cFlags;
# Find zstd and add to target. Mainly for torch.
patches = [
./skip-preexisting-dbs.patch
./fix-isnan.patch # https://github.com/ROCm/MIOpen/pull/3448
(fetchpatch {
url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
@ -122,11 +141,14 @@ stdenv.mkDerivation (finalAttrs: {
url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
})
(fetchpatch {
name = "Extend-MIOpen-ISA-compatibility.patch";
url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
})
# FIXME: We need to rebase or drop this arch compat patch
# https://github.com/ROCm/MIOpen/issues/3540 suggests that
# arch compat patching doesn't work correctly for gfx1031
# (fetchpatch {
# name = "Extend-MIOpen-ISA-compatibility.patch";
# url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
# hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
# })
];
outputs =
@ -139,21 +161,24 @@ stdenv.mkDerivation (finalAttrs: {
++ lib.optionals buildTests [
"test"
];
enableParallelBuilding = true;
env.ROCM_PATH = clr;
env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
nativeBuildInputs = [
pkg-config
cmake
rocm-cmake
clr
clang-tools-extra
];
buildInputs =
[
hipblas
hipblas-common
rocblas
rocmlir
clang-ocl
composable_kernel
half
boost
sqlite
@ -161,6 +186,11 @@ stdenv.mkDerivation (finalAttrs: {
nlohmann_json
frugally-deep
roctracer
rocrand
hipblaslt
]
++ lib.optionals withComposableKernel [
composable_kernel
]
++ lib.optionals buildDocs [
latex
@ -178,15 +208,32 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_CXX_FLAGS=-Wno-#warnings" # <half> -> <half/half.hpp>
"-DUNZIPPER=${bzip2}/bin/bunzip2"
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
"-DMIOPEN_USE_SQLITE_PERFDB=ON"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
"-DCMAKE_BUILD_TYPE=Release"
# needs to stream to stdout so bzcat rather than bunzip2
"-DUNZIPPER=${bzip2}/bin/bzcat"
"-DCMAKE_C_COMPILER=amdclang"
"-DCMAKE_CXX_COMPILER=amdclang++"
"-DROCM_PATH=${clr}"
"-DHIP_ROOT_DIR=${clr}"
(lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
(lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
(lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
(lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
(lib.cmakeBool "MIOPEN_USE_COMGR" true)
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DMIOPEN_BACKEND=HIP"
]
++ lib.optionals buildTests [
@ -195,24 +242,26 @@ stdenv.mkDerivation (finalAttrs: {
];
postPatch = ''
substituteInPlace cmake/ClangTidy.cmake \
--replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy)
endmacro()
macro(enable_clang_tidy_unused)' \
--replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET)
return()'
patchShebangs test src/composable_kernel fin utils install_deps.cmake
substituteInPlace CMakeLists.txt \
--replace "unpack_db(\"\''${CMAKE_SOURCE_DIR}/src/kernels/\''${FILE_NAME}.kdb.bz2\")" "" \
--replace "MIOPEN_HIP_COMPILER MATCHES \".*clang\\\\+\\\\+$\"" "true" \
--replace "set(MIOPEN_TIDY_ERRORS ALL)" "" # error: missing required key 'key'
substituteInPlace test/gtest/CMakeLists.txt \
--replace "include(googletest)" ""
substituteInPlace test/gtest/CMakeLists.txt \
--replace-fail " gtest_main " " ${gtest}/lib/libgtest.so ${gtest}/lib/libgtest_main.so "
ln -sf ${gfx900} src/kernels/gfx900.kdb
ln -sf ${gfx906} src/kernels/gfx906.kdb
ln -sf ${gfx908} src/kernels/gfx908.kdb
ln -sf ${gfx90a} src/kernels/gfx90a.kdb
ln -sf ${gfx1030} src/kernels/gfx1030.kdb
mkdir -p build/share/miopen/db/
ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb
ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb
ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb
ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb
ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb
'';
# Unfortunately, it seems like we have to call make on these manually
@ -254,8 +303,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -264,8 +313,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,31 @@
From 17f67e0aa31cd2f1c1cb012d3858abf6956acc72 Mon Sep 17 00:00:00 2001
From: "Sv. Lockal" <lockalsash@gmail.com>
Date: Tue, 24 Dec 2024 14:43:10 +0000
Subject: [PATCH] Fix missing isnan definition on libstdc++ >=14 systems
Closes #3441
---
driver/reducecalculation_driver.hpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp
index 8226b3c953..2001969509 100644
--- a/driver/reducecalculation_driver.hpp
+++ b/driver/reducecalculation_driver.hpp
@@ -33,6 +33,7 @@
#include "random.hpp"
#include <algorithm>
#include <cfloat>
+#include <cmath>
#include <cstdlib>
#include <memory>
#include <miopen/miopen.h>
@@ -77,7 +78,7 @@ int32_t mloReduceCalculationForwardRunHost(miopenTensorDescriptor_t inputDesc,
for(size_t i = 0; i < reduce_size; ++i)
{
Tcheck val = static_cast<Tcheck>(input[input_idx]);
- if(nanPropagation && isnan(val))
+ if(nanPropagation && std::isnan(val))
{
val = 0.0f;
}

View file

@ -0,0 +1,22 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0ffaf983..0b9ed0952 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -554,7 +554,7 @@ endif()
function(unpack_db db_bzip2_file)
get_filename_component(__fname ${db_bzip2_file} NAME_WLE)
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}
- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
string(REPLACE "." "_" __tname ${__fname})
add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname})
@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file)
if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db")
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt
DEPENDS sqlite2txt generate_${__tname}
- COMMAND $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
)
add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt)
add_dependencies(generate_kernels generate_${__tname}_txt)

View file

@ -12,7 +12,6 @@
rocblas,
miopen,
migraphx,
clang,
openmp,
protobuf,
qtcreator,
@ -43,13 +42,13 @@ stdenv.mkDerivation (finalAttrs: {
"cpu"
);
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "MIVisionX";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-d32lcJq24MXeIWbNbo6putWaol5kF2io6cz4ZuL+DbE=";
hash = "sha256-SisCbUDCAiWQ1Ue7qrtoT6vO/1ztzqji+3cJD6MXUNw=";
};
patches = [
@ -98,6 +97,9 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
# "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
]
++ lib.optionals (gpuTargets != [ ]) [
@ -115,37 +117,26 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = ''
# We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...`
export CXXFLAGS+="--rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
patchShebangs rocAL/rocAL_pybind/examples
# Properly find miopen
export CXXFLAGS+=" --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
# Properly find miopen, fix ffmpeg version detection
substituteInPlace amd_openvx_extensions/CMakeLists.txt \
--replace "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
--replace "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
--replace-fail "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
--replace-fail "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
# Properly find turbojpeg
substituteInPlace amd_openvx/cmake/FindTurboJpeg.cmake \
--replace "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
--replace "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
# Fix bad paths
substituteInPlace rocAL/rocAL/rocAL_hip/CMakeLists.txt amd_openvx_extensions/amd_nn/nn_hip/CMakeLists.txt amd_openvx/openvx/hipvx/CMakeLists.txt \
--replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clang}/bin/clang++"
substituteInPlace cmake/FindTurboJpeg.cmake \
--replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
--replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
'';
postBuild = lib.optionalString buildDocs ''
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
'';
postInstall = lib.optionalString (!useOpenCL && !useCPU) ''
patchelf $out/lib/rocal_pybind*.so --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
chmod +x $out/lib/rocal_pybind*.so
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -154,8 +145,6 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
broken = useOpenCL;
};
})

View file

@ -0,0 +1,42 @@
{
fetchFromGitHub,
stdenv,
cmake,
clr,
numactl,
nlohmann_json,
}:
stdenv.mkDerivation {
pname = "mscclpp";
version = "unstable-2024-12-13";
src = fetchFromGitHub {
owner = "microsoft";
repo = "mscclpp";
rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37";
hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig=";
};
nativeBuildInputs = [
cmake
];
buildInputs = [
clr
numactl
];
postPatch = ''
substituteInPlace CMakeLists.txt \
--replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100"
'';
cmakeFlags = [
"-DMSCCLPP_BYPASS_GPU_CHECK=ON"
"-DMSCCLPP_USE_ROCM=ON"
"-DMSCCLPP_BUILD_TESTS=OFF"
"-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100"
"-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100"
"-DMSCCLPP_BUILD_APPS_NCCL=ON"
"-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF"
"-DFETCHCONTENT_QUIET=OFF"
"-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS"
"-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}"
];
env.ROCM_PATH = clr;
}

View file

@ -6,18 +6,35 @@
cmake,
rocm-cmake,
rocm-smi,
rocm-core,
clr,
mscclpp,
perl,
hipify,
gtest,
chrpath,
rocprofiler,
rocprofiler-register,
autoPatchelfHook,
buildTests ? false,
gpuTargets ? [ ],
gpuTargets ? (clr.localGpuTargets or [ ]),
}:
let
useAsan = buildTests;
useUbsan = buildTests;
san = lib.optionalString (useAsan || useUbsan) (
"-fno-gpu-sanitize -fsanitize=undefined "
+ (lib.optionalString useAsan "-fsanitize=address -shared-libsan ")
);
in
# Note: we can't properly test or make use of multi-node collective ops
# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support
# kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
# infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros
stdenv.mkDerivation (finalAttrs: {
pname = "rccl";
version = "6.0.2";
pname = "rccl${clr.gpuArchSuffix}";
version = "6.3.3";
outputs =
[
@ -27,11 +44,17 @@ stdenv.mkDerivation (finalAttrs: {
"test"
];
patches = [
./fix-mainline-support-and-ub.diff
./enable-mscclpp-on-all-gfx9.diff
./rccl-test-missing-iomanip.diff
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rccl";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-Oyml47yGEB7fALxBcDjqFngS38cnI39sDj94/JV7wE0=";
hash = "sha256-998tDiC0Qp9hhcXtFpiCWqwdKPVT2vNp0GU/rng03Bw=";
};
nativeBuildInputs = [
@ -40,12 +63,16 @@ stdenv.mkDerivation (finalAttrs: {
clr
perl
hipify
autoPatchelfHook # ASAN doesn't add rpath without this
];
buildInputs =
[
rocm-smi
gtest
rocprofiler
rocprofiler-register
mscclpp
]
++ lib.optionals buildTests [
chrpath
@ -53,8 +80,17 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_CXX_COMPILER=hipcc"
"-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
"-DCMAKE_BUILD_TYPE=Release"
"-DROCM_PATH=${clr}"
"-DHIP_COMPILER=${clr}/bin/amdclang++"
"-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
"-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
"-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
"-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
"-DENABLE_MSCCL_KERNEL=ON"
"-DENABLE_MSCCLPP=ON"
"-DMSCCLPP_ROOT=${mscclpp}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
@ -62,32 +98,37 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_INCLUDEDIR=include"
]
++ lib.optionals (gpuTargets != [ ]) [
# AMD can't make up their minds and keep changing which one is used in different projects.
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_TESTS=ON"
];
# -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
# Reported upstream
env.CFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
env.CXXFLAGS = "-I${clr}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
env.LDFLAGS = "${san}";
postPatch = ''
patchShebangs src tools
# Really strange behavior, `#!/usr/bin/env perl` should work...
substituteInPlace CMakeLists.txt \
--replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" \
--replace-warn "-parallel-jobs=12" "-parallel-jobs=1" \
--replace-warn "-parallel-jobs=16" "-parallel-jobs=1"
'';
postInstall = lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/* $test/bin
rmdir $out/bin
'';
postInstall =
lib.optionalString useAsan ''
patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
''
+ lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/* $test/bin
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -99,8 +140,5 @@ stdenv.mkDerivation (finalAttrs: {
];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,13 @@
diff --git a/src/init.cc b/src/init.cc
index 738f756..1b0e4fc 100644
--- a/src/init.cc
+++ b/src/init.cc
@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) {
hipDeviceProp_t devProp;
CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev));
- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94");
+ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9");
if (comm->mscclppCompatible) {
bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0);
auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId];

View file

@ -0,0 +1,178 @@
diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h
index 8c5f081..9922b79 100644
--- a/src/include/bootstrap.h
+++ b/src/include/bootstrap.h
@@ -10,11 +10,13 @@
#include "nccl.h"
#include "comm.h"
+// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128]
struct ncclBootstrapHandle {
uint64_t magic;
union ncclSocketAddress addr;
};
static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID");
+static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB");
ncclResult_t bootstrapNetInit();
ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv);
diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc
index b3063d5..464b80d 100644
--- a/src/misc/rocmwrap.cc
+++ b/src/misc/rocmwrap.cc
@@ -131,9 +131,12 @@ static void initOnceFunc() {
//format and store the kernel conf file location
snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release);
fp = fopen(kernel_conf_file, "r");
- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file");
+ if (fp == NULL) {
+ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled");
+ }
//look for kernel_opt1 and kernel_opt2 in the conf file and check
- while (fgets(buf, sizeof(buf), fp) != NULL) {
+ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary.
+ while (fp && fgets(buf, sizeof(buf), fp) != NULL) {
if (strstr(buf, kernel_opt1) != NULL) {
found_opt1 = 1;
INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release);
@@ -143,11 +146,12 @@ static void initOnceFunc() {
INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release);
}
}
- if (!found_opt1 || !found_opt2) {
+ if (fp && (!found_opt1 || !found_opt2)) {
dmaBufSupport = 0;
INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release);
INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support");
}
+ if (fp) fclose(fp);
if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled");
else goto error;
diff --git a/src/nccl.h.in b/src/nccl.h.in
index 1d127b0..6296073 100644
--- a/src/nccl.h.in
+++ b/src/nccl.h.in
@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
/*! @brief Opaque unique id used to initialize communicators
@details The ncclUniqueId must be passed to all participating ranks */
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
/*! @defgroup rccl_result_code Result Codes
@details The various result codes that RCCL API calls may return
diff --git a/src/proxy.cc b/src/proxy.cc
index 50e5437..51bb401 100644
--- a/src/proxy.cc
+++ b/src/proxy.cc
@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool {
static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) {
if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) {
- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ if (pool->pools) {
+ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ } else {
+ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1));
+ }
NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE));
pool->banks++;
pool->offset = 0;
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
index 6d77784..49762d3 100644
--- a/src/transport/net_ib.cc
+++ b/src/transport/net_ib.cc
@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() {
// Requires support from NIC driver modules
// Use ONLY for debugging!
moduleLoaded = 1;
- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
+ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
}
if (moduleLoaded == -1) {
@@ -586,13 +586,14 @@ ncclResult_t ncclIbGdrSupport() {
// or created under a different path like `/sys/kernel/` or `/sys/` (depending on your ib_peer_mem module)
const char* memory_peers_paths[] = {"/sys/kernel/mm/memory_peers/amdkfd/version",
"/sys/kernel/memory_peers/amdkfd/version",
- "/sys/memory_peers/amdkfd/version"};
+ "/sys/memory_peers/amdkfd/version",
+ NULL};
int i = 0;
while (memory_peers_paths[i]) {
if (access(memory_peers_paths[i], F_OK) == 0) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]);
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]);
break;
} else {
moduleLoaded = 0;
@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() {
if (moduleLoaded == 0) {
// Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms`
// if your system uses native OS ib_peer module
- char buf[256];
- FILE *fp = NULL;
- fp = fopen("/proc/kallsyms", "r");
+ FILE *fp = fopen("/proc/kallsyms", "r");
+ char *line = NULL;
+ size_t len = 0;
if (fp == NULL) {
- INFO(NCCL_INIT,"Could not open /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client");
} else {
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- if (strstr(buf, "t ib_register_peer_memory_client") != NULL ||
- strstr(buf, "T ib_register_peer_memory_client") != NULL) {
+ while (getline(&line, &len, fp) > 0) {
+ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms");
break;
}
}
}
+ if (line) free(line);
+ if (fp) fclose(fp);
}
#else
// Check for the nv_peer_mem module being loaded
@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() {
#endif
}
if (moduleLoaded == 0) {
- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
return ncclSystemError;
}
return ncclSuccess;
diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h
index 2c86c33..5801c61 100755
--- a/tools/ib-test/include/nccl.h
+++ b/tools/ib-test/include/nccl.h
@@ -31,7 +31,7 @@ extern "C" {
typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/* Error type */
typedef enum { ncclSuccess = 0,
diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h
index 729561b..4e4bdd9 100644
--- a/tools/topo_expl/include/nccl.h
+++ b/tools/topo_expl/include/nccl.h
@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_COMM_NULL NULL
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/*! @brief Error type */
typedef enum { ncclSuccess = 0,

View file

@ -0,0 +1,10 @@
--- a/test/common/TestBed.cpp
+++ b/test/common/TestBed.cpp
@@ -4,6 +4,7 @@
* See LICENSE.txt for license information
************************************************************************/
#include <unistd.h>
+#include <iomanip>
#include "TestBed.hpp"
#include <rccl/rccl.h>

View file

@ -4,9 +4,11 @@
fetchFromGitHub,
rocmUpdateScript,
cmake,
amdsmi,
rocm-smi,
rocm-runtime,
libcap,
libdrm,
grpc,
protobuf,
openssl,
@ -46,7 +48,7 @@ let
in
stdenv.mkDerivation (finalAttrs: {
pname = "rdc";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -63,7 +65,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "rdc";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-QugcajxILmDeQiWG5uAUO41Wut45irg2Ynufgn1bmps=";
hash = "sha256-s/31b8/Kn5l1QJ941UMSB8SCzpvODsPfOLMmEBUYYmY=";
};
nativeBuildInputs =
@ -79,9 +81,11 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs =
[
amdsmi
rocm-smi
rocm-runtime
libcap
libdrm
grpc
openssl
]
@ -89,6 +93,8 @@ stdenv.mkDerivation (finalAttrs: {
gtest
];
CXXFLAGS = "-I${libcap.dev}/include";
cmakeFlags =
[
"-DCMAKE_VERBOSE_MAKEFILE=OFF"
@ -126,8 +132,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -136,7 +142,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
# broken = versions.minor finalAttrs.version != versions.minor rocm-smi.version || versionAtLeast finalAttrs.version "7.0.0";
broken = true; # Too many errors, unsure how to fix
};
})

View file

@ -11,6 +11,7 @@
rocrand,
clr,
git,
pkg-config,
openmp,
openmpi,
gtest,
@ -22,7 +23,7 @@
stdenv.mkDerivation (finalAttrs: {
pname = "rocalution";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -42,7 +43,7 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "rocALUTION";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-mrN+CI2mqaMi8oKxui7HAIE2qSn50aNaFipkWwYMtbc=";
hash = "sha256-xdZ3HUiRGsreHfJH8RgL/s3jGyC5ABmBKcEfgtqWg8Y=";
};
nativeBuildInputs = [
@ -50,6 +51,7 @@ stdenv.mkDerivation (finalAttrs: {
rocm-cmake
clr
git
pkg-config
];
buildInputs =
@ -65,9 +67,12 @@ stdenv.mkDerivation (finalAttrs: {
gtest
];
CXXFLAGS = "-I${openmp.dev}/include";
cmakeFlags =
[
"-DCMAKE_CXX_COMPILER=hipcc"
"-DOpenMP_C_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_CXX_INCLUDE_DIR=${openmp.dev}/include"
"-DOpenMP_omp_LIBRARY=${openmp}/lib"
"-DROCM_PATH=${clr}"
"-DHIP_ROOT_DIR=${clr}"
"-DSUPPORT_HIP=ON"
@ -82,6 +87,7 @@ stdenv.mkDerivation (finalAttrs: {
]
++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}"
"-DGPU_TARGETS=${lib.strings.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_CLIENTS_TESTS=ON"
@ -115,8 +121,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -125,8 +131,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -14,21 +14,24 @@
gtest,
gfortran,
openmp,
git,
amd-blis,
zstd,
hipblas-common,
hipblaslt,
python3Packages,
rocm-smi,
buildTensile ? true,
buildTests ? false,
buildBenchmarks ? false,
tensileLogic ? "asm_full",
tensileCOVersion ? "default",
buildTests ? true,
buildBenchmarks ? true,
# https://github.com/ROCm/Tensile/issues/1757
# Allows gfx101* users to use rocBLAS normally.
# Turn the below two values to `true` after the fix has been cherry-picked
# into a release. Just backporting that single fix is not enough because it
# depends on some previous commits.
tensileSepArch ? false,
tensileLazyLib ? false,
tensileLibFormat ? "msgpack",
tensileSepArch ? true,
tensileLazyLib ? true,
withHipBlasLt ? true,
# `gfx940`, `gfx941` are not present in this list because they are early
# engineering samples, and all final MI300 hardware are `gfx942`:
# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
@ -37,38 +40,47 @@
# would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
# always try to use `gfx1010` code objects, hence building for `gfx1012` is
# useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
gpuTargets ? [
"gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
],
gpuTargets ? (
clr.localGpuTargets or [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1010"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
]
),
}:
let
gpuTargets' = lib.concatStringsSep ";" gpuTargets;
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocblas";
version = "6.0.2";
pname = "rocblas${clr.gpuArchSuffix}";
version = "6.3.3";
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
];
outputs = [
"out"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocBLAS";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk=";
hash = "sha256-IYcrVcGH4yZDkFZeNOJPfG0qsPS/WiH0fTSUSdo1BH4=";
};
nativeBuildInputs =
[
cmake
# no ninja, it buffers console output and nix times out long periods of no output
rocm-cmake
clr
git
]
++ lib.optionals buildTensile [
tensile
@ -77,12 +89,17 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs =
[
python3
hipblas-common
]
++ lib.optionals withHipBlasLt [
hipblaslt
]
++ lib.optionals buildTensile [
zstd
msgpack
libxml2
python3Packages.msgpack
python3Packages.joblib
python3Packages.zstandard
]
++ lib.optionals buildTests [
gtest
@ -91,38 +108,61 @@ stdenv.mkDerivation (finalAttrs: {
gfortran
openmp
amd-blis
rocm-smi
]
++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [
python3Packages.pyyaml
];
dontStrip = true;
env.CXXFLAGS =
"-O3 -DNDEBUG -I${hipblas-common}/include"
+ lib.optionalString (buildTests || buildBenchmarks) " -I${amd-blis}/include/blis";
# Fails to link tests if we don't add amd-blis libs
env.LDFLAGS = lib.optionalString (
buildTests || buildBenchmarks
) "-Wl,--as-needed -L${amd-blis}/lib -lblis-mt -lcblas";
env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
cmakeFlags =
[
(lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc")
(lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
(lib.cmakeFeature "CMAKE_BUILD_TYPE" "Release")
(lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
(lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
(lib.cmakeFeature "python" "python3")
(lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets))
(lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets')
(lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets')
(lib.cmakeFeature "GPU_TARGETS" gpuTargets')
(lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
(lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
(lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
(lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
(lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
(lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
# rocblas header files are not installed unless we set this
(lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include")
(lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
(lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
# Temporarily set variables to work around upstream CMakeLists issue
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_LIBDIR=lib"
]
++ lib.optionals buildTensile [
"-DCPACK_SET_DESTDIR=OFF"
"-DLINK_BLIS=ON"
"-DTensile_CODE_OBJECT_VERSION=default"
"-DTensile_LOGIC=asm_full"
"-DTensile_LIBRARY_FORMAT=msgpack"
(lib.cmakeBool "BUILD_WITH_PIP" false)
(lib.cmakeFeature "Tensile_LOGIC" tensileLogic)
(lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion)
(lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
(lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
(lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat)
(lib.cmakeBool "Tensile_PRINT_DEBUG" true)
]
++ lib.optionals (buildTests || buildBenchmarks) [
(lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis")
];
passthru.amdgpu_targets = gpuTargets';
patches = [
(fetchpatch {
name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
@ -135,14 +175,17 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = ''
substituteInPlace cmake/build-options.cmake \
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
substituteInPlace CMakeLists.txt \
--replace-fail "4.42.0" "4.43.0"
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
enableParallelBuilding = true;
requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; {
@ -151,8 +194,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,66 +1,76 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, rocm-cmake
, git
, rocm-comgr
, rocm-runtime
, hwdata
, texliveSmall
, doxygen
, graphviz
, buildDocs ? true
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
git,
rocm-comgr,
rocm-runtime,
hwdata,
texliveSmall,
doxygen,
graphviz,
buildDocs ? true,
}:
let
latex = lib.optionalAttrs buildDocs (texliveSmall.withPackages (ps: with ps; [
changepage
latexmk
varwidth
multirow
hanging
adjustbox
collectbox
stackengine
enumitem
alphalph
wasysym
sectsty
tocloft
newunicodechar
etoc
helvetic
wasy
courier
]));
in stdenv.mkDerivation (finalAttrs: {
latex = lib.optionalAttrs buildDocs (
texliveSmall.withPackages (
ps: with ps; [
changepage
latexmk
varwidth
multirow
hanging
adjustbox
collectbox
stackengine
enumitem
alphalph
wasysym
sectsty
tocloft
newunicodechar
etoc
helvetic
wasy
courier
]
)
);
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocdbgapi";
version = "6.0.2";
version = "6.3.3";
outputs = [
"out"
] ++ lib.optionals buildDocs [
"doc"
];
outputs =
[
"out"
]
++ lib.optionals buildDocs [
"doc"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "ROCdbgapi";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-+CxaTmxRt/RicqQddqIEHs8vvAPCMKXkWg7kbZvnUsQ=";
hash = "sha256-6itfBrWVspobU47aiJAOQoxT8chwrq9scRn0or3bXto=";
};
nativeBuildInputs = [
cmake
rocm-cmake
git
] ++ lib.optionals buildDocs [
latex
doxygen
graphviz
];
nativeBuildInputs =
[
cmake
rocm-cmake
git
]
++ lib.optionals buildDocs [
latex
doxygen
graphviz
];
buildInputs = [
rocm-comgr
@ -83,21 +93,15 @@ in stdenv.mkDerivation (finalAttrs: {
make -j$NIX_BUILD_CORES doc
'';
postInstall = ''
substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-config.cmake \
--replace "/build/source/build/" ""
substituteInPlace $out/lib/cmake/amd-dbgapi/amd-dbgapi-targets.cmake \
--replace "/build/source/build" "$out"
'' + lib.optionalString buildDocs ''
postInstall = lib.optionalString buildDocs ''
mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html
rmdir $out/share/html
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -106,6 +110,5 @@ in stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,5 +1,4 @@
{
rocfft,
lib,
stdenv,
fetchFromGitHub,
@ -15,18 +14,18 @@
gtest,
openmp,
rocrand,
gpuTargets ? [ ],
gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocfft";
version = "6.0.2";
pname = "rocfft${clr.gpuArchSuffix}";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocFFT";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-6Gjsy14GeR08VqnNmFhu8EyYDnQ+VZRlg+u9MAAWfHc=";
hash = "sha256-RrxdwZ64uC7lQzyJI1eGHX2dmRnW8TfNThnuvuz5XWo=";
};
nativeBuildInputs = [
@ -36,6 +35,8 @@ stdenv.mkDerivation (finalAttrs: {
rocm-cmake
];
# FIXME: rocfft_aot_helper runs at the end of the build and has a risk of timing it out
# due to a long period with no terminal output
buildInputs = [ sqlite ];
cmakeFlags =
@ -156,8 +157,8 @@ stdenv.mkDerivation (finalAttrs: {
updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
};
@ -169,8 +170,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -9,11 +9,13 @@
flex,
glibc,
zlib,
zstd,
gmp,
mpfr,
ncurses,
expat,
rocdbgapi,
perl,
python3,
babeltrace,
sourceHighlight,
@ -21,13 +23,13 @@
stdenv.mkDerivation (finalAttrs: {
pname = "rocgdb";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "ROCgdb";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-XeX/k8gfo9HgcUSIjs35C7IqCmFhvBOqQJSOoPF6HK4=";
hash = "sha256-Z+uk+ViLXgk5hXrIhVHRY0Kly7mktYms7M3o9Tmxv8s=";
};
nativeBuildInputs = [
@ -35,10 +37,13 @@ stdenv.mkDerivation (finalAttrs: {
texinfo # For makeinfo
bison
flex
perl # used in mkinstalldirs script during installPhase
python3
];
buildInputs = [
zlib
zstd
gmp
mpfr
ncurses
@ -56,14 +61,14 @@ stdenv.mkDerivation (finalAttrs: {
"--with-iconv-path=${glibc.bin}"
"--enable-tui"
"--with-babeltrace"
"--with-babeltrace=${babeltrace}"
"--with-python=python3"
"--with-system-zlib"
"--with-system-zstd"
"--enable-64-bit-bfd"
"--with-gmp=${gmp.dev}"
"--with-mpfr=${mpfr.dev}"
"--with-expat"
"--with-libexpat-prefix=${expat.dev}"
"--with-expat=${expat}"
# So the installed binary is called "rocgdb" instead on plain "gdb"
"--program-prefix=roc"
@ -74,25 +79,32 @@ stdenv.mkDerivation (finalAttrs: {
"--disable-ld"
"--disable-gas"
"--disable-gdbserver"
"--disable-sim"
"--disable-gdbtk"
"--disable-gprofng"
"--disable-shared"
];
postPatch = ''
for file in *; do
if [ -f "$file" ]; then
patchShebangs "$file"
fi
done
'';
# The source directory for ROCgdb (based on upstream GDB) contains multiple project
# of GNUs toolchain (binutils and onther), we only need to install the GDB part.
installPhase = ''
make install-gdb
'';
# `-Wno-format-nonliteral` doesn't work
env.NIX_CFLAGS_COMPILE = "-Wno-error=format-security";
env.CFLAGS = "-Wno-switch -Wno-format-nonliteral -I${zstd.dev}/include -I${zlib.dev}/include -I${expat.dev}/include -I${ncurses.dev}/include";
env.CXXFLAGS = finalAttrs.env.CFLAGS;
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -101,6 +113,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.gpl3Plus;
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,27 +1,31 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
rocm-core,
cmake,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-cmake";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocm-cmake";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-qSjWT0KOQ5oDV06tfnKN+H/JzdoOnR9KY0c+SjvDepM=";
hash = "sha256-U4vGkH2iUlNJkqiNmVuFianD4WR9yuGvZsYG58smg0k=";
};
nativeBuildInputs = [ cmake ];
buildInputs = [ rocm-core ];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -30,6 +34,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.mit;
maintainers = teams.rocm.members;
platforms = platforms.unix;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,11 +1,13 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
fetchpatch,
cmake,
rocm-cmake,
python3,
rocm-merged-llvm,
rocm-device-libs,
zlib,
zstd,
libxml2,
}:
@ -20,34 +22,45 @@ let
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-comgr";
version = "6.0.2";
# In-tree with ROCm LLVM
inherit (rocm-merged-llvm) version;
src = rocm-merged-llvm.llvm-src;
src = fetchFromGitHub {
owner = "ROCm";
repo = "ROCm-CompilerSupport";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-9HuNU/k+kPJMlzqOTM20gm6SAOWJe9tpAZXEj4erdmI=";
};
sourceRoot = "${finalAttrs.src.name}/amd/comgr";
sourceRoot = "${finalAttrs.src.name}/lib/comgr";
patches = [
# [Comgr] Extend ISA compatibility
(fetchpatch {
sha256 = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k=";
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch";
relative = "amd/comgr";
})
#[Comgr] Extend ISA compatibility for CCOB
(fetchpatch {
sha256 = "sha256-6Rwz12Lk4R2JK3olii3cr2Zd0ZLYe7VSpK1YRCOsJWY=";
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/2d8c459a4d4c0567a7a275b4b54560d88e5c6919.patch";
relative = "amd/comgr";
})
];
nativeBuildInputs = [
cmake
rocm-cmake
python3
];
buildInputs = [
rocm-device-libs
libxml2
zlib
zstd
rocm-merged-llvm
];
cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;X86" ];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
cmakeFlags = [
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
];
meta = with lib; {
description = "APIs for compiling and inspecting AMDGPU code objects";
@ -55,8 +68,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.ncsa;
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -4,28 +4,43 @@
fetchFromGitHub,
rocmUpdateScript,
cmake,
writeText,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-core";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocm-core";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-sgL1UMt3o01zA8v41dyCG1fAsK/PkTRsHQJOvlNatZ4=";
hash = "sha256-UDnPGvgwzwv49CzF+Kt0v95CsxS33BZeqNcKw1K6jRI=";
};
nativeBuildInputs = [ cmake ];
cmakeFlags = [ "-DROCM_VERSION=${finalAttrs.version}" ];
# FIXME: What's the correct way to set this?
env.ROCM_LIBPATCH_VERSION = "${lib.versions.major finalAttrs.version}0${lib.versions.minor finalAttrs.version}0${lib.versions.patch finalAttrs.version}";
env.BUILD_ID = "nixos-${finalAttrs.env.ROCM_LIBPATCH_VERSION}";
env.ROCM_BUILD_ID = "release-${finalAttrs.env.BUILD_ID}";
cmakeFlags = [
"-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
"-DROCM_VERSION=${finalAttrs.version}"
"-DBUILD_ID=${finalAttrs.env.BUILD_ID}"
];
setupHook = writeText "setupHook.sh" ''
export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
export BUILD_ID="${finalAttrs.env.BUILD_ID}"
export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}"
'';
passthru.ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION;
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
page = "tags?per_page=1";
filter = ".[0].name | split(\"-\") | .[1]";
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
page = "tags?per_page=4";
};
meta = with lib; {
@ -34,8 +49,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,7 +1,7 @@
diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake
index 07c60eb..c736b3e 100644
--- a/cmake/Packages.cmake
+++ b/cmake/Packages.cmake
--- a/amd/device-libs/cmake/Packages.cmake
+++ b/amd/device-libs/cmake/Packages.cmake
@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES
IMPORTED_LOCATION \"${target_path}\")")
endforeach()

View file

@ -1,11 +1,14 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
ninja,
libxml2,
zlib,
zstd,
ncurses,
rocm-merged-llvm,
python3,
}:
let
@ -17,32 +20,36 @@ let
else
throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
stdenv.mkDerivation {
pname = "rocm-device-libs";
version = "6.0.2";
# In-tree with ROCm LLVM
inherit (rocm-merged-llvm) version;
src = rocm-merged-llvm.llvm-src;
src = fetchFromGitHub {
owner = "ROCm";
repo = "ROCm-Device-Libs";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-7XG7oSkJ3EPWTYGea0I50eB1/DPMD5agmjctxZYTbLQ=";
};
postPatch = ''
cd amd/device-libs
'';
patches = [ ./cmake.patch ];
nativeBuildInputs = [
cmake
rocm-cmake
ninja
python3
];
buildInputs = [ libxml2 ];
cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" ];
buildInputs = [
libxml2
zlib
zstd
ncurses
rocm-merged-llvm
];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
cmakeFlags = [
"-DCMAKE_RELEASE_TYPE=Release"
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
];
meta = with lib; {
description = "Set of AMD-specific device-side language runtime libraries";
@ -50,8 +57,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.ncsa;
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})
}

View file

@ -1,36 +1,37 @@
{ lib
, stdenv
, fetchFromGitHub
, gitUpdater
, buildPythonPackage
, setuptools
, beautifulsoup4
, gitpython
, pydata-sphinx-theme
, pygithub
, sphinx
, breathe
, myst-parser
, sphinx-book-theme
, sphinx-copybutton
, sphinx-design
, sphinx-external-toc
, sphinx-notfound-page
, pyyaml
, fastjsonschema
{
lib,
fetchFromGitHub,
gitUpdater,
buildPythonPackage,
setuptools,
beautifulsoup4,
gitpython,
pydata-sphinx-theme,
pygithub,
sphinx,
breathe,
myst-nb,
myst-parser,
sphinx-book-theme,
sphinx-copybutton,
sphinx-design,
sphinx-external-toc,
sphinx-notfound-page,
pyyaml,
fastjsonschema,
}:
# FIXME: Move to rocmPackages_common
buildPythonPackage rec {
pname = "rocm-docs-core";
version = "1.12.0";
version = "1.17.0";
format = "pyproject";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocm-docs-core";
rev = "v${version}";
hash = "sha256-++Vi0jZLtHWsGy5IUohgF3P+Q6Jg/d0xWyDA6urbHUA=";
hash = "sha256-fGRJyQq0Eook1Dc9Qy+dehQ5BVNX+6pkkFN9adb21Eo=";
};
buildInputs = [ setuptools ];
@ -42,6 +43,7 @@ buildPythonPackage rec {
pygithub
sphinx
breathe
myst-nb
myst-parser
sphinx-book-theme
sphinx-copybutton
@ -59,7 +61,10 @@ buildPythonPackage rec {
meta = with lib; {
description = "ROCm Documentation Python package for ReadTheDocs build standardization";
homepage = "https://github.com/ROCm/rocm-docs-core";
license = with licenses; [ mit cc-by-40 ];
license = with licenses; [
mit
cc-by-40
];
maintainers = teams.rocm.members;
platforms = platforms.linux;
};

View file

@ -0,0 +1,27 @@
{
symlinkJoin,
linkFarm,
clr,
hipblas,
hipblas-common,
rocblas,
rocsolver,
rocsparse,
rocm-device-libs,
rocm-smi,
llvm,
}:
symlinkJoin {
name = "rocm-path-${clr.version}";
paths = [
clr
hipblas-common
hipblas
rocblas
rocsolver
rocsparse
rocm-device-libs
rocm-smi
(linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; })
];
}

View file

@ -6,37 +6,40 @@
rocmUpdateScript,
pkg-config,
cmake,
ninja,
xxd,
rocm-device-libs,
rocm-thunk,
elfutils,
libdrm,
numactl,
valgrind,
libxml2,
rocm-merged-llvm,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-runtime";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "ROCR-Runtime";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-xNMG954HI9SOfvYYB/62fhmm9mmR4I10uHP2nqn9EgI=";
hash = "sha256-du20+5VNYgwchGO7W7FIVebBqLPtfSBnmPVbPpgEZjo=";
};
sourceRoot = "${finalAttrs.src.name}/src";
env.CFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
env.CXXFLAGS = "-I${numactl.dev}/include -I${elfutils.dev}/include -w";
nativeBuildInputs = [
pkg-config
cmake
ninja
xxd
rocm-merged-llvm
];
buildInputs = [
rocm-thunk
elfutils
libdrm
numactl
@ -44,34 +47,56 @@ stdenv.mkDerivation (finalAttrs: {
libxml2
];
cmakeFlags = [
"-DBUILD_SHARED_LIBS=ON"
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
];
patches = [
# Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272
(fetchpatch {
name = "extend-isa-compatibility-check.patch";
url = "https://salsa.debian.org/rocm-team/rocr-runtime/-/raw/076026d43bbee7f816b81fea72f984213a9ff961/debian/patches/0004-extend-isa-compatibility-check.patch";
hash = "sha256-cC030zVGS4kNXwaztv5cwfXfVwOldpLGV9iYgEfPEnY=";
stripLen = 1;
# [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int
url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch";
hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE=";
})
(fetchpatch {
# [PATCH] rocr: refactor of runtime.cpp based on Coverity
url = "https://github.com/ROCm/ROCR-Runtime/commit/441bd9fe6c7bdb5c4c31f71524ed642786bc923e.patch";
hash = "sha256-7bQXxGkipzgT2aXRxCuh3Sfmo/zc/IOmA0x1zB+fMb0=";
})
(fetchpatch {
# [PATCH] queues: fix UB due to 1 << 31
url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch";
hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U=";
})
(fetchpatch {
# [PATCH] topology: fix UB due to 1 << 31
url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch";
hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w=";
})
(fetchpatch {
# [PATCH] kfd_ioctl: fix UB due to 1 << 31
url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch";
hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns=";
})
./remove-hsa-aqlprofile-dep.patch
];
postPatch = ''
patchShebangs image/blit_src/create_hsaco_ascii_file.sh
patchShebangs core/runtime/trap_handler/create_trap_handler_header.sh
patchShebangs core/runtime/blit_shaders/create_blit_shader_header.sh
patchShebangs --host image core runtime
substituteInPlace CMakeLists.txt \
--replace 'hsa/include/hsa' 'include/hsa'
# We compile clang before rocm-device-libs, so patch it in afterwards
# Replace object version: https://github.com/ROCm/ROCR-Runtime/issues/166 (TODO: Remove on LLVM update?)
substituteInPlace image/blit_src/CMakeLists.txt \
--replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' \
--replace '-mcode-object-version=4' '-mcode-object-version=5'
export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -80,8 +105,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ ncsa ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,27 @@
libhsa-amd-aqlprofile64 library is unfree
Bug: https://github.com/ROCm/ROCm/issues/1781
--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
}
- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
- os::CloseLib(lib);
- setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
- }
-
setFlag(HSA_EXTENSION_AMD_PROFILER);
break;
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
return HSA_STATUS_SUCCESS;
}
- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
+ if (0) {
if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);

View file

@ -1,20 +1,21 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, wrapPython
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
wrapPython,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-smi";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocm_smi_lib";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-fS52hpTv1WEycwkGZLXjz383WJWzyk8RvJRshEQSG/A=";
hash = "sha256-j9pkyUt+p6IkhawIhiTymqDBydxXZunxmdyCyRN0RxE=";
};
patches = [ ./cmake.patch ];
@ -34,13 +35,14 @@ stdenv.mkDerivation (finalAttrs: {
postInstall = ''
wrapPythonProgramsIn $out
mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py
mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -49,6 +51,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = [ "x86_64-linux" ];
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,32 @@
{
clr,
ollama,
python3Packages,
rocmPackages,
magma-hip,
emptyDirectory,
stdenv,
}:
# This package exists purely to have a bunch of passthru.tests attrs
stdenv.mkDerivation {
name = "rocm-tests";
nativeBuildInputs = [
clr
];
src = emptyDirectory;
postInstall = "mkdir -p $out";
passthru.tests = {
ollama = ollama.override {
inherit rocmPackages;
acceleration = "rocm";
};
torch = python3Packages.torch.override {
inherit rocmPackages;
rocmSupport = true;
cudaSupport = false;
magma-hip = magma-hip.override {
inherit rocmPackages;
};
};
};
}

View file

@ -1,54 +0,0 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, pkg-config
, cmake
, libdrm
, numactl
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocm-thunk";
version = "6.0.2";
src = fetchFromGitHub {
owner = "ROCm";
repo = "ROCT-Thunk-Interface";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-F6Qi+A9DuSx2e4WSfp4cnniKr0CkCZcZqsKwQmmZHhk=";
};
nativeBuildInputs = [
pkg-config
cmake
];
buildInputs = [
libdrm
numactl
];
cmakeFlags = [
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
];
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
};
meta = with lib; {
description = "Radeon open compute thunk interface";
homepage = "https://github.com/ROCm/ROCT-Thunk-Interface";
license = with licenses; [ bsd2 mit ];
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -9,24 +9,25 @@
busybox,
python3,
gnugrep,
clr, # Only for localGpuTargets
# rocminfo requires that the calling user have a password and be in
# the video group. If we let rocm_agent_enumerator rely upon
# rocminfo's output, then it, too, has those requirements. Instead,
# we can specify the GPU targets for this system (e.g. "gfx803" for
# Polaris) such that no system call is needed for downstream
# compilers to determine the desired target.
defaultTargets ? [ ],
defaultTargets ? (clr.localGpuTargets or [ ]),
}:
stdenv.mkDerivation (finalAttrs: {
version = "6.0.2";
version = "6.3.3";
pname = "rocminfo";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocminfo";
rev = "rocm-${finalAttrs.version}";
sha256 = "sha256-k0QeCyQcarGbAh4ft8Y7JBK6l2nWxDUc20XoYmtrMMs=";
sha256 = "sha256-fQPtO5TNbCbaZZ7VtGkkqng5QZ+FcScdh1opWr5YkLU=";
};
nativeBuildInputs = [
@ -49,8 +50,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -59,9 +60,5 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.ncsa;
maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members;
platforms = platforms.linux;
broken =
stdenv.hostPlatform.isAarch64
|| versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -2,12 +2,10 @@
lib,
stdenv,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
cmake,
rocm-cmake,
rocminfo,
ninja,
clr,
git,
libxml2,
@ -20,6 +18,13 @@
buildTests ? false, # `argument of type 'NoneType' is not iterable`
}:
# FIXME: rocmlir has an entire separate LLVM build in a subdirectory this is silly
# It seems to be forked from AMD's own LLVM
# If possible reusing the rocmPackages.llvm build would be better
# Would have to confirm it is compatible with ROCm's tagged LLVM.
# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways
# in subdirs
# Theoretically, we could have our MLIR have an output
# with the source and built objects so that we can just
# use it as the external LLVM repo for this
@ -36,7 +41,7 @@ let
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocmlir${suffix}";
version = "6.0.2";
version = "6.3.3";
outputs =
[
@ -50,13 +55,12 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "rocMLIR";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-AypY0vL8Ij1zLycwpG2EPWWl4utp4ejXpAK0Jj/UvrA=";
hash = "sha256-0SQ6uLDRfVfdCX+8a7D6pu6dYlFvX0HFzCDEvlKYfak=";
};
nativeBuildInputs = [
cmake
rocm-cmake
ninja
clr
python3Packages.python
python3Packages.tomli
@ -75,23 +79,17 @@ stdenv.mkDerivation (finalAttrs: {
];
patches = [
(fetchpatch {
name = "fix-TosaToRock-missing-includes.patch";
url = "https://github.com/ROCm/rocMLIR/commit/80b8c94a5dd6ab832733116fe0339c1d6011ab57.patch";
hash = "sha256-przg1AQZTiVbVd/4wA+KlGXu/RISO5n11FBkmUFKRSA=";
})
(fetchpatch {
name = "fix-cmake-depedency-on-transforms.patch";
url = "https://github.com/ROCm/rocMLIR/commit/b85ca4855e0f0214c2fd695e493c884cf08a3472.patch";
hash = "sha256-m108PnwvDAN3xWko+gZMgvCNFl4LXTvC67JHXhFHeBc=";
})
./initparamdata-sort-const.patch
];
cmakeFlags =
[
"-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
"-DLLVM_ENABLE_ZSTD=ON"
"-DLLVM_ENABLE_ZLIB=ON"
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_USE_LINKER=lld"
"-DLLVM_ENABLE_ZSTD=FORCE_ON"
"-DLLVM_ENABLE_ZLIB=FORCE_ON"
"-DLLVM_ENABLE_LIBCXX=ON"
"-DLLVM_ENABLE_TERMINFO=ON"
"-DROCM_PATH=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
@ -99,9 +97,7 @@ stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
]
++ lib.optionals buildRockCompiler [
"-DBUILD_FAT_LIBROCKCOMPILER=ON"
(lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler)
]
++ lib.optionals (!buildRockCompiler) [
"-DROCM_TEST_CHIPSET=gfx000"
@ -111,6 +107,10 @@ stdenv.mkDerivation (finalAttrs: {
patchShebangs mlir
patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py
# Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read'
substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
--replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"
# remove when no longer required
substituteInPlace mlir/test/{e2e/generateE2ETest.py,fusion/e2e/generate-fusion-tests.py} \
--replace-fail "\"/opt/rocm/bin" "\"${rocminfo}/bin"
@ -150,10 +150,9 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
page = "tags?per_page=2";
filter = ".[1].name | split(\"-\") | .[1]";
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
page = "tags?per_page=4";
};
meta = with lib; {
@ -162,8 +161,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ asl20 ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,13 @@
diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
index 3f5ee596819a..590d53788822 100644
--- a/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
+++ b/mlir/include/mlir/Dialect/Rock/Tuning/GridwiseGemmParams.h
@@ -209,7 +209,7 @@ private:
size_t original_pos;
int64_t padding_amount;
- bool operator<(const InitParamData &rhs) {
+ bool operator<(const InitParamData &rhs) const {
if (this->padding_amount < rhs.padding_amount) {
return true;
} else if (this->padding_amount == rhs.padding_amount) {

View file

@ -1,87 +1,91 @@
{ lib
, fetchpatch
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, rocm-cmake
, clr
, gtest
, gbenchmark
, buildTests ? false
, buildBenchmarks ? false
, gpuTargets ? [ ]
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
clr,
gtest,
gbenchmark,
buildTests ? false,
buildBenchmarks ? false,
gpuTargets ? [ ],
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocprim";
version = "6.0.2";
version = "6.3.3";
outputs = [
"out"
] ++ lib.optionals buildTests [
"test"
] ++ lib.optionals buildBenchmarks [
"benchmark"
];
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocPRIM";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-nWvq26qRPZ6Au1rc5cR74TKArcdUFg7O9djFi8SvMeM=";
hash = "sha256-0aHxpBuYIYhI2UER45YhHHL5YcxA+XeXoihcUs2AmCo=";
};
patches = [
(fetchpatch {
name = "arch-conversion-marco.patch";
url = "https://salsa.debian.org/rocm-team/rocprim/-/raw/70c8aaee3cf545d92685f4ed9bf8f41e3d4d570c/debian/patches/arch-conversion-macro.patch";
hash = "sha256-oXdmbCArOB5bKE8ozDFrSh4opbO+c4VI6PNhljeUSms=";
})
];
nativeBuildInputs = [
cmake
rocm-cmake
clr
];
buildInputs = lib.optionals buildTests [
gtest
] ++ lib.optionals buildBenchmarks [
gbenchmark
];
buildInputs =
lib.optionals buildTests [
gtest
]
++ lib.optionals buildBenchmarks [
gbenchmark
];
cmakeFlags = [
"-DCMAKE_CXX_COMPILER=hipcc"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
] ++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
] ++ lib.optionals buildTests [
"-DBUILD_TEST=ON"
] ++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
cmakeFlags =
[
"-DCMAKE_BUILD_TYPE=Release"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
]
++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_TEST=ON"
]
++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
postInstall = lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
mv $out/bin/rocprim $test/bin
'' + lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/benchmark_* $benchmark/bin
'' + lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
mv $out/bin/rocprim $test/bin
''
+ lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/benchmark_* $benchmark/bin
''
+ lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -90,6 +94,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,79 @@
{
lib,
stdenv,
rocm-runtime,
rocprofiler,
numactl,
libpciaccess,
libxml2,
elfutils,
fetchFromGitHub,
rocmUpdateScript,
cmake,
clang,
clr,
python3Packages,
gpuTargets ? clr.gpuTargets,
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocprofiler-register";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocprofiler-register";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-UZsCiGnudsbL1v5lKBx7Vz3/HRnGn4f86Pd+qu3ryh0=";
fetchSubmodules = true;
};
nativeBuildInputs = [
cmake
clang
clr
];
buildInputs = [
numactl
libpciaccess
libxml2
elfutils
rocm-runtime
rocprofiler.rocmtoolkit-merged
python3Packages.lxml
python3Packages.cppheaderparser
python3Packages.pyyaml
python3Packages.barectf
python3Packages.pandas
];
cmakeFlags = [
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
"-DHIP_ROOT_DIR=${clr}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DBUILD_TEST=OFF"
"-DROCPROFILER_BUILD_TESTS=0"
"-DROCPROFILER_BUILD_SAMPLES=0"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
];
passthru.updateScript = rocmUpdateScript {
name = "rocprofiler-register";
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Profiling with perf-counters and derived metrics";
homepage = "https://github.com/ROCm/rocprofiler";
license = with licenses; [ mit ]; # mitx11
maintainers = teams.rocm.members;
platforms = platforms.linux;
};
})

View file

@ -1,109 +0,0 @@
diff --git a/bin/rocprofv2 b/bin/rocprofv2
index d0445e7..6079af8 100755
--- a/bin/rocprofv2
+++ b/bin/rocprofv2
@@ -7,7 +7,8 @@ if [ -n "${ROCP_PRELOAD}" ]; then LD_PRELOAD="${ROCP_PRELOAD}"; fi
CURRENT_DIR="$( dirname -- "$0"; )";
ROCPROFV2_DIR=$(dirname -- $(realpath ${BASH_SOURCE[0]}));
-ROCM_DIR=$( dirname -- "$ROCPROFV2_DIR"; )
+ROCPROFILER_DIR=$( dirname -- "$ROCPROFV2_DIR"; )
+ROCM_DIR=@rocmtoolkit_merged@
PLUGIN_LIST=("ctf" "perfetto" "file" "att" "cli")
RUN_FROM_BUILD=0
if [[ $ROCPROFV2_DIR == *"/build"* ]]; then
@@ -15,7 +16,7 @@ if [[ $ROCPROFV2_DIR == *"/build"* ]]; then
ROCM_DIR=$ROCPROFV2_DIR
fi
-export ROCPROFILER_METRICS_PATH=$ROCM_DIR/libexec/rocprofiler/counters/derived_counters.xml
+export ROCPROFILER_METRICS_PATH=$ROCPROFILER_DIR/libexec/rocprofiler/counters/derived_counters.xml
export LD_LIBRARY_PATH=$ROCM_DIR/lib:$LD_LIBRARY_PATH
# Define color code
@@ -83,7 +84,7 @@ while [ 1 ]; do
exit 1
fi
elif [[ "$1" == "--list-counters" ]]; then
- export LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so
+ export LD_PRELOAD=$LD_PRELOAD:$ROC_DIR/lib/rocprofiler/librocprofiler_tool.so
eval $ROCM_DIR/libexec/rocprofiler/ctrl
exit 1
elif [[ "$1" == "-i" || "$1" == "--input" ]]; then
@@ -221,7 +222,7 @@ while [ 1 ]; do
if [ $RUN_FROM_BUILD == 1 ]; then
ATT_PATH=$ROCM_DIR/plugin/att/att/att.py
else
- ATT_PATH=$ROCM_DIR/libexec/rocprofiler/att/att.py
+ ATT_PATH=$ROCPROFILER_DIR/libexec/rocprofiler/att/att.py
export ROCPROFV2_ATT_LIB_PATH=$ROCM_DIR/lib/hsa-amd-aqlprofile/librocprofv2_att.so
fi
ATT_ARGV=$3
@@ -294,13 +295,13 @@ if [ -n "$PMC_LINES" ] && [ ! -n "$ATT_ARGV" ]; then
export OUTPUT_PATH=$FINAL_PATH
fi
let COUNTER=COUNTER+1
- LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so $*
+ LD_PRELOAD=$LD_PRELOAD:$ROCPROFILER_DIR/lib/rocprofiler/librocprofiler_tool.so $*
if [ -n "$OUTPUT_PATH" ]; then
echo -e "\nThe output path for the following counters: $OUTPUT_PATH"
fi
done
else
- LD_PRELOAD=$LD_PRELOAD:$ROCM_DIR/lib/rocprofiler/librocprofiler_tool.so $*
+ LD_PRELOAD=$LD_PRELOAD:$ROCPROFILER_DIR/lib/rocprofiler/librocprofiler_tool.so $*
fi
get_pmc_results_txt_path() {
diff --git a/bin/rpl_run.sh b/bin/rpl_run.sh
index 6b236ed..a9c233c 100755
--- a/bin/rpl_run.sh
+++ b/bin/rpl_run.sh
@@ -25,16 +25,17 @@
ROCPROF_ARGS="$*"
time_stamp=`date +%y%m%d_%H%M%S`
BIN_DIR=$(dirname $(realpath ${BASH_SOURCE[0]}))
-ROOT_DIR=$(dirname $BIN_DIR)
+ROCPROFILER_DIR=$(dirname $BIN_DIR)
+ROOT_DIR=@rocmtoolkit_merged@
RUN_DIR=`pwd`
TMP_DIR="/tmp"
DATA_DIR="rpl_data_${time_stamp}_$$"
-RPL_PATH=$ROOT_DIR/lib
+RPL_PATH=$ROCPROFILER_DIR/lib
TLIB_PATH=$RPL_PATH/rocprofiler
TTLIB_PATH=$ROOT_DIR/lib/roctracer
ROCM_LIB_PATH=$ROOT_DIR/lib
-PROF_BIN_DIR=$ROOT_DIR/libexec/rocprofiler
+PROF_BIN_DIR=$ROCPROFILER_DIR/libexec/rocprofiler
# check if rocprof is supportd on this gpu arch
V1_SUPPORTED_GPU_ARCHS=("gfx80x","gfx90x","gfx10xx","gfx94x")
@@ -80,7 +81,7 @@ unset ROCP_PROXY_QUEUE
# Disable AQL-profile read API
export AQLPROFILE_READ_API=0
# ROC Profiler package path
-export ROCP_PACKAGE_DIR=$ROOT_DIR
+export ROCP_PACKAGE_DIR=$ROCPROFILER_DIR
# enabled SPM KFD mode
export ROCP_SPM_KFD_MODE=1
@@ -400,7 +401,7 @@ unset_v1_envs() {
################################################################################################
# main
-echo "RPL: on '$time_stamp' from '$ROOT_DIR' in '$RUN_DIR'"
+echo "RPL: on '$time_stamp' from '$ROCPROFILER_DIR' in '$RUN_DIR'"
# Parsing arguments
if [ -z "$1" ] ; then
usage
@@ -633,7 +634,7 @@ elif [ "$input_type" = "txt" -o "$input_type" = "none" ] ; then
else
echo "<metric></metric>" > $RES_DIR/input.xml
fi
- input_list=`/bin/ls $RES_DIR/input*.xml`
+ input_list=`ls $RES_DIR/input*.xml`
export ROCPROFILER_SESS=$RES_DIR
else
fatal "Bad input file type '$INPUT_FILE'"

View file

@ -1,12 +0,0 @@
diff --git a/src/tools/versioning/version.cpp b/src/tools/versioning/version.cpp
index 11bdd00..339743c 100644
--- a/src/tools/versioning/version.cpp
+++ b/src/tools/versioning/version.cpp
@@ -1,6 +1,7 @@
#include <rocm-core/rocm_version.h>
#include <iostream>
#include <sstream>
+#include <stdint.h>
int main() {

View file

@ -4,17 +4,14 @@
fetchFromGitHub,
rocmUpdateScript,
symlinkJoin,
replaceVars,
cmake,
clang,
clr,
rocm-core,
rocm-thunk,
rocm-runtime,
rocm-device-libs,
roctracer,
rocdbgapi,
rocm-smi,
hsa-amd-aqlprofile-bin,
numactl,
libpciaccess,
libxml2,
@ -22,6 +19,7 @@
mpi,
systemd,
gtest,
git,
python3Packages,
gpuTargets ? clr.gpuTargets,
}:
@ -32,12 +30,10 @@ let
paths = [
rocm-core
rocm-thunk
rocm-runtime
rocm-device-libs
roctracer
rocdbgapi
rocm-smi
hsa-amd-aqlprofile-bin
clr
];
@ -48,32 +44,27 @@ let
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocprofiler";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocprofiler";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-yzgw9g5cHAZpdbU44+1ScZyUcZ2I4GGfjbm9GSqCClk=";
hash = "sha256-x6DVt1logBE8aNnuwukQhsv/vRqkJALcfAF+6yEQuIk=";
fetchSubmodules = true;
};
patches = [
# These just simply won't build
./0000-dont-install-tests-hsaco.patch
# Fix bad paths
(replaceVars ./0001-fix-shell-scripts.patch {
rocmtoolkit_merged = rocmtoolkit-merged;
})
# Fix for missing uint32_t not defined
./0002-include-stdint-in-version.patch
./optional-aql-in-cmake.patch
];
nativeBuildInputs = [
cmake
clang
clr
git
python3Packages.lxml
python3Packages.cppheaderparser
python3Packages.pyyaml
@ -93,12 +84,20 @@ stdenv.mkDerivation (finalAttrs: {
propagatedBuildInputs = [ rocmtoolkit-merged ];
# HACK: allow building without aqlprofile, probably explodes at runtime if use profiling
env.LDFLAGS = "-z nodefs -Wl,-undefined,dynamic_lookup,--unresolved-symbols=ignore-all";
#HACK: rocprofiler's cmake doesn't add these deps properly
env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";
cmakeFlags = [
"-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
"-DHIP_ROOT_DIR=${clr}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DBUILD_TEST=OFF"
"-DROCPROFILER_BUILD_TESTS=0"
"-DROCPROFILER_BUILD_SAMPLES=0"
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
@ -107,6 +106,13 @@ stdenv.mkDerivation (finalAttrs: {
postPatch = ''
patchShebangs .
substituteInPlace cmake_modules/rocprofiler_utils.cmake \
--replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
return()'
substituteInPlace CMakeLists.txt \
--replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""
substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
--replace "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
@ -115,20 +121,16 @@ stdenv.mkDerivation (finalAttrs: {
'';
postInstall = ''
# Why do these not already have the executable bit set?
chmod +x $out/lib/rocprofiler/librocprof-tool.so
chmod +x $out/share/rocprofiler/tests-v1/test/ocl/SimpleConvolution
# Why do these have the executable bit set?
chmod -x $out/libexec/rocprofiler/counters/basic_counters.xml
chmod -x $out/libexec/rocprofiler/counters/derived_counters.xml
chmod -x $out/libexec/rocprofiler/counters/*.xml
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
passthru.rocmtoolkit-merged = rocmtoolkit-merged;
meta = with lib; {
description = "Profiling with perf-counters and derived metrics";
@ -136,8 +138,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ]; # mitx11
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor clr.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -0,0 +1,147 @@
From https://raw.githubusercontent.com/AphidGit/rocm_compile/refs/heads/main/rocprofiler.patch
diff --git a/cmake_modules/rocprofiler_env.cmake b/cmake_modules/rocprofiler_env.cmake
index 7b7c472..0aba3ed 100644
--- a/cmake_modules/rocprofiler_env.cmake
+++ b/cmake_modules/rocprofiler_env.cmake
@@ -36,6 +36,7 @@ if(ROCPROFILER_DEBUG_TRACE)
target_compile_definitions(rocprofiler-build-flags INTERFACE DEBUG_TRACE=1)
endif()
+set(ROCPROFILER_LD_AQLPROFILE false)
# Enable direct loading of AQL-profile HSA extension
if(ROCPROFILER_LD_AQLPROFILE)
target_compile_definitions(rocprofiler-build-flags INTERFACE ROCP_LD_AQLPROFILE=1)
@@ -80,9 +81,3 @@ if("${ROCM_ROOT_DIR}" STREQUAL "")
message(FATAL_ERROR "ROCM_ROOT_DIR is not found.")
endif()
-find_library(
- HSA_AMD_AQLPROFILE_LIBRARY
- NAMES hsa-amd-aqlprofile64
- HINTS ${CMAKE_PREFIX_PATH}
- PATHS ${ROCM_ROOT_DIR}
- PATH_SUFFIXES lib REQUIRED)
diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt
index 61782f0..16c83bf 100644
--- a/src/api/CMakeLists.txt
+++ b/src/api/CMakeLists.txt
@@ -51,15 +51,6 @@ find_file(
NO_DEFAULT_PATH REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
-find_library(
- AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
- HINTS ${CMAKE_PREFIX_PATH}
- PATHS ${ROCM_PATH}
- PATH_SUFFIXES lib)
-
-if(NOT AQLPROFILE_LIB)
- message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
-endif()
# ########################################################################################
# Adding Old Library Files
@@ -247,7 +238,7 @@ target_include_directories(
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/rocprofiler>
PRIVATE ${LIB_DIR} ${ROOT_DIR} ${PROJECT_SOURCE_DIR}/include/rocprofiler)
target_link_libraries(
- ${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 c stdc++
+ ${ROCPROFILER_TARGET} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++
dl rocprofiler::build-flags rocprofiler::memcheck)
get_target_property(ROCPROFILER_LIBRARY_V1_NAME ${ROCPROFILER_TARGET} NAME)
@@ -325,8 +316,7 @@ target_link_options(
-Wl,--no-undefined)
target_link_libraries(
rocprofiler-v2
- PRIVATE ${AQLPROFILE_LIB}
- hsa-runtime64::hsa-runtime64
+ PRIVATE hsa-runtime64::hsa-runtime64
Threads::Threads
atomic
numa
diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp
index 2c47186..6b39634 100644
--- a/src/util/hsa_rsrc_factory.cpp
+++ b/src/util/hsa_rsrc_factory.cpp
@@ -155,17 +155,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
if (kern_arg_pool_ == nullptr)
CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
- // Get AqlProfile API table
- aqlprofile_api_ = {};
-#ifdef ROCP_LD_AQLPROFILE
- status = LoadAqlProfileLib(&aqlprofile_api_);
-#else
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
- sizeof(aqlprofile_api_), &aqlprofile_api_);
-#endif
- CHECK_STATUS("aqlprofile API table load failed", status);
-
// Get Loader API table
loader_api_ = {};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp
index 0a44d18..fab5b75 100644
--- a/test/util/hsa_rsrc_factory.cpp
+++ b/test/util/hsa_rsrc_factory.cpp
@@ -137,17 +137,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
- // Get AqlProfile API table
- aqlprofile_api_ = {0};
-#ifdef ROCP_LD_AQLPROFILE
- status = LoadAqlProfileLib(&aqlprofile_api_);
-#else
- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
- hsa_ven_amd_aqlprofile_VERSION_MAJOR,
- sizeof(aqlprofile_api_), &aqlprofile_api_);
-#endif
- CHECK_STATUS("aqlprofile API table load failed", status);
-
// Get Loader API table
loader_api_ = {0};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
diff --git a/tests-v2/unittests/core/CMakeLists.txt b/tests-v2/unittests/core/CMakeLists.txt
index 107cb51..0f6d4bf 100644
--- a/tests-v2/unittests/core/CMakeLists.txt
+++ b/tests-v2/unittests/core/CMakeLists.txt
@@ -235,8 +235,7 @@ set_target_properties(runCoreUnitTests PROPERTIES
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests")
target_link_libraries(
runCoreUnitTests
- PRIVATE ${AQLPROFILE_LIB}
- test_hsatool_library
+ PRIVATE test_hsatool_library
hsa-runtime64::hsa-runtime64
Threads::Threads
GTest::gtest GTest::gtest_main
@@ -285,4 +284,4 @@ endif()
# for the *_FilePlugin tests
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
-endif()
\ No newline at end of file
+endif()
diff --git a/tests-v2/unittests/profiler/CMakeLists.txt b/tests-v2/unittests/profiler/CMakeLists.txt
index 53180d5..0c4d4a7 100644
--- a/tests-v2/unittests/profiler/CMakeLists.txt
+++ b/tests-v2/unittests/profiler/CMakeLists.txt
@@ -122,7 +122,7 @@ target_compile_definitions(
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_link_libraries(
- runUnitTests PRIVATE rocprofiler-v2 ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
+ runUnitTests PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} dw elf c dl)
add_dependencies(tests runUnitTests)
@@ -158,4 +158,4 @@ endif()
# for the *_FilePlugin tests
if(NOT EXISTS "${PROJECT_BINARY_DIR}/test-output")
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/test-output")
-endif()
\ No newline at end of file
+endif()

View file

@ -12,13 +12,13 @@
stdenv.mkDerivation (finalAttrs: {
pname = "rocr-debug-agent";
version = "6.0.2";
version = "6.3.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocr_debug_agent";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-8Q800T7mwBy8/rujVNyCQ0ZpZ9uPKKk+Sv9ibpWou/8=";
hash = "sha256-HYag5/E72hopDhS9EVcdyGgSvzbCMzKqLC+SIS28Y9M=";
};
nativeBuildInputs = [
@ -45,8 +45,8 @@ stdenv.mkDerivation (finalAttrs: {
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -55,8 +55,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ ncsa ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -1,34 +1,38 @@
{ lib
, stdenv
, fetchFromGitHub
, rocmUpdateScript
, cmake
, rocm-cmake
, clr
, gtest
, gbenchmark
, buildTests ? false
, buildBenchmarks ? false
, gpuTargets ? [ ]
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
clr,
gtest,
gbenchmark,
buildTests ? false,
buildBenchmarks ? false,
gpuTargets ? clr.localGpuTargets or [ ],
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocrand";
version = "6.0.2";
pname = "rocrand${clr.gpuArchSuffix}";
version = "6.3.3";
outputs = [
"out"
] ++ lib.optionals buildTests [
"test"
] ++ lib.optionals buildBenchmarks [
"benchmark"
];
outputs =
[
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocRAND";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-BBkcYOP+zh3OQTxuSkeiJizwnE9Gr5Jbhx0e8SU/mmU=";
hash = "sha256-rrRLPqEw39M+6dtPW8DcnQiSZNwxWNINJ1wjU098Vkk=";
};
nativeBuildInputs = [
@ -37,45 +41,52 @@ stdenv.mkDerivation (finalAttrs: {
clr
];
buildInputs = lib.optionals buildTests [
gtest
] ++ lib.optionals buildBenchmarks [
gbenchmark
];
buildInputs =
lib.optionals buildTests [
gtest
]
++ lib.optionals buildBenchmarks [
gbenchmark
];
cmakeFlags = [
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
"-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
] ++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
] ++ lib.optionals buildTests [
"-DBUILD_TEST=ON"
] ++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
cmakeFlags =
[
"-DHIP_ROOT_DIR=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
]
++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_TEST=ON"
]
++ lib.optionals buildBenchmarks [
"-DBUILD_BENCHMARK=ON"
];
postInstall = lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
'' + lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/benchmark_* $benchmark/bin
'' + lib.optionalString (buildTests || buildBenchmarks) ''
rm -r $out/bin/rocRAND
# Fail if bin/ isn't actually empty
rmdir $out/bin
'';
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
''
+ lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/benchmark_* $benchmark/bin
''
+ lib.optionalString (buildTests || buildBenchmarks) ''
rm -r $out/bin/rocRAND
# Fail if bin/ isn't actually empty
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
@ -84,6 +95,5 @@ stdenv.mkDerivation (finalAttrs: {
license = with licenses; [ mit ];
maintainers = teams.rocm.members;
platforms = platforms.linux;
broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0";
};
})

View file

@ -6,6 +6,7 @@
cmake,
rocm-cmake,
rocblas,
rocprim,
rocsparse,
clr,
fmt,
@ -14,12 +15,25 @@
lapack-reference,
buildTests ? false,
buildBenchmarks ? false,
gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900" "gfx906:xnack-" ]
gpuTargets ? (
clr.localGpuTargets or [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1010"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
]
),
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocsolver";
version = "6.0.2";
pname = "rocsolver${clr.gpuArchSuffix}";
version = "6.3.3";
outputs =
[
@ -36,12 +50,13 @@ stdenv.mkDerivation (finalAttrs: {
owner = "ROCm";
repo = "rocSOLVER";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-tglQpwCSFABRuEDiJrzQVFIdx9p85E2MiUYN0aoTAXo=";
hash = "sha256-+sGU+0CB48iolJSyYo+xH36q5LCUp+nKtOYbguzMuhg=";
};
nativeBuildInputs =
[
cmake
# no ninja, it buffers console output and nix times out long periods of no output
rocm-cmake
clr
]
@ -51,7 +66,11 @@ stdenv.mkDerivation (finalAttrs: {
buildInputs =
[
# FIXME: rocblas and rocsolver can't build in parallel
# but rocsolver doesn't need rocblas' offload builds at build time
# could we build against a rocblas-minimal?
rocblas
rocprim
rocsparse
fmt
]
@ -64,8 +83,9 @@ stdenv.mkDerivation (finalAttrs: {
cmakeFlags =
[
"-DCMAKE_CXX_COMPILER=hipcc"
"-DCMAKE_CXX_FLAGS=-Wno-switch" # Way too many warnings
"-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
@ -96,9 +116,8 @@ stdenv.mkDerivation (finalAttrs: {
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
owner = finalAttrs.src.owner;
repo = finalAttrs.src.repo;
name = "rocsolver";
inherit (finalAttrs.src) owner repo;
};
requiredSystemFeatures = [ "big-parallel" ];
@ -111,8 +130,5 @@ stdenv.mkDerivation (finalAttrs: {
platforms = platforms.linux;
timeout = 14400; # 4 hours
maxSilent = 14400; # 4 hours
broken =
versions.minor finalAttrs.version != versions.minor stdenv.cc.version
|| versionAtLeast finalAttrs.version "7.0.0";
};
})

Some files were not shown because too many files have changed in this diff Show more