{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch2,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  pytestCheckHook,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },
}:

let
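  # gcc13Stdenv is used for CUDA builds, most likely because nvcc only accepts
  # a bounded range of host GCC versions and the default stdenv's compiler can
  # be newer than what nvcc supports.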
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.6";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-d5nMgpS7m6WEILs222ztwphoqkAezJ+qt6sVKSlpIYI=";
    fetchSubmodules = true;
  };
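
  # llama.cpp is vendored as a git submodule under vendor/llama.cpp/
  # (fetchSubmodules above), so the upstream fix is re-rooted there via
  # stripLen/extraPrefix.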
  patches = [
    # fix segfault when running tests due to missing default Metal devices
    (fetchpatch2 {
      url = "https://github.com/ggerganov/llama.cpp/commit/acd38efee316f3a5ed2e6afcbc5814807c347053.patch?full_index=1";
      stripLen = 1;
      extraPrefix = "vendor/llama.cpp/";
      hash = "sha256-71+Lpg9z5KPlaQTX9D85KS2LXFWLQNJJ18TJyyq3/pU=";
    })
  ];
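
  # cmake here is only needed by scikit-build-core's wheel build, so the
  # generic Nix cmake configure phase is skipped.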
  dontUseCmakeConfigure = true;
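  # scikit-build-core reads extra CMake flags from SKBUILD_CMAKE_ARGS as a
  # semicolon-separated list, hence concatStringsSep ";" below.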
  SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
    # -march=native*, which is either a no-op (if cc-wrapper is able to ignore
    # it), or an attempt to build a non-reproducible binary.
    #
    # This issue was spotted when cmake rules appended feature modifiers to
    # -mcpu, breaking the Linux build as follows:
    #
    # cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
    [ "-DGGML_NATIVE=off" ]
    ++ lib.optionals cudaSupport [
      "-DGGML_CUDA=on"
      "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
      "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
    ]
  );
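
  # CMake's --build step honours CMAKE_BUILD_PARALLEL_LEVEL, so the wheel
  # build uses however many cores Nix allots to the builder.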
  preBuild = ''
    export CMAKE_BUILD_PARALLEL_LEVEL="$NIX_BUILD_CORES"
  '';

  nativeBuildInputs = [
    cmake
    ninja
  ];
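
  # PEP 517 build-backend requirements; scikit-build-core is the backend that
  # drives the CMake/ninja build above.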
  build-system = [
    pathspec
    pyproject-metadata
    scikit-build-core
  ];

  buildInputs = lib.optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime.h
      cuda_cccl # <thrust/*>
      libcublas # cublas_v2.h
    ]
  );
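
  # buildPythonPackage forwards this stdenv to mkDerivation, so CUDA builds
  # compile with the gcc13 toolchain selected above.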
  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # tries to download a model from huggingface-hub
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater { rev-prefix = "v"; };
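    # Exercise the CUDA variant as a passthru test so the override keeps
    # building; CUDA is only available on Linux.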
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ kirillrdy ];
  };
}