{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch2,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  pytestCheckHook,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },
}:

let
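  # gcc13Stdenv is used for CUDA builds, most likely because nvcc only accepts
  # a bounded range of host GCC versions and the default stdenv's compiler can
  # be newer than what nvcc supports.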
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.6";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-d5nMgpS7m6WEILs222ztwphoqkAezJ+qt6sVKSlpIYI=";
    fetchSubmodules = true;
  };
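
  # llama.cpp is vendored as a git submodule under vendor/llama.cpp/
  # (fetchSubmodules above), so the upstream fix is re-rooted there via
  # stripLen/extraPrefix.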
  patches = [
    # fix segfault when running tests due to missing default Metal devices
    (fetchpatch2 {
      url = "https://github.com/ggerganov/llama.cpp/commit/acd38efee316f3a5ed2e6afcbc5814807c347053.patch?full_index=1";
      stripLen = 1;
      extraPrefix = "vendor/llama.cpp/";
      hash = "sha256-71+Lpg9z5KPlaQTX9D85KS2LXFWLQNJJ18TJyyq3/pU=";
    })
  ];
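
  # cmake here is only needed by scikit-build-core's wheel build, so the
  # generic Nix cmake configure phase is skipped.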
  dontUseCmakeConfigure = true;
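  # scikit-build-core reads extra CMake flags from SKBUILD_CMAKE_ARGS as a
  # semicolon-separated list, hence concatStringsSep ";" below.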
  SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
    # -march=native*, which is either a no-op (if cc-wrapper is able to ignore
    # it), or an attempt to build a non-reproducible binary.
    #
    # This issue was spotted when cmake rules appended feature modifiers to
    # -mcpu, breaking the Linux build as follows:
    #
    # cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
    [ "-DGGML_NATIVE=off" ]
    ++ lib.optionals cudaSupport [
      "-DGGML_CUDA=on"
      "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
      "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
    ]
  );
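
  # CMake's --build step honours CMAKE_BUILD_PARALLEL_LEVEL, so the wheel
  # build uses however many cores Nix allots to the builder.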
  preBuild = ''
    export CMAKE_BUILD_PARALLEL_LEVEL="$NIX_BUILD_CORES"
  '';

  nativeBuildInputs = [
    cmake
    ninja
  ];
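
  # PEP 517 build-backend requirements; scikit-build-core is the backend that
  # drives the CMake/ninja build above.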
  build-system = [
    pathspec
    pyproject-metadata
    scikit-build-core
  ];

  buildInputs = lib.optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime.h
      cuda_cccl # <thrust/*>
      libcublas # cublas_v2.h
    ]
  );
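
  # buildPythonPackage forwards this stdenv to mkDerivation, so CUDA builds
  # compile with the gcc13 toolchain selected above.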
  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # tries to download a model from huggingface-hub
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater { rev-prefix = "v"; };
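    # Exercise the CUDA variant as a passthru test so the override keeps
    # building; CUDA is only available on Linux.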
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ kirillrdy ];
  };
}