nixpkgs/pkgs/development/python-modules/llama-cpp-python/default.nix

{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  pytestCheckHook,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

}:
let
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.9";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-iw9teWZ612gUNM2Zm5WGdFTq7aNo8QRRIGeHoFpXdfQ=";
    fetchSubmodules = true;
  };
  # src = /home/gaetan/llama-cpp-python;

  dontUseCmakeConfigure = true;
  SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
    # -march=native* which is either a no-op (if cc-wrapper is able to ignore
    # it), or an attempt to build a non-reproducible binary.
    #
    # This issue was spotted when cmake rules appended feature modifiers to
    # -mcpu, breaking linux build as follows:
    #
    # cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
    [
      "-DGGML_NATIVE=off"
      "-DGGML_BUILD_NUMBER=1"
    ]
    ++ lib.optionals cudaSupport [
      "-DGGML_CUDA=on"
      "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
      "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
    ]
  );

  enableParallelBuilding = true;

  nativeBuildInputs = [
    cmake
    ninja
  ];

  build-system = [
    pathspec
    pyproject-metadata
    scikit-build-core
  ];

  buildInputs = lib.optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime.h
      cuda_cccl # <thrust/*>
      libcublas # cublas_v2.h
    ]
  );

  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # tries to download model from huggingface-hub
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater { rev-prefix = "v"; };
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      booxter
      kirillrdy
    ];
  };
}
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								{
 								  lib,
-												python312Packages.llama-cpp-python: use stdenv from cudaPackages

											
										
										
											2024-10-27 04:45:16 +11:00
+								  stdenv,
-												python312Packages.llama-cpp-python: enable for Darwin

For Darwin, there's no CUDA, hence run CUDA tests only for Linux.

Also, if CUDA is used, we need gcc13 or lower, otherwise the build fails
with:

unsupported GNU version! gcc versions later than 13 are not supported!

This change also updates passthru tests so that they are defined on
linux only, since darwin has no CUDA anyway.

											
										
										
											2025-01-17 21:25:14 -05:00
+								  gcc13Stdenv,
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  buildPythonPackage,
 								  fetchFromGitHub,
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
 								  # nativeBuildInputs
 								  cmake,
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  ninja,
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
 								  # build-system
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  pathspec,
 								  pyproject-metadata,
 								  scikit-build-core,
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								  # dependencies
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  diskcache,
 								  jinja2,
 								  numpy,
 								  typing-extensions,
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
 								  # tests
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  scipy,
 								  huggingface-hub,
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
 								  # passthru
 								  gitUpdater,
 								  pytestCheckHook,
 								  llama-cpp-python,
 								  config,
 								  cudaSupport ? config.cudaSupport,
 								  cudaPackages ? { },
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								}:
-												python312Packages.llama-cpp-python: enable for Darwin

For Darwin, there's no CUDA, hence run CUDA tests only for Linux.

Also, if CUDA is used, we need gcc13 or lower, otherwise the build fails
with:

unsupported GNU version! gcc versions later than 13 are not supported!

This change also updates passthru tests so that they are defined on
linux only, since darwin has no CUDA anyway.

											
										
										
											2025-01-17 21:25:14 -05:00
+								let
 								  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
 								in
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								buildPythonPackage rec {
-												python312Packages.llama-cpp-python: use stdenv from cudaPackages

											
										
										
											2024-10-27 04:45:16 +11:00
+								  pname = "llama-cpp-python";
-												python312Packages.llama-cpp-python: 0.3.8 -> 0.3.9

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>

											
										
										
											2025-05-10 12:05:41 -04:00
+								  version = "0.3.9";
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  pyproject = true;
 								  src = fetchFromGitHub {
 								    owner = "abetlen";
 								    repo = "llama-cpp-python";
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								    tag = "v${version}";
-												python312Packages.llama-cpp-python: 0.3.8 -> 0.3.9

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>

											
										
										
											2025-05-10 12:05:41 -04:00
+								    hash = "sha256-iw9teWZ612gUNM2Zm5WGdFTq7aNo8QRRIGeHoFpXdfQ=";
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								    fetchSubmodules = true;
 								  };
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								  # src = /home/gaetan/llama-cpp-python;
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
 								  dontUseCmakeConfigure = true;
 								  SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
-												python312Packages.llama-cpp-python: fix linux build

Their cmake files were attempting to build with -mcpu=native+no<...>
which fails for gcc. Actually, we should never build for native anyway
(which is the raison d'etre for NIX_ENFORCE_NO_NATIVE).

Thankfully, the package provides a variable to disable native
optimizations.

											
										
										
											2025-01-27 23:38:40 -05:00
+								    # Set GGML_NATIVE=off. Otherwise, cmake attempts to build with
 								    # -march=native* which is either a no-op (if cc-wrapper is able to ignore
 								    # it), or an attempt to build a non-reproducible binary.
 								    #
 								    # This issue was spotted when cmake rules appended feature modifiers to
 								    # -mcpu, breaking linux build as follows:
 								    #
 								    # cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
-												python3Packages.llama-cpp-python: 0.3.6 -> 0.3.8

diff: https://github.com/abetlen/llama-cpp-python/compare/v0.3.6...v0.3.8

											
										
										
											2025-03-21 15:51:40 +11:00
+								    [
 								      "-DGGML_NATIVE=off"
 								      "-DGGML_BUILD_NUMBER=1"
 								    ]
-												python312Packages.llama-cpp-python: fix linux build

Their cmake files were attempting to build with -mcpu=native+no<...>
which fails for gcc. Actually, we should never build for native anyway
(which is the raison d'etre for NIX_ENFORCE_NO_NATIVE).

Thankfully, the package provides a variable to disable native
optimizations.

											
										
										
											2025-01-27 23:38:40 -05:00
+								    ++ lib.optionals cudaSupport [
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								      "-DGGML_CUDA=on"
 								      "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
 								      "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
 								    ]
 								  );
-												python312Packages.llama-cpp-python: enable parallel building

											
										
										
											2025-02-24 07:11:12 +11:00
+								  enableParallelBuilding = true;
-												python312Packages.llama-cpp-python: respect NIX_BUILD_CORES

											
										
										
											2025-01-26 22:46:44 +00:00
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  nativeBuildInputs = [
 								    cmake
 								    ninja
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								  ];
 								  build-system = [
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								    pathspec
 								    pyproject-metadata
 								    scikit-build-core
 								  ];
-												treewide: unpin apple-sdk_11

											
										
										
											2025-01-02 19:20:02 -06:00
+								  buildInputs = lib.optionals cudaSupport (
 								    with cudaPackages;
 								    [
 								      cuda_cudart # cuda_runtime.h
 								      cuda_cccl # <thrust/*>
 								      libcublas # cublas_v2.h
 								    ]
 								  );
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
-												python312Packages.llama-cpp-python: enable for Darwin

For Darwin, there's no CUDA, hence run CUDA tests only for Linux.

Also, if CUDA is used, we need gcc13 or lower, otherwise the build fails
with:

unsupported GNU version! gcc versions later than 13 are not supported!

This change also updates passthru tests so that they are defined on
linux only, since darwin has no CUDA anyway.

											
										
										
											2025-01-17 21:25:14 -05:00
+								  stdenv = stdenvTarget;
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								  dependencies = [
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								    diskcache
 								    jinja2
 								    numpy
 								    typing-extensions
 								  ];
 								  nativeCheckInputs = [
 								    pytestCheckHook
 								    scipy
 								    huggingface-hub
 								  ];
 								  disabledTests = [
 								    # tries to download model from huggingface-hub
 								    "test_real_model"
 								    "test_real_llama"
 								  ];
 								  pythonImportsCheck = [ "llama_cpp" ];
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								  passthru = {
 								    updateScript = gitUpdater { rev-prefix = "v"; };
-												python312Packages.llama-cpp-python: enable for Darwin

For Darwin, there's no CUDA, hence run CUDA tests only for Linux.

Also, if CUDA is used, we need gcc13 or lower, otherwise the build fails
with:

unsupported GNU version! gcc versions later than 13 are not supported!

This change also updates passthru tests so that they are defined on
linux only, since darwin has no CUDA anyway.

											
										
										
											2025-01-17 21:25:14 -05:00
+								    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
 								      withCuda = llama-cpp-python.override {
 								        cudaSupport = true;
 								      };
 								    };
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								  };
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
 								  meta = {
 								    description = "Python bindings for llama.cpp";
 								    homepage = "https://github.com/abetlen/llama-cpp-python";
-												python312Packages.llama-cpp-python: 0.3.1 -> 0.3.2

Diff: https://github.com/abetlen/llama-cpp-python/compare/refs/tags/v0.3.1...v0.3.2

											
										
										
											2024-12-08 10:46:12 +01:00
+								    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								    license = lib.licenses.mit;
-												python312Packages.llama-cpp-python: adding new maintainer (@booxter)

											
										
										
											2025-01-28 23:36:24 -05:00
+								    maintainers = with lib.maintainers; [
 								      booxter
 								      kirillrdy
 								    ];
-												python312Packages.llama-cpp-python: init at 0.3.1

											
										
										
											2024-10-19 10:10:06 +11:00
+								  };
 								}