mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-06-12 04:35:41 +03:00
56 lines
1.3 KiB
Nix
56 lines
1.3 KiB
Nix
# Package expression for pymupdf4llm, built from the `pymupdf4llm`
# subdirectory of the pymupdf/RAG GitHub repository.
#
# Arguments:
#   lib                 - used for license and maintainer metadata
#   buildPythonPackage  - builder invoked for this package
#   fetchFromGitHub     - fetcher for the upstream source
#   setuptools          - build-system (build-time) dependency
#   pymupdf             - runtime dependency; also used by the check phase
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  pymupdf,
}:

buildPythonPackage rec {
  pname = "pymupdf4llm";
  version = "0.0.17";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "pymupdf";
    repo = "RAG";
    # Upstream release tags are the version prefixed with "v".
    tag = "v${version}";
    hash = "sha256-+RLK+UorkU8eVQJGrc7pVNZPtIpxMgA9mBKA6GeWUa0=";
  };

  # The Python project is in the `pymupdf4llm` subdirectory of the fetched
  # source, not at the repository root.
  sourceRoot = "${src.name}/pymupdf4llm";

  build-system = [ setuptools ];

  dependencies = [ pymupdf ];

  # Smoke test: write a one-page PDF containing a known string, convert it
  # with pymupdf4llm.to_markdown, and assert that the result is a string
  # containing that text. The heredoc delimiter is quoted ('EOF') so the
  # shell passes the script to Python without expanding anything in it.
  checkPhase = ''
    runHook preCheck

    python3 - <<'EOF'
    import fitz
    import pymupdf4llm

    doc = fitz.open()
    page = doc.new_page()
    page.insert_text((72, 72), "Hello, Nix!")
    doc.save("input.pdf")

    md = pymupdf4llm.to_markdown("input.pdf")
    assert isinstance(md, str), "Returned value is not a string"
    assert "Hello, Nix!" in md, "Returned value does not contain the expected text"
    EOF

    runHook postCheck
  '';

  pythonImportsCheck = [ "pymupdf4llm" ];

  meta = {
    description = "PyMuPDF Utilities for LLM/RAG - converts PDF pages to Markdown format for Retrieval-Augmented Generation";
    homepage = "https://github.com/pymupdf/RAG";
    changelog = "https://github.com/pymupdf/RAG/blob/${src.tag}/CHANGES.md";
    license = lib.licenses.agpl3Only;
    maintainers = with lib.maintainers; [ ryota2357 ];
  };
}