From c309f53ac9dc2330241a2f643f0be225d542d0e3 Mon Sep 17 00:00:00 2001 From: Max Beutelspacher Date: Tue, 25 Feb 2025 21:16:32 +0100 Subject: [PATCH 1/5] feat: add the option to store sha256 cache in a file cache is also used for per package source checkouts and - is used as a key --- README.md | 7 ++++++- ros2nix/ros2nix.py | 33 +++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 5ec9406..bddd2cd 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,8 @@ usage: ros2nix [-h] [--output-dir OUTPUT_DIR] [--fetch] [--use-per-package-src] [--patches | --no-patches] [--distro DISTRO] [--src-param SRC_PARAM] [--source-root SOURCE_ROOT] - [--do-check] [--extra-build-inputs DEP1,DEP2,...] + [--cache-file CACHE_FILE] [--do-check] + [--extra-build-inputs DEP1,DEP2,...] [--extra-propagated-build-inputs DEP1,DEP2,...] [--extra-check-inputs DEP1,DEP2,...] [--extra-native-build-inputs DEP1,DEP2,...] [--flake] @@ -172,6 +173,10 @@ options: Set sourceRoot attribute value in the generated Nix expression. Substring '{package_name}' gets replaced with the package name. (default: None) + --cache-file CACHE_FILE + Path to a json-file to store sha265 hashes of + checkouts persistently to cache them across generation + runs. (default: None) --do-check Set doCheck attribute to true (default: False) --extra-build-inputs DEP1,DEP2,... Additional dependencies to add to the generated Nix diff --git a/ros2nix/ros2nix.py b/ros2nix/ros2nix.py index 8bf2190..64a757e 100755 --- a/ros2nix/ros2nix.py +++ b/ros2nix/ros2nix.py @@ -14,6 +14,7 @@ import os import re import subprocess import sys +import json from contextlib import contextmanager from textwrap import dedent, indent from typing import Iterable, Set, List @@ -280,6 +281,9 @@ def ros2nix(args): help="Set sourceRoot attribute value in the generated Nix expression. 
" "Substring '{package_name}' gets replaced with the package name.", ) + parser.add_argument( + "--cache-file", help="Path to a json-file to store sha265 hashes of checkouts persistently to cache them across generation runs." + ) parser.add_argument( "--do-check", action="store_true", @@ -357,6 +361,9 @@ def ros2nix(args): expressions: dict[str, str] = {} git_cache = {} + if args.cache_file is not None and os.path.exists(args.cache_file): + with open(args.cache_file) as f: + git_cache = json.load(f) patch_filenames = set() for source in args.source: @@ -418,15 +425,21 @@ def ros2nix(args): merge_base = merge_base_to_upstream(head) head = check_output(f"git rev-list {merge_base} -1 -- .".split()) - if not args.use_per_package_src and toplevel in git_cache: # only use cache if not using separate checkout per package - info = git_cache[toplevel] + def cache_key(prefix, rev): + if args.use_per_package_src: + return f"{prefix}-{rev}" + return rev + + + # Latest commit present in the upstream repo. If + # the local repository doesn't have additional + # commits, it is the same as HEAD. Should work + # even with detached HEAD. + upstream_rev = merge_base_to_upstream(head) + if cache_key(prefix, upstream_rev) in git_cache: + info = git_cache[cache_key(prefix, upstream_rev)] upstream_rev = info["rev"] else: - # Latest commit present in the upstream repo. If - # the local repository doesn't have additional - # commits, it is the same as HEAD. Should work - # even with detached HEAD. 
- upstream_rev = merge_base_to_upstream(head) info = json.loads( subprocess.check_output( ["nix-prefetch-git", "--quiet"] @@ -438,7 +451,7 @@ def ros2nix(args): + [toplevel, upstream_rev], ).decode() ) - git_cache[toplevel] = info + git_cache[cache_key(prefix, upstream_rev)] = {k : info[k] for k in ["rev", "sha256"]} match = re.match("https://github.com/(?P[^/]*)/(?P.*?)(.git|/.*)?$", url) sparse_checkout = f"""sparseCheckout = ["{prefix}"]; @@ -568,6 +581,10 @@ def ros2nix(args): generate_default(args) # TODO generate also release.nix (for testing/CI)? + if args.cache_file is not None: + with open(args.cache_file, "w") as f: + json.dump(git_cache, f) + if args.compare and compare_failed: err("Some files are not up-to-date") return 2 From c619c573f2db7f8a9f5f7b4fbbecb9fc808f5fcb Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 18 Apr 2025 11:41:38 +0200 Subject: [PATCH 2/5] Use git cache by default --- README.md | 9 +++------ ros2nix/ros2nix.py | 31 +++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index bddd2cd..b05a4c6 100644 --- a/README.md +++ b/README.md @@ -116,8 +116,7 @@ usage: ros2nix [-h] [--output-dir OUTPUT_DIR] [--fetch] [--use-per-package-src] [--patches | --no-patches] [--distro DISTRO] [--src-param SRC_PARAM] [--source-root SOURCE_ROOT] - [--cache-file CACHE_FILE] [--do-check] - [--extra-build-inputs DEP1,DEP2,...] + [--no-cache] [--do-check] [--extra-build-inputs DEP1,DEP2,...] [--extra-propagated-build-inputs DEP1,DEP2,...] [--extra-check-inputs DEP1,DEP2,...] [--extra-native-build-inputs DEP1,DEP2,...] [--flake] @@ -173,10 +172,8 @@ options: Set sourceRoot attribute value in the generated Nix expression. Substring '{package_name}' gets replaced with the package name. (default: None) - --cache-file CACHE_FILE - Path to a json-file to store sha265 hashes of - checkouts persistently to cache them across generation - runs. 
(default: None) + --no-cache Don't use cache of git checkout sha256 hashes across + generation runs. (default: False) --do-check Set doCheck attribute to true (default: False) --extra-build-inputs DEP1,DEP2,... Additional dependencies to add to the generated Nix diff --git a/ros2nix/ros2nix.py b/ros2nix/ros2nix.py index 64a757e..157ef24 100755 --- a/ros2nix/ros2nix.py +++ b/ros2nix/ros2nix.py @@ -14,8 +14,8 @@ import os import re import subprocess import sys -import json from contextlib import contextmanager +from pathlib import Path from textwrap import dedent, indent from typing import Iterable, Set, List @@ -27,6 +27,23 @@ from superflore.utils import err, ok, resolve_dep, warn from .nix_expression import NixExpression, NixLicense +# Copied from https://github.com/srstevenson/xdg-base-dirs +# Copyright © Scott Stevenson +# Less than 10 lines, no need to mention full ISC license here. +def _path_from_env(variable: str, default: Path) -> Path: + if (value := os.environ.get(variable)) and (path := Path(value)).is_absolute(): + return path + return default + + +def xdg_cache_home() -> Path: + """Return a Path corresponding to XDG_CACHE_HOME.""" + return _path_from_env("XDG_CACHE_HOME", Path.home() / ".cache") + + +cache_file = xdg_cache_home() / "ros2nix" / "git-cache.json" + + def resolve_dependencies(deps: Iterable[str]) -> Set[str]: return set(itertools.chain.from_iterable(map(resolve_dependency, deps))) @@ -282,7 +299,9 @@ def ros2nix(args): "Substring '{package_name}' gets replaced with the package name.", ) parser.add_argument( - "--cache-file", help="Path to a json-file to store sha265 hashes of checkouts persistently to cache them across generation runs." 
+ "--no-cache", + action="store_true", + help="Don't use cache of git checkout sha256 hashes across generation runs.", ) parser.add_argument( "--do-check", @@ -361,8 +380,8 @@ def ros2nix(args): expressions: dict[str, str] = {} git_cache = {} - if args.cache_file is not None and os.path.exists(args.cache_file): - with open(args.cache_file) as f: + if not args.no_cache and os.path.exists(cache_file): + with open(cache_file) as f: git_cache = json.load(f) patch_filenames = set() @@ -581,8 +600,8 @@ def ros2nix(args): generate_default(args) # TODO generate also release.nix (for testing/CI)? - if args.cache_file is not None: - with open(args.cache_file, "w") as f: + if not args.no_cache: + with open(cache_file, "w") as f: json.dump(git_cache, f) if args.compare and compare_failed: From a6c2ff98fb97b71a0fef14fb8edf6dc48bfc008a Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 18 Apr 2025 12:13:32 +0200 Subject: [PATCH 3/5] Use URL and prefix (directory) as git cache keys Using the URL instead of git revisions should ensure that the cache doesn't grow indefinitely. With git revisions, the cache would soon be full of mostly old revisions. The advantage of using git revisions is that, for example, different branches of a repo can be cached simultaneously. I hope this is not the common usage pattern. And if it is, one can always set the XDG_CACHE_HOME variable to use different caches for different branches. 
--- ros2nix/ros2nix.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/ros2nix/ros2nix.py b/ros2nix/ros2nix.py index 157ef24..499c421 100755 --- a/ros2nix/ros2nix.py +++ b/ros2nix/ros2nix.py @@ -444,21 +444,18 @@ def ros2nix(args): merge_base = merge_base_to_upstream(head) head = check_output(f"git rev-list {merge_base} -1 -- .".split()) - def cache_key(prefix, rev): + def cache_key(url, prefix): if args.use_per_package_src: - return f"{prefix}-{rev}" - return rev - + return f"{url}?dir={prefix}" + return url # Latest commit present in the upstream repo. If # the local repository doesn't have additional # commits, it is the same as HEAD. Should work # even with detached HEAD. upstream_rev = merge_base_to_upstream(head) - if cache_key(prefix, upstream_rev) in git_cache: - info = git_cache[cache_key(prefix, upstream_rev)] - upstream_rev = info["rev"] - else: + info = git_cache.get(cache_key(url, prefix)) + if info is None or info["rev"] != upstream_rev: info = json.loads( subprocess.check_output( ["nix-prefetch-git", "--quiet"] @@ -470,7 +467,7 @@ def ros2nix(args): + [toplevel, upstream_rev], ).decode() ) - git_cache[cache_key(prefix, upstream_rev)] = {k : info[k] for k in ["rev", "sha256"]} + git_cache[cache_key(url, prefix)] = {k : info[k] for k in ["rev", "sha256"]} match = re.match("https://github.com/(?P[^/]*)/(?P.*?)(.git|/.*)?$", url) sparse_checkout = f"""sparseCheckout = ["{prefix}"]; From 81c4e3656b83ae57c85cd9f4a1ac2f06aa7f19f7 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 18 Apr 2025 12:26:57 +0200 Subject: [PATCH 4/5] Create directories for git cache if needed --- ros2nix/ros2nix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros2nix/ros2nix.py b/ros2nix/ros2nix.py index 499c421..22afc5b 100755 --- a/ros2nix/ros2nix.py +++ b/ros2nix/ros2nix.py @@ -598,6 +598,7 @@ def ros2nix(args): # TODO generate also release.nix (for testing/CI)? 
if not args.no_cache: + os.makedirs(os.path.dirname(cache_file), exist_ok=True) with open(cache_file, "w") as f: json.dump(git_cache, f) From cf9ca51f6982824901ff850c7893d84eddb6897c Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 18 Apr 2025 13:22:21 +0200 Subject: [PATCH 5/5] Update comments describing head optimization --- ros2nix/ros2nix.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ros2nix/ros2nix.py b/ros2nix/ros2nix.py index 22afc5b..bcb852e 100755 --- a/ros2nix/ros2nix.py +++ b/ros2nix/ros2nix.py @@ -440,8 +440,9 @@ def ros2nix(args): cwd=srcdir, shell=True).decode().strip() if args.use_per_package_src: - # we need to get merge_base again to filter out applied patches from the package git hash - merge_base = merge_base_to_upstream(head) + # Set head to point to the last commit in which the subdirectory was changed. This is + # not strictly necessary, but it will increase the hit rate of git_cache. + merge_base = merge_base_to_upstream(head) # filter out locally applied patches head = check_output(f"git rev-list {merge_base} -1 -- .".split()) def cache_key(url, prefix):