workflows/eval: diff outpaths immediately

This moves the diff of outpaths into the outpaths job, mainly as a preparation to allow future improvements. For example, this will allow running the purity release checks only on changed outpaths instead of the whole eval. This also removes the inefficiency introduced in the last commit about uploading the intermediate paths twice. Now, only the diff is passed on. Also, technically, the diff is now run in parallel across 4 jobs. This should be *slightly* faster than before, where outpaths from all systems were combined first and then diffed. It's probably only a few seconds, though.
2025-06-12 04:35:41 +03:00 · 2025-05-18 15:31:48 +02:00 · 2025-05-18 15:31:48 +02:00 · 8a39ce4a48
commit 8a39ce4a48
parent a6b659b08a
5 changed files with 117 additions and 79 deletions
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@ -135,12 +135,23 @@ jobs:
          github-token: ${{ github.token }}
          merge-multiple: true
-      - name: Upload the output paths and eval stats
+      - name: Compare outpaths against the target branch
        if: steps.targetRunId.outputs.targetRunId
        env:
          MATRIX_SYSTEM: ${{ matrix.system }}
        run: |
          nix-build untrusted/ci -A eval.diff \
            --arg beforeDir ./target \
            --arg afterDir "$(readlink ./merged)" \
            --argstr evalSystem "$MATRIX_SYSTEM" \
            --out-link diff
      - name: Upload outpaths diff and stats
        if: steps.targetRunId.outputs.targetRunId
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
-          name: target-${{ matrix.system }}
+          name: diff-${{ matrix.system }}
-          path: target/*
+          path: diff/*
  process:
    name: Process
@ -148,18 +159,11 @@ jobs:
    needs: [ prepare, outpaths ]
    if: needs.prepare.outputs.targetSha
    steps:
-      - name: Download output paths and eval stats for all systems (PR)
+      - name: Download output paths and eval stats for all systems
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
-          pattern: merged-*
+          pattern: diff-*
-          path: merged
+          path: diff
          merge-multiple: true
      - name: Download output paths and eval stats for all systems (target)
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          pattern: target-*
          path: target
          merge-multiple: true
      - name: Check out the PR at the target commit
@ -173,18 +177,11 @@ jobs:
        with:
          extra_nix_config: sandbox = true
-      - name: Combine all output paths and eval stats (PR)
+      - name: Combine all output paths and eval stats
        run: |
          nix-build trusted/ci -A eval.combine \
-            --arg evalDir ./merged \
+            --arg diffDir ./diff \
-            --out-link combinedMerged
+            --out-link combined
      - name: Combine all output paths and eval stats (target)
        if: needs.prepare.outputs.targetSha
        run: |
          nix-build trusted/ci -A eval.combine \
            --arg evalDir ./target \
            -o combinedTarget
      - name: Compare against the target branch
        env:
@ -196,8 +193,7 @@ jobs:
          # Use the target branch to get accurate maintainer info
          nix-build trusted/ci -A eval.compare \
-            --arg beforeDir "$(realpath combinedTarget)" \
+            --arg combinedDir "$(realpath ./combined)" \
            --arg afterDir "$(realpath combinedMerged)" \
            --arg touchedFilesJson ./touched-files.json \
            --argstr githubAuthorId "$AUTHOR_ID" \
            --out-link comparison
--- a/ci/eval/compare/default.nix
+++ b/ci/eval/compare/default.nix
@ -7,8 +7,7 @@
  python3,
 }:
 {
-  beforeDir,
+  combinedDir,
  afterDir,
  touchedFilesJson,
  githubAuthorId,
  byName ? false,
@ -66,7 +65,6 @@ let
      Example: { name = "python312Packages.numpy"; platform = "x86_64-linux"; }
  */
  inherit (import ./utils.nix { inherit lib; })
    diff
    groupByKernel
    convertToPackagePlatformAttrs
    groupByPlatform
@ -74,22 +72,10 @@ let
    getLabels
    ;
  getAttrs =
    dir:
    let
      raw = builtins.readFile "${dir}/outpaths.json";
      # The file contains Nix paths; we need to ignore them for evaluation purposes,
      # else there will be a "is not allowed to refer to a store path" error.
      data = builtins.unsafeDiscardStringContext raw;
    in
    builtins.fromJSON data;
  beforeAttrs = getAttrs beforeDir;
  afterAttrs = getAttrs afterDir;
  # Attrs
  # - keys: "added", "changed" and "removed"
  # - values: lists of `packagePlatformPath`s
-  diffAttrs = diff beforeAttrs afterAttrs;
+  diffAttrs = builtins.fromJSON (builtins.readFile "${combinedDir}/combined-diff.json");
  rebuilds = diffAttrs.added ++ diffAttrs.changed;
  rebuildsPackagePlatformAttrs = convertToPackagePlatformAttrs rebuilds;
@ -149,8 +135,8 @@ runCommand "compare"
    maintainers = builtins.toJSON maintainers;
    passAsFile = [ "maintainers" ];
    env = {
-      BEFORE_DIR = "${beforeDir}";
+      BEFORE_DIR = "${combinedDir}/before";
-      AFTER_DIR = "${afterDir}";
+      AFTER_DIR = "${combinedDir}/after";
    };
  }
  ''
--- a/ci/eval/compare/utils.nix
+++ b/ci/eval/compare/utils.nix
@ -93,32 +93,6 @@ rec {
    in
    uniqueStrings (builtins.map (p: p.name) packagePlatformAttrs);
  /*
    Computes the key difference between two attrs
    {
      added: [ <keys only in the second object> ],
      removed: [ <keys only in the first object> ],
      changed: [ <keys with different values between the two objects> ],
    }
  */
  diff =
    let
      filterKeys = cond: attrs: lib.attrNames (lib.filterAttrs cond attrs);
    in
    old: new: {
      added = filterKeys (n: _: !(old ? ${n})) new;
      removed = filterKeys (n: _: !(new ? ${n})) old;
      changed = filterKeys (
        n: v:
        # Filter out attributes that don't exist anymore
        (new ? ${n})
        # Filter out attributes that are the same as the new value
        && (v != (new.${n}))
      ) old;
    };
  /*
    Group a list of `packagePlatformAttr`s by platforms
--- a/ci/eval/default.nix
+++ b/ci/eval/default.nix
@ -191,9 +191,11 @@ let
        cat "$chunkOutputDir"/result/* | jq -s 'add | map_values(.outputs)' > $out/${evalSystem}/paths.json
      '';
  diff = callPackage ./diff.nix { };
  combine =
    {
-      evalDir,
+      diffDir,
    }:
    runCommand "combined-eval"
      {
@ -205,12 +207,22 @@ let
        mkdir -p $out
        # Combine output paths from all systems
-        cat ${evalDir}/*/paths.json | jq -s add > $out/outpaths.json
+        cat ${diffDir}/*/diff.json | jq -s '
          reduce .[] as $item ({}; {
            added: (.added + $item.added),
            changed: (.changed + $item.changed),
            removed: (.removed + $item.removed)
          })
        ' > $out/combined-diff.json
-        mkdir -p $out/stats
+        mkdir -p $out/before/stats
        for d in ${diffDir}/before/*; do
          cp -r "$d"/stats-by-chunk $out/before/stats/$(basename "$d")
        done
-        for d in ${evalDir}/*; do
+        mkdir -p $out/after/stats
-          cp -r "$d"/stats-by-chunk $out/stats/$(basename "$d")
+        for d in ${diffDir}/after/*; do
          cp -r "$d"/stats-by-chunk $out/after/stats/$(basename "$d")
        done
      '';
@ -225,18 +237,26 @@ let
      quickTest ? false,
    }:
    let
-      evals = symlinkJoin {
+      diffs = symlinkJoin {
-        name = "evals";
+        name = "diffs";
        paths = map (
          evalSystem:
-          singleSystem {
+          let
-            inherit quickTest evalSystem chunkSize;
+            eval = singleSystem {
              inherit quickTest evalSystem chunkSize;
            };
          in
          diff {
            inherit evalSystem;
            # Local "full" evaluation doesn't do a real diff.
            beforeDir = eval;
            afterDir = eval;
          }
        ) evalSystems;
      };
    in
    combine {
-      evalDir = evals;
+      diffDir = diffs;
    };
 in
@ -244,6 +264,7 @@ in
  inherit
    attrpathsSuperset
    singleSystem
    diff
    combine
    compare
    # The above three are used by separate VMs in a GitHub workflow,
--- a/ci/eval/diff.nix
+++ b/ci/eval/diff.nix
@ -0,0 +1,61 @@
 {
  lib,
  runCommand,
  writeText,
 }:
 {
  beforeDir,
  afterDir,
  evalSystem,
 }:
 let
  /*
    Computes the key difference between two attrs
    {
      added: [ <keys only in the second object> ],
      removed: [ <keys only in the first object> ],
      changed: [ <keys with different values between the two objects> ],
    }
  */
  diff =
    let
      filterKeys = cond: attrs: lib.attrNames (lib.filterAttrs cond attrs);
    in
    old: new: {
      added = filterKeys (n: _: !(old ? ${n})) new;
      removed = filterKeys (n: _: !(new ? ${n})) old;
      changed = filterKeys (
        n: v:
        # Filter out attributes that don't exist anymore
        (new ? ${n})
        # Filter out attributes that are the same as the new value
        && (v != (new.${n}))
      ) old;
    };
  getAttrs =
    dir:
    let
      raw = builtins.readFile "${dir}/${evalSystem}/paths.json";
      # The file contains Nix paths; we need to ignore them for evaluation purposes,
      # else there will be a "is not allowed to refer to a store path" error.
      data = builtins.unsafeDiscardStringContext raw;
    in
    builtins.fromJSON data;
  beforeAttrs = getAttrs beforeDir;
  afterAttrs = getAttrs afterDir;
  diffAttrs = diff beforeAttrs afterAttrs;
  diffJson = writeText "diff.json" (builtins.toJSON diffAttrs);
 in
 runCommand "diff" { } ''
  mkdir -p $out/${evalSystem}
  cp -r ${beforeDir} $out/before
  cp -r ${afterDir} $out/after
  cp ${diffJson} $out/${evalSystem}/diff.json
 ''