Merge pull request #261732 from tweag/fileset.fromSource

`lib.fileset.fromSource`: init
This commit is contained in:
Robert Hensing 2023-11-10 20:32:51 +01:00 committed by GitHub
commit cfd83c931f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 400 additions and 31 deletions

View file

@ -244,5 +244,4 @@ Here's a list of places in the library that need to be updated in the future:
- > The file set library is currently somewhat limited but is being expanded to include more functions over time. - > The file set library is currently somewhat limited but is being expanded to include more functions over time.
in [the manual](../../doc/functions/fileset.section.md) in [the manual](../../doc/functions/fileset.section.md)
- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed
- If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function - If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function

View file

@ -3,8 +3,10 @@ let
inherit (import ./internal.nix { inherit lib; }) inherit (import ./internal.nix { inherit lib; })
_coerce _coerce
_singleton
_coerceMany _coerceMany
_toSourceFilter _toSourceFilter
_fromSourceFilter
_unionMany _unionMany
_fileFilter _fileFilter
_printFileset _printFileset
@ -152,7 +154,12 @@ in {
sourceFilter = _toSourceFilter fileset; sourceFilter = _toSourceFilter fileset;
in in
if ! isPath root then if ! isPath root then
if isStringLike root then if root ? _isLibCleanSourceWith then
throw ''
lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead.
To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`.
Note that this only works for sources created from paths.''
else if isStringLike root then
throw '' throw ''
lib.fileset.toSource: `root` (${toString root}) is a string-like value, but it should be a path instead. lib.fileset.toSource: `root` (${toString root}) is a string-like value, but it should be a path instead.
Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'' Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
@ -188,6 +195,75 @@ in {
filter = sourceFilter; filter = sourceFilter;
}; };
/*
Create a file set with the same files as a `lib.sources`-based value.
This does not import any of the files into the store.
This can be used to gradually migrate from `lib.sources`-based filtering to `lib.fileset`.
A file set can be turned back into a source using [`toSource`](#function-library-lib.fileset.toSource).
:::{.note}
File sets cannot represent empty directories.
Turning the result of this function back into a source using `toSource` will therefore not preserve empty directories.
:::
Type:
fromSource :: SourceLike -> FileSet
Example:
# There's no cleanSource-like function for file sets yet,
# but we can just convert cleanSource to a file set and use it that way
toSource {
root = ./.;
fileset = fromSource (lib.sources.cleanSource ./.);
}
# Keeping a previous sourceByRegex (which could be migrated to `lib.fileset.unions`),
# but removing a subdirectory using file set functions
difference
(fromSource (lib.sources.sourceByRegex ./. [
"^README\.md$"
# This regex includes everything in ./doc
"^doc(/.*)?$"
])
./doc/generated
# Use cleanSource, but limit it to only include ./Makefile and files under ./src
intersection
(fromSource (lib.sources.cleanSource ./.))
(unions [
./Makefile
./src
]);
*/
fromSource = source:
let
# This function uses `._isLibCleanSourceWith`, `.origSrc` and `.filter`,
# which are technically internal to lib.sources,
# but we'll allow this since both libraries are in the same code base
# and this function is a bridge between them.
isFiltered = source ? _isLibCleanSourceWith;
path = if isFiltered then source.origSrc else source;
in
# We can only support sources created from paths
if ! isPath path then
if isStringLike path then
throw ''
lib.fileset.fromSource: The source origin of the argument is a string-like value ("${toString path}"), but it should be a path instead.
Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.''
else
throw ''
lib.fileset.fromSource: The source origin of the argument is of type ${typeOf path}, but it should be a path instead.''
else if ! pathExists path then
throw ''
lib.fileset.fromSource: The source origin (${toString path}) of the argument does not exist.''
else if isFiltered then
_fromSourceFilter path source.filter
else
# If there's no filter, no need to run the expensive conversion, all subpaths will be included
_singleton path;
/* /*
The file set containing all files that are in either of two given file sets. The file set containing all files that are in either of two given file sets.
This is the same as [`unions`](#function-library-lib.fileset.unions), This is the same as [`unions`](#function-library-lib.fileset.unions),

View file

@ -167,7 +167,12 @@ rec {
else else
value value
else if ! isPath value then else if ! isPath value then
if isStringLike value then if value ? _isLibCleanSourceWith then
throw ''
${context} is a `lib.sources`-based value, but it should be a file set or a path instead.
To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`.
Note that this only works for sources created from paths.''
else if isStringLike value then
throw '' throw ''
${context} ("${toString value}") is a string-like value, but it should be a file set or a path instead. ${context} ("${toString value}") is a string-like value, but it should be a file set or a path instead.
Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'' Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
@ -470,6 +475,59 @@ rec {
else else
nonEmpty; nonEmpty;
# Turn a builtins.filterSource-based source filter on a root path into a file set
# containing only files included by the filter.
# The filter is lazily called as necessary to determine whether paths are included
# Type: Path -> (String -> String -> Bool) -> fileset
_fromSourceFilter = root: sourceFilter:
let
# During the recursion we need to track both:
# - The path value such that we can safely call `readDir` on it
# - The path string value such that we can correctly call the `filter` with it
#
# While we could just recurse with the path value,
# this would then require converting it to a path string for every path,
# which is a fairly expensive operation
# Create a file set from a directory entry
fromDirEntry = path: pathString: type:
# The filter needs to run on the path as a string
if ! sourceFilter pathString type then
null
else if type == "directory" then
fromDir path pathString
else
type;
# Create a file set from a directory
fromDir = path: pathString:
mapAttrs
# This looks a bit funny, but we need both the path-based and the path string-based values
(name: fromDirEntry (path + "/${name}") (pathString + "/${name}"))
# We need to readDir on the path value, because reading on a path string
# would be unspecified if there are multiple filesystem roots
(readDir path);
rootPathType = pathType root;
# We need to convert the path to a string to imitate what builtins.path calls the filter function with.
# We don't want to rely on `toString` for this though because it's not very well defined, see ../path/README.md
# So instead we use `lib.path.splitRoot` to safely deconstruct the path into its filesystem root and subpath
# We don't need the filesystem root though, builtins.path doesn't expose that in any way to the filter.
# So we only need the components, which we then turn into a string as one would expect.
rootString = "/" + concatStringsSep "/" (components (splitRoot root).subpath);
in
if rootPathType == "directory" then
# We imitate builtins.path not calling the filter on the root path
_create root (fromDir root rootString)
else
# Direct files are always included by builtins.path without calling the filter
# But we need to lift up the base path to its parent to satisfy the base path invariant
_create (dirOf root)
{
${baseNameOf root} = rootPathType;
};
# Transforms the filesetTree of a file set to a shorter base path, e.g. # Transforms the filesetTree of a file set to a shorter base path, e.g.
# _shortenTreeBase [ "foo" ] (_create /foo/bar null) # _shortenTreeBase [ "foo" ] (_create /foo/bar null)
# => { bar = null; } # => { bar = null; }

View file

@ -1,5 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# shellcheck disable=SC2016 # shellcheck disable=SC2016
# shellcheck disable=SC2317
# shellcheck disable=SC2192
# Tests lib.fileset # Tests lib.fileset
# Run: # Run:
@ -224,23 +226,17 @@ withFileMonitor() {
fi fi
} }
# Check whether a file set includes/excludes declared paths as expected, usage:
# Create the tree structure declared in the tree variable, usage:
# #
# tree=( # tree=(
# [a/b] =1 # Declare that file a/b should exist and expect it to be included in the store path # [a/b] = # Declare that file a/b should exist
# [c/a] = # Declare that file c/a should exist and expect it to be excluded in the store path # [c/a] = # Declare that file c/a should exist
# [c/d/]= # Declare that directory c/d/ should exist and expect it to be excluded in the store path # [c/d/]= # Declare that directory c/d/ should exist
# ) # )
# checkFileset './a' # Pass the fileset as the argument # createTree
declare -A tree declare -A tree
checkFileset() { createTree() {
# New subshell so that we can have a separate trap handler, see `trap` below
local fileset=$1
# Process the tree into separate arrays for included paths, excluded paths and excluded files.
local -a included=()
local -a excluded=()
local -a excludedFiles=()
# Track which paths need to be created # Track which paths need to be created
local -a dirsToCreate=() local -a dirsToCreate=()
local -a filesToCreate=() local -a filesToCreate=()
@ -248,24 +244,9 @@ checkFileset() {
# If keys end with a `/` we treat them as directories, otherwise files # If keys end with a `/` we treat them as directories, otherwise files
if [[ "$p" =~ /$ ]]; then if [[ "$p" =~ /$ ]]; then
dirsToCreate+=("$p") dirsToCreate+=("$p")
isFile=
else else
filesToCreate+=("$p") filesToCreate+=("$p")
isFile=1
fi fi
case "${tree[$p]}" in
1)
included+=("$p")
;;
0)
excluded+=("$p")
if [[ -n "$isFile" ]]; then
excludedFiles+=("$p")
fi
;;
*)
die "Unsupported tree value: ${tree[$p]}"
esac
done done
# Create all the necessary paths. # Create all the necessary paths.
@ -280,6 +261,43 @@ checkFileset() {
mkdir -p "${parentsToCreate[@]}" mkdir -p "${parentsToCreate[@]}"
touch "${filesToCreate[@]}" touch "${filesToCreate[@]}"
fi fi
}
# Check whether a file set includes/excludes declared paths as expected, usage:
#
# tree=(
# [a/b] =1 # Declare that file a/b should exist and expect it to be included in the store path
# [c/a] = # Declare that file c/a should exist and expect it to be excluded in the store path
# [c/d/]= # Declare that directory c/d/ should exist and expect it to be excluded in the store path
# )
# checkFileset './a' # Pass the fileset as the argument
checkFileset() {
# New subshell so that we can have a separate trap handler, see `trap` below
local fileset=$1
# Create the tree
createTree
# Process the tree into separate arrays for included paths, excluded paths and excluded files.
local -a included=()
local -a excluded=()
local -a excludedFiles=()
for p in "${!tree[@]}"; do
case "${tree[$p]}" in
1)
included+=("$p")
;;
0)
excluded+=("$p")
# If keys end with a `/` we treat them as directories, otherwise files
if [[ ! "$p" =~ /$ ]]; then
excludedFiles+=("$p")
fi
;;
*)
die "Unsupported tree value: ${tree[$p]}"
esac
done
expression="toSource { root = ./.; fileset = $fileset; }" expression="toSource { root = ./.; fileset = $fileset; }"
@ -321,6 +339,10 @@ checkFileset() {
expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` \(/nix/store/foobar\) is a string-like value, but it should be a path instead. expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` \(/nix/store/foobar\) is a string-like value, but it should be a path instead.
\s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.' \s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
expectFailure 'toSource { root = cleanSourceWith { src = ./.; }; fileset = ./.; }' 'lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead.
\s*To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`.
\s*Note that this only works for sources created from paths.'
# Only paths are accepted as `root` # Only paths are accepted as `root`
expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.' expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.'
@ -365,6 +387,9 @@ rm -rf -- *
expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a file set or a path instead.' expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a file set or a path instead.'
expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` \("/some/path"\) is a string-like value, but it should be a file set or a path instead. expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` \("/some/path"\) is a string-like value, but it should be a file set or a path instead.
\s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.' \s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
expectFailure 'toSource { root = ./.; fileset = cleanSourceWith { src = ./.; }; }' 'lib.fileset.toSource: `fileset` is a `lib.sources`-based value, but it should be a file set or a path instead.
\s*To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`.
\s*Note that this only works for sources created from paths.'
# Path coercion errors for non-existent paths # Path coercion errors for non-existent paths
expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) is a path that does not exist.' expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) is a path that does not exist.'
@ -995,6 +1020,217 @@ touch 0 "${filesToCreate[@]}"
expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace" expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace"
rm -rf -- * rm -rf -- *
## lib.fileset.fromSource
# Check error messages
expectFailure 'fromSource null' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
expectFailure 'fromSource (lib.cleanSource "")' 'lib.fileset.fromSource: The source origin of the argument is a string-like value \(""\), but it should be a path instead.
\s*Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.'
expectFailure 'fromSource (lib.cleanSource null)' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'
# fromSource on a path works and is the same as coercing that path
mkdir a
touch a/b c
expectEqual 'trace (fromSource ./.) null' 'trace ./. null'
rm -rf -- *
# Check that converting to a file set doesn't read the included files
mkdir a
touch a/b
run() {
expectEqual "trace (fromSource (lib.cleanSourceWith { src = ./a; })) null" "builtins.trace \"$work/a (all files in directory)\" null"
rm a/b
}
withFileMonitor run a/b
rm -rf -- *
# Check that converting to a file set doesn't read entries for directories that are filtered out
mkdir -p a/b
touch a/b/c
run() {
expectEqual "trace (fromSource (lib.cleanSourceWith {
src = ./a;
filter = pathString: type: false;
})) null" "builtins.trace \"(empty)\" null"
rm a/b/c
rmdir a/b
}
withFileMonitor run a/b
rm -rf -- *
# The filter is not needed on empty directories
expectEqual 'trace (fromSource (lib.cleanSourceWith {
src = ./.;
filter = abort "filter should not be needed";
})) null' 'trace _emptyWithoutBase null'
# Single files also work
touch a b
expectEqual 'trace (fromSource (cleanSourceWith { src = ./a; })) null' 'trace ./a null'
rm -rf -- *
# For a tree assigning each subpath true/false,
# check whether a source filter with those results includes the same files
# as a file set created using fromSource. Usage:
#
# tree=(
# [a]=1 # ./a is a file and the filter should return true for it
# [b/]=0 # ./b is a directory and the filter should return false for it
# )
# checkSource
checkSource() {
createTree
# Serialise the tree as JSON (there's only minimal savings with jq,
# and we don't need to handle escapes)
{
echo "{"
first=1
for p in "${!tree[@]}"; do
if [[ -z "$first" ]]; then
echo ","
else
first=
fi
echo "\"$p\":"
case "${tree[$p]}" in
1)
echo "true"
;;
0)
echo "false"
;;
*)
die "Unsupported tree value: ${tree[$p]}"
esac
done
echo "}"
} > "$tmp/tree.json"
# An expression to create a source value with a filter matching the tree
sourceExpr='
let
tree = importJSON '"$tmp"'/tree.json;
in
cleanSourceWith {
src = ./.;
filter =
pathString: type:
let
stripped = removePrefix (toString ./. + "/") pathString;
key = stripped + optionalString (type == "directory") "/";
in
tree.${key} or
(throw "tree key ${key} missing");
}
'
filesetExpr='
toSource {
root = ./.;
fileset = fromSource ('"$sourceExpr"');
}
'
# Turn both into store paths
sourceStorePath=$(expectStorePath "$sourceExpr")
filesetStorePath=$(expectStorePath "$filesetExpr")
# Loop through each path in the tree
while IFS= read -r -d $'\0' subpath; do
if [[ ! -e "$sourceStorePath"/"$subpath" ]]; then
# If it's not in the source store path, it's also not in the file set store path
if [[ -e "$filesetStorePath"/"$subpath" ]]; then
die "The store path $sourceStorePath created by $expr doesn't contain $subpath, but the corresponding store path $filesetStorePath created via fromSource does contain $subpath"
fi
elif [[ -z "$(find "$sourceStorePath"/"$subpath" -type f)" ]]; then
# If it's an empty directory in the source store path, it shouldn't be in the file set store path
if [[ -e "$filesetStorePath"/"$subpath" ]]; then
die "The store path $sourceStorePath created by $expr contains the path $subpath without any files, but the corresponding store path $filesetStorePath created via fromSource didn't omit it"
fi
else
# If it's non-empty directory or a file, it should be in the file set store path
if [[ ! -e "$filesetStorePath"/"$subpath" ]]; then
die "The store path $sourceStorePath created by $expr contains the non-empty path $subpath, but the corresponding store path $filesetStorePath created via fromSource doesn't include it"
fi
fi
done < <(find . -mindepth 1 -print0)
rm -rf -- *
}
# Check whether the filter is evaluated correctly
tree=(
[a]=
[b/]=
[b/c]=
[b/d]=
[e/]=
[e/e/]=
)
# We fill out the above tree values with all possible combinations of 0 and 1
# Then check whether a filter based on those return values gets turned into the corresponding file set
for i in $(seq 0 $((2 ** ${#tree[@]} - 1 ))); do
for p in "${!tree[@]}"; do
tree[$p]=$(( i % 2 ))
(( i /= 2 )) || true
done
checkSource
done
# The filter is called with the same arguments in the same order
mkdir a e
touch a/b a/c d e
expectEqual '
trace (fromSource (cleanSourceWith {
src = ./.;
filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
})) null
' '
builtins.seq (cleanSourceWith {
src = ./.;
filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
}).outPath
builtins.trace "'"$work"' (all files in directory)"
null
'
rm -rf -- *
# Test that if a directory is not included, the filter isn't called on its contents
mkdir a b
touch a/c b/d
expectEqual 'trace (fromSource (cleanSourceWith {
src = ./.;
filter = pathString: type:
if pathString == toString ./a then
false
else if pathString == toString ./b then
true
else if pathString == toString ./b/d then
true
else
abort "This filter should not be called with path ${pathString}";
})) null' 'trace (_create ./. { b = "directory"; }) null'
rm -rf -- *
# The filter is called lazily:
# If a later say intersection removes a part of the tree, the filter won't run on it
mkdir a d
touch a/{b,c} d/e
expectEqual 'trace (intersection ./a (fromSource (lib.cleanSourceWith {
src = ./.;
filter = pathString: type:
if pathString == toString ./a || pathString == toString ./a/b then
true
else if pathString == toString ./a/c then
false
else
abort "filter should not be called on ${pathString}";
}))) null' 'trace ./a/b null'
rm -rf -- *
# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
echo >&2 tests ok echo >&2 tests ok