lib.strings: init splitStringBy (#385643)

This commit is contained in:
Pol Dellaiera 2025-04-23 04:51:38 +00:00 committed by GitHub
commit a9320986b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 187 additions and 0 deletions

View file

@ -347,6 +347,7 @@ let
toSentenceCase
addContextFrom
splitString
splitStringBy
removePrefix
removeSuffix
versionOlder

View file

@ -1592,6 +1592,97 @@ rec {
in
map (addContextFrom s) splits;
/**
  Splits a string into substrings based on a predicate that examines adjacent characters.

  This function provides a flexible way to split strings by checking pairs of characters
  against a custom predicate function. Unlike simpler splitting functions, this allows
  for context-aware splitting based on character transitions and patterns.

  # Inputs

  `predicate`
  : Function that takes two arguments (previous character and current character)
    and returns true when the string should be split at the current position.
    For the first character, previous will be "" (empty string).

  `keepSplit`
  : Boolean that determines whether the splitting character should be kept as
    part of the result. If true, the character will be included at the beginning
    of the next substring; if false, it will be discarded.

  `str`
  : The input string to split.

  # Return

  A list of substrings from the original string, split according to the predicate.
  The list always has one more element than the number of split positions, so an
  empty input yields `[ "" ]` and adjacent discarded separators yield empty strings.

  # Type

  ```
  splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
  ```

  # Examples
  :::{.example}
  ## `lib.strings.splitStringBy` usage example

  Split on periods and hyphens, discarding the separators:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
  => [ "foo" "bar" "baz" ]
  ```

  Split on transitions from lowercase to uppercase, keeping the uppercase characters:
  ```nix
  splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
  => [ "foo" "Bar" "Baz" ]
  ```

  Handle leading separators correctly:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
  => [ "" "foo" "bar" "baz" ]
  ```

  Handle trailing separators correctly:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
  => [ "foo" "bar" "baz" "" ]
  ```
  :::
*/
splitStringBy =
  predicate: keepSplit: str:
  let
    len = stringLength str;

    # Character at index `i`; "" stands in for the missing character before index 0.
    charAt = i: if i < 0 then "" else substring i 1 str;

    # Indices of every character at which the predicate requests a split.
    # Computed with genList/filter rather than per-character recursion, so the
    # evaluator's call depth does not grow with the length of the input.
    splitPositions = builtins.filter (i: predicate (charAt (i - 1)) (charAt i)) (
      builtins.genList (i: i) len
    );
    splitCount = builtins.length splitPositions;

    # A kept split character opens the next part; a discarded one is skipped.
    skip = if keepSplit then 0 else 1;

    # Part `n` starts right after split `n - 1` (or at the start of the string)
    # and ends just before split `n` (or at the end of the string).
    partAt =
      n:
      let
        start = if n == 0 then 0 else builtins.elemAt splitPositions (n - 1) + skip;
        end = if n == splitCount then len else builtins.elemAt splitPositions n;
      in
      substring start (end - start) str;
  in
  # There is always one more part than there are split positions; for an empty
  # input this yields the single part "".
  map (addContextFrom str) (builtins.genList partAt (splitCount + 1));
/**
Return a string without the specified prefix, if the prefix matches.

View file

@ -631,6 +631,101 @@ runTests {
];
};
# Both "." and "-" act as separators and are dropped from the output.
testSplitStringBySimpleDelimiter =
  let
    separators = [
      "."
      "-"
    ];
    isSeparator = _prev: char: builtins.elem char separators;
  in
  {
    expr = strings.splitStringBy isSeparator false "foo.bar-baz";
    expected = [
      "foo"
      "bar"
      "baz"
    ];
  };
# A separator in first position produces an empty leading element.
testSplitStringByLeadingDelimiter =
  let
    isDot = _prev: char: char == ".";
  in
  {
    expr = strings.splitStringBy isDot false ".foo.bar.baz";
    expected = [
      ""
      "foo"
      "bar"
      "baz"
    ];
  };
# A separator in last position produces an empty trailing element.
testSplitStringByTrailingDelimiter =
  let
    isDot = _prev: char: char == ".";
  in
  {
    expr = strings.splitStringBy isDot false "foo.bar.baz.";
    expected = [
      "foo"
      "bar"
      "baz"
      ""
    ];
  };
# Each pair of adjacent separators contributes one empty element.
testSplitStringByMultipleConsecutiveDelimiters =
  let
    isDot = _prev: char: char == ".";
  in
  {
    expr = strings.splitStringBy isDot false "foo...bar";
    expected = [
      "foo"
      ""
      ""
      "bar"
    ];
  };
# With keepSplit enabled the separator is retained as the first character of the next element.
testSplitStringByKeepingSplitChar =
  let
    isDot = _prev: char: char == ".";
  in
  {
    expr = strings.splitStringBy isDot true "foo.bar.baz";
    expected = [
      "foo"
      ".bar"
      ".baz"
    ];
  };
# Splits wherever a lowercase letter is directly followed by an uppercase one.
testSplitStringByCaseTransition =
  let
    isLower = char: builtins.match "[a-z]" char != null;
    isUpper = char: builtins.match "[A-Z]" char != null;
    lowerToUpper = before: after: isLower before && isUpper after;
  in
  {
    expr = strings.splitStringBy lowerToUpper true "fooBarBaz";
    expected = [
      "foo"
      "Bar"
      "Baz"
    ];
  };
# Empty input yields a list holding a single empty string.
testSplitStringByEmptyString =
  let
    isDot = _prev: char: char == ".";
  in
  {
    expr = strings.splitStringBy isDot false "";
    expected = [ "" ];
  };
# Splits only on a digit-to-lowercase-letter transition, so "abc456" stays in one piece.
testSplitStringByComplexPredicate =
  let
    isDigit = char: builtins.match "[0-9]" char != null;
    isLower = char: builtins.match "[a-z]" char != null;
    digitToLetter =
      before: after: !(before == "" || after == "") && isDigit before && isLower after;
  in
  {
    expr = strings.splitStringBy digitToLetter true "123abc456def";
    expected = [
      "123"
      "abc456"
      "def"
    ];
  };
# The very first character already matches, so the result starts with an empty element.
testSplitStringByUpperCaseStart =
  let
    startsWord = _prev: char: builtins.match "[A-Z]" char != null;
  in
  {
    expr = strings.splitStringBy startsWord true "FooBarBaz";
    expected = [
      ""
      "Foo"
      "Bar"
      "Baz"
    ];
  };
testEscapeShellArg = {
expr = strings.escapeShellArg "esc'ape\nme";
expected = "'esc'\\''ape\nme'";