mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-06-10 03:23:29 +03:00
lib.strings: init splitStringBy (#385643)
This commit is contained in:
commit
a9320986b3
3 changed files with 187 additions and 0 deletions
|
@ -347,6 +347,7 @@ let
|
||||||
toSentenceCase
|
toSentenceCase
|
||||||
addContextFrom
|
addContextFrom
|
||||||
splitString
|
splitString
|
||||||
|
splitStringBy
|
||||||
removePrefix
|
removePrefix
|
||||||
removeSuffix
|
removeSuffix
|
||||||
versionOlder
|
versionOlder
|
||||||
|
|
|
@ -1592,6 +1592,97 @@ rec {
|
||||||
in
|
in
|
||||||
map (addContextFrom s) splits;
|
map (addContextFrom s) splits;
|
||||||
|
|
||||||
|
/**
  Splits a string into substrings based on a predicate that examines adjacent characters.

  This function provides a flexible way to split strings by checking pairs of characters
  against a custom predicate function. Unlike simpler splitting functions, this allows
  for context-aware splitting based on character transitions and patterns.

  # Inputs

  `predicate`
  : Function that takes two arguments (previous character and current character)
    and returns true when the string should be split at the current position.
    For the first character, previous will be "" (empty string).

  `keepSplit`
  : Boolean that determines whether the splitting character should be kept as
    part of the result. If true, the character will be included at the beginning
    of the next substring; if false, it will be discarded.

  `str`
  : The input string to split.

  # Return

  A list of substrings from the original string, split according to the predicate.
  The list always has one more element than the number of split positions, so a
  split at the very first character yields a leading empty string, and an empty
  input yields `[ "" ]`.

  # Type

  ```
  splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
  ```

  # Examples
  :::{.example}
  ## `lib.strings.splitStringBy` usage example

  Split on periods and hyphens, discarding the separators:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
  => [ "foo" "bar" "baz" ]
  ```

  Split on transitions from lowercase to uppercase, keeping the uppercase characters:
  ```nix
  splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
  => [ "foo" "Bar" "Baz" ]
  ```

  Handle leading separators correctly:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
  => [ "" "foo" "bar" "baz" ]
  ```

  Handle trailing separators correctly:
  ```nix
  splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
  => [ "foo" "bar" "baz" "" ]
  ```
  :::
*/
splitStringBy =
  predicate: keepSplit: str:
  let
    len = stringLength str;

    # Single character at index `i` of the input.
    charAt = i: substring i 1 str;

    # Indices (ascending) at which the predicate asks for a split.
    # Index 0 has no predecessor, so the predicate sees "" as the previous character.
    # Computed with genList/filter instead of a per-character recursion so that
    # evaluation depth does not grow with the length of the input.
    splitPositions = builtins.filter (
      i: predicate (if i > 0 then charAt (i - 1) else "") (charAt i)
    ) (builtins.genList (i: i) len);

    # Exclusive end index of every part: each part stops right before a split
    # index, and the last part runs to the end of the string.
    ends = splitPositions ++ [ len ];

    # Start index of every part: the first part starts at 0; each later part
    # starts at the split character itself (kept) or just after it (discarded).
    starts = [ 0 ] ++ map (i: if keepSplit then i else i + 1) splitPositions;

    # The n-th part, cut out of the input with a single substring call.
    partAt =
      n:
      let
        start = builtins.elemAt starts n;
        end = builtins.elemAt ends n;
      in
      substring start (end - start) str;
  in
  # `ends` is never empty, so an empty input still produces [ "" ].
  # addContextFrom keeps the string context of `str` on every part, including empty ones.
  map (addContextFrom str) (builtins.genList partAt (builtins.length ends));
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Return a string without the specified prefix, if the prefix matches.
|
Return a string without the specified prefix, if the prefix matches.
|
||||||
|
|
||||||
|
|
|
@ -631,6 +631,101 @@ runTests {
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Splitting on a set of single-character delimiters, delimiters discarded.
testSplitStringBySimpleDelimiter = {
  expr =
    let
      isDelimiter =
        _prev: curr:
        builtins.elem curr [
          "."
          "-"
        ];
    in
    strings.splitStringBy isDelimiter false "foo.bar-baz";
  expected = [
    "foo"
    "bar"
    "baz"
  ];
};

# A delimiter at index 0 produces a leading empty part.
testSplitStringByLeadingDelimiter = {
  expr =
    let
      isDot = _prev: curr: builtins.elem curr [ "." ];
    in
    strings.splitStringBy isDot false ".foo.bar.baz";
  expected = [
    ""
    "foo"
    "bar"
    "baz"
  ];
};

# A delimiter at the last index produces a trailing empty part.
testSplitStringByTrailingDelimiter = {
  expr =
    let
      isDot = _prev: curr: builtins.elem curr [ "." ];
    in
    strings.splitStringBy isDot false "foo.bar.baz.";
  expected = [
    "foo"
    "bar"
    "baz"
    ""
  ];
};

# Adjacent delimiters produce empty parts between them.
testSplitStringByMultipleConsecutiveDelimiters = {
  expr =
    let
      isDot = _prev: curr: builtins.elem curr [ "." ];
    in
    strings.splitStringBy isDot false "foo...bar";
  expected = [
    "foo"
    ""
    ""
    "bar"
  ];
};

# With keepSplit = true the delimiter starts the following part.
testSplitStringByKeepingSplitChar = {
  expr =
    let
      isDot = _prev: curr: builtins.elem curr [ "." ];
    in
    strings.splitStringBy isDot true "foo.bar.baz";
  expected = [
    "foo"
    ".bar"
    ".baz"
  ];
};

# Predicate looks at both characters: split on lowercase -> uppercase.
testSplitStringByCaseTransition = {
  expr =
    let
      isLowerToUpper =
        prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null;
    in
    strings.splitStringBy isLowerToUpper true "fooBarBaz";
  expected = [
    "foo"
    "Bar"
    "Baz"
  ];
};

# Empty input yields a single empty part.
testSplitStringByEmptyString = {
  expr =
    let
      isDot = _prev: curr: builtins.elem curr [ "." ];
    in
    strings.splitStringBy isDot false "";
  expected = [ "" ];
};

# Split only on digit -> lowercase letter transitions; letter -> digit does not split.
testSplitStringByComplexPredicate = {
  expr =
    let
      isDigitToLower =
        prev: curr:
        prev != ""
        && curr != ""
        && builtins.match "[0-9]" prev != null
        && builtins.match "[a-z]" curr != null;
    in
    strings.splitStringBy isDigitToLower true "123abc456def";
  expected = [
    "123"
    "abc456"
    "def"
  ];
};

# A kept split character at index 0 still leaves an empty first part.
testSplitStringByUpperCaseStart = {
  expr =
    let
      isUpper = _prev: curr: builtins.match "[A-Z]" curr != null;
    in
    strings.splitStringBy isUpper true "FooBarBaz";
  expected = [
    ""
    "Foo"
    "Bar"
    "Baz"
  ];
};
|
||||||
|
|
||||||
testEscapeShellArg = {
|
testEscapeShellArg = {
|
||||||
expr = strings.escapeShellArg "esc'ape\nme";
|
expr = strings.escapeShellArg "esc'ape\nme";
|
||||||
expected = "'esc'\\''ape\nme'";
|
expected = "'esc'\\''ape\nme'";
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue