From bb4be9a474c87dd9366eca69fa8e2f0aa16fcc83 Mon Sep 17 00:00:00 2001
From: Heitor Augusto
Date: Thu, 27 Feb 2025 16:27:32 -0300
Subject: [PATCH] lib.strings: init splitStringBy

---
 lib/default.nix    |  1 +
 lib/strings.nix    | 93 +++++++++++++++++++++++++++++++++++++++++++++
 lib/tests/misc.nix | 95 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 189 insertions(+)

diff --git a/lib/default.nix b/lib/default.nix
index 19316addb8cb..c433ca6a3e09 100644
--- a/lib/default.nix
+++ b/lib/default.nix
@@ -347,6 +347,7 @@ let
       toSentenceCase
       addContextFrom
       splitString
+      splitStringBy
       removePrefix
       removeSuffix
       versionOlder
diff --git a/lib/strings.nix b/lib/strings.nix
index d281120cad7f..70f3a6cc8a25 100644
--- a/lib/strings.nix
+++ b/lib/strings.nix
@@ -1588,6 +1588,99 @@ rec {
     in
     map (addContextFrom s) splits;
 
+  /**
+    Splits a string into substrings based on a predicate that examines adjacent characters.
+
+    This function provides a flexible way to split strings by checking pairs of characters
+    against a custom predicate function. Unlike simpler splitting functions, this allows
+    for context-aware splitting based on character transitions and patterns.
+
+    # Inputs
+
+    `predicate`
+    : Function that takes two arguments (previous character and current character)
+      and returns true when the string should be split at the current position.
+      For the first character, previous will be "" (empty string).
+
+    `keepSplit`
+    : Boolean that determines whether the splitting character should be kept as
+      part of the result. If true, the character will be included at the beginning
+      of the next substring; if false, it will be discarded.
+
+    `str`
+    : The input string to split.
+
+    # Return
+
+    A list of substrings from the original string, split according to the predicate.
+    The list always has one more element than the number of split points, so an
+    empty input yields `[ "" ]`.
+
+    # Type
+
+    ```
+    splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
+    ```
+
+    # Examples
+    :::{.example}
+    ## `lib.strings.splitStringBy` usage example
+
+    Split on periods and hyphens, discarding the separators:
+    ```nix
+    splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
+    => [ "foo" "bar" "baz" ]
+    ```
+
+    Split on transitions from lowercase to uppercase, keeping the uppercase characters:
+    ```nix
+    splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
+    => [ "foo" "Bar" "Baz" ]
+    ```
+
+    Handle leading separators correctly:
+    ```nix
+    splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
+    => [ "" "foo" "bar" "baz" ]
+    ```
+
+    Handle trailing separators correctly:
+    ```nix
+    splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
+    => [ "foo" "bar" "baz" "" ]
+    ```
+    :::
+  */
+  splitStringBy =
+    predicate: keepSplit: str:
+    let
+      len = stringLength str;
+      charAt = index: substring index 1 str;
+
+      # Every position at which the predicate asks for a split, in ascending order.
+      # The first character has no predecessor, so it is paired with "".
+      splitPositions = builtins.filter (
+        pos: predicate (if pos > 0 then charAt (pos - 1) else "") (charAt pos)
+      ) (builtins.genList (pos: pos) len);
+      splitCount = builtins.length splitPositions;
+
+      # When the splitting character is discarded, the next part starts right after it.
+      skip = if keepSplit then 0 else 1;
+
+      # Part n starts at split point n - 1 (or at the start of the string for n == 0)
+      # and ends just before split point n (or at the end of the string for the last part).
+      partAt =
+        n:
+        let
+          start = if n == 0 then 0 else builtins.elemAt splitPositions (n - 1) + skip;
+          stop = if n == splitCount then len else builtins.elemAt splitPositions n;
+        in
+        substring start (stop - start) str;
+    in
+    # Slicing by index avoids recursing once per character (which could exceed the
+    # evaluator's call-depth limit on long strings) and repeated list/string appends.
+    map (addContextFrom str) (builtins.genList partAt (splitCount + 1));
+
   /**
     Return a string without the specified prefix, if the prefix matches.
 
diff --git a/lib/tests/misc.nix b/lib/tests/misc.nix
index f5f1fb5e7c2d..f9f2b0264b9e 100644
--- a/lib/tests/misc.nix
+++ b/lib/tests/misc.nix
@@ -631,6 +631,101 @@ runTests {
     ];
   };
 
+  # Separator characters are dropped when keepSplit is false.
+  testSplitStringBySimpleDelimiter = {
+    expr = strings.splitStringBy (_: char: builtins.elem char [ "." "-" ]) false "foo.bar-baz";
+    expected = [
+      "foo"
+      "bar"
+      "baz"
+    ];
+  };
+
+  # A split at the very first character leaves an empty leading part.
+  testSplitStringByLeadingDelimiter = {
+    expr = strings.splitStringBy (_: char: char == ".") false ".foo.bar.baz";
+    expected = [
+      ""
+      "foo"
+      "bar"
+      "baz"
+    ];
+  };
+
+  # A split at the very last character leaves an empty trailing part.
+  testSplitStringByTrailingDelimiter = {
+    expr = strings.splitStringBy (_: char: char == ".") false "foo.bar.baz.";
+    expected = [
+      "foo"
+      "bar"
+      "baz"
+      ""
+    ];
+  };
+
+  # Adjacent split points produce empty parts between them.
+  testSplitStringByMultipleConsecutiveDelimiters = {
+    expr = strings.splitStringBy (_: char: char == ".") false "foo...bar";
+    expected = [
+      "foo"
+      ""
+      ""
+      "bar"
+    ];
+  };
+
+  # With keepSplit the splitting character starts the following part.
+  testSplitStringByKeepingSplitChar = {
+    expr = strings.splitStringBy (_: char: char == ".") true "foo.bar.baz";
+    expected = [
+      "foo"
+      ".bar"
+      ".baz"
+    ];
+  };
+
+  # The predicate sees both the previous and the current character.
+  testSplitStringByCaseTransition = {
+    expr = strings.splitStringBy (
+      before: after: builtins.match "[a-z]" before != null && builtins.match "[A-Z]" after != null
+    ) true "fooBarBaz";
+    expected = [
+      "foo"
+      "Bar"
+      "Baz"
+    ];
+  };
+
+  testSplitStringByEmptyString = {
+    expr = strings.splitStringBy (_: char: char == ".") false "";
+    expected = [ "" ];
+  };
+
+  # Only digit-to-letter transitions split; letter-to-digit ones do not.
+  testSplitStringByComplexPredicate = {
+    expr = strings.splitStringBy (
+      before: after:
+      before != "" && after != ""
+      && builtins.match "[0-9]" before != null
+      && builtins.match "[a-z]" after != null
+    ) true "123abc456def";
+    expected = [
+      "123"
+      "abc456"
+      "def"
+    ];
+  };
+
+  testSplitStringByUpperCaseStart = {
+    expr = strings.splitStringBy (_: char: builtins.match "[A-Z]" char != null) true "FooBarBaz";
+    expected = [
+      ""
+      "Foo"
+      "Bar"
+      "Baz"
+    ];
+  };
+
   testEscapeShellArg = {
     expr = strings.escapeShellArg "esc'ape\nme";
     expected = "'esc'\\''ape\nme'";