lib.strings: init splitStringBy (#385643)

2025-06-10 03:23:29 +03:00 · 2025-04-23 04:51:38 +00:00 · 2025-04-23 04:51:38 +00:00 · a9320986b3
commit a9320986b3
parent e4a23a16f9 bb4be9a474
3 changed files with 187 additions and 0 deletions
--- a/lib/default.nix
+++ b/lib/default.nix
@ -347,6 +347,7 @@ let
        toSentenceCase
        addContextFrom
        splitString
        splitStringBy
        removePrefix
        removeSuffix
        versionOlder
--- a/lib/strings.nix
+++ b/lib/strings.nix
@ -1592,6 +1592,97 @@ rec {
    in
    map (addContextFrom s) splits;
  /**
    Splits a string into substrings based on a predicate that examines adjacent characters.

    The predicate is called once for every position of the string with the
    single-character substring before that position and the one at that
    position. Wherever it returns `true` the string is cut. Unlike
    `splitString`, this allows context-aware splitting based on character
    transitions (for example lowercase -> uppercase) and not only on fixed
    separators.

    The result always contains one more element than the number of positions
    at which the predicate returned `true`; in particular the empty string
    yields `[ "" ]`, and a split at the very first or very last character
    yields an empty first or last element respectively (unless `keepSplit`
    puts the split character into that last element).

    Every returned substring is passed through `addContextFrom str`.

    # Inputs

    `predicate`
    : Function that takes two arguments (previous character and current character)
      and returns true when the string should be split at the current position.
      For the first character, previous will be "" (empty string).

    `keepSplit`
    : Boolean that determines whether the splitting character should be kept as
      part of the result. If true, the character will be included at the beginning
      of the next substring; if false, it will be discarded.

    `str`
    : The input string to split.

    # Return

    A list of substrings from the original string, split according to the predicate.

    # Type

    ```
    splitStringBy :: (string -> string -> bool) -> bool -> string -> [string]
    ```

    # Examples
    :::{.example}
    ## `lib.strings.splitStringBy` usage example

    Split on periods and hyphens, discarding the separators:
    ```nix
    splitStringBy (prev: curr: builtins.elem curr [ "." "-" ]) false "foo.bar-baz"
    => [ "foo" "bar" "baz" ]
    ```

    Split on transitions from lowercase to uppercase, keeping the uppercase characters:
    ```nix
    splitStringBy (prev: curr: builtins.match "[a-z]" prev != null && builtins.match "[A-Z]" curr != null) true "fooBarBaz"
    => [ "foo" "Bar" "Baz" ]
    ```

    Handle leading separators correctly:
    ```nix
    splitStringBy (prev: curr: builtins.elem curr [ "." ]) false ".foo.bar.baz"
    => [ "" "foo" "bar" "baz" ]
    ```

    Handle trailing separators correctly:
    ```nix
    splitStringBy (prev: curr: builtins.elem curr [ "." ]) false "foo.bar.baz."
    => [ "foo" "bar" "baz" "" ]
    ```

    Consecutive split characters that are kept each start their own substring:
    ```nix
    splitStringBy (prev: curr: builtins.elem curr [ "." ]) true "foo..bar"
    => [ "foo" "." ".bar" ]
    ```
    :::
  */
  splitStringBy =
    predicate: keepSplit: str:
    let
      len = stringLength str;

      # Single-character substring at index `i`.
      charAt = i: substring i 1 str;

      # Ascending list of every index at which `predicate` requests a split.
      # Computed with filter/genList instead of per-character recursion so that
      # the call depth does not grow with the length of the input.
      splitIndices = builtins.filter (
        i: predicate (if i > 0 then charAt (i - 1) else "") (charAt i)
      ) (builtins.genList (i: i) len);
      splitCount = builtins.length splitIndices;

      # When the split character is discarded, the following piece starts one
      # position after it; when it is kept, the piece starts right on it.
      skip = if keepSplit then 0 else 1;

      # The n-th piece runs from the previous split (or the start of the
      # string) up to, but not including, the n-th split (or the end of the
      # string). Each piece is cut out with a single `substring` call.
      pieceAt =
        n:
        let
          start = if n == 0 then 0 else builtins.elemAt splitIndices (n - 1) + skip;
          stop = if n == splitCount then len else builtins.elemAt splitIndices n;
        in
        substring start (stop - start) str;
    in
    # There is always exactly one more piece than there are split positions,
    # which also covers the empty input (zero splits -> [ "" ]).
    map (addContextFrom str) (builtins.genList pieceAt (splitCount + 1));
  /**
    Return a string without the specified prefix, if the prefix matches.
--- a/lib/tests/misc.nix
+++ b/lib/tests/misc.nix
@ -631,6 +631,101 @@ runTests {
    ];
  };
  # Several single-character delimiters, delimiters dropped.
  testSplitStringBySimpleDelimiter = {
    expr =
      let
        isSeparator = _prev: curr: curr == "." || curr == "-";
      in
      strings.splitStringBy isSeparator false "foo.bar-baz";
    expected = [
      "foo"
      "bar"
      "baz"
    ];
  };

  # A delimiter at index 0 yields an empty first element.
  testSplitStringByLeadingDelimiter = {
    expr =
      let
        isDot = _prev: curr: curr == ".";
      in
      strings.splitStringBy isDot false ".foo.bar.baz";
    expected = [
      ""
      "foo"
      "bar"
      "baz"
    ];
  };

  # A delimiter at the last index yields an empty last element.
  testSplitStringByTrailingDelimiter = {
    expr =
      let
        isDot = _prev: curr: curr == ".";
      in
      strings.splitStringBy isDot false "foo.bar.baz.";
    expected = [
      "foo"
      "bar"
      "baz"
      ""
    ];
  };

  # Adjacent delimiters produce empty elements between them.
  testSplitStringByMultipleConsecutiveDelimiters = {
    expr =
      let
        isDot = _prev: curr: curr == ".";
      in
      strings.splitStringBy isDot false "foo...bar";
    expected = [
      "foo"
      ""
      ""
      "bar"
    ];
  };

  # With keepSplit = true the delimiter opens the following element.
  testSplitStringByKeepingSplitChar = {
    expr =
      let
        isDot = _prev: curr: curr == ".";
      in
      strings.splitStringBy isDot true "foo.bar.baz";
    expected = [
      "foo"
      ".bar"
      ".baz"
    ];
  };

  # Split where a lowercase letter is followed by an uppercase one.
  testSplitStringByCaseTransition = {
    expr =
      let
        isLower = c: builtins.match "[a-z]" c != null;
        isUpper = c: builtins.match "[A-Z]" c != null;
        lowerThenUpper = prev: curr: isLower prev && isUpper curr;
      in
      strings.splitStringBy lowerThenUpper true "fooBarBaz";
    expected = [
      "foo"
      "Bar"
      "Baz"
    ];
  };

  # The empty string results in a single empty element.
  testSplitStringByEmptyString = {
    expr =
      let
        isDot = _prev: curr: curr == ".";
      in
      strings.splitStringBy isDot false "";
    expected = [ "" ];
  };

  # Split only on digit -> lowercase-letter transitions.
  testSplitStringByComplexPredicate = {
    expr =
      let
        isDigit = c: builtins.match "[0-9]" c != null;
        isLower = c: builtins.match "[a-z]" c != null;
        digitThenLower =
          prev: curr:
          prev != "" && curr != "" && isDigit prev && isLower curr;
      in
      strings.splitStringBy digitThenLower true "123abc456def";
    expected = [
      "123"
      "abc456"
      "def"
    ];
  };

  # A kept split character at index 0 still yields an empty first element.
  testSplitStringByUpperCaseStart = {
    expr =
      let
        isUpper = _prev: curr: builtins.match "[A-Z]" curr != null;
      in
      strings.splitStringBy isUpper true "FooBarBaz";
    expected = [
      ""
      "Foo"
      "Bar"
      "Baz"
    ];
  };
  testEscapeShellArg = {
    expr = strings.escapeShellArg "esc'ape\nme";
    expected = "'esc'\\''ape\nme'";