diff --git a/nixos/doc/manual/redirects.json b/nixos/doc/manual/redirects.json index 29f7f9d51996..4995273654d0 100644 --- a/nixos/doc/manual/redirects.json +++ b/nixos/doc/manual/redirects.json @@ -2,6 +2,15 @@ "book-nixos-manual": [ "index.html#book-nixos-manual" ], + "module-services-anubis": [ + "index.html#module-services-anubis" + ], + "module-services-anubis-configuration": [ + "index.html#module-services-anubis-configuration" + ], + "module-services-anubis-quickstart": [ + "index.html#module-services-anubis-quickstart" + ], "module-services-crab-hole": [ "index.html#module-services-crab-hole" ], diff --git a/nixos/doc/manual/release-notes/rl-2505.section.md b/nixos/doc/manual/release-notes/rl-2505.section.md index 269b308fc3f8..2837e4e50ce6 100644 --- a/nixos/doc/manual/release-notes/rl-2505.section.md +++ b/nixos/doc/manual/release-notes/rl-2505.section.md @@ -168,6 +168,8 @@ - [PDS](https://github.com/bluesky-social/pds), Personal Data Server for [bsky](https://bsky.social/). Available as [services.pds](option.html#opt-services.pds). +- [Anubis](https://github.com/TecharoHQ/anubis), a scraper defense software. Available as [services.anubis](options.html#opt-services.anubis.instances). + - [synapse-auto-compressor](https://github.com/matrix-org/rust-synapse-compress-state?tab=readme-ov-file#automated-tool-synapse_auto_compressor), a rust-based matrix-synapse state compressor for postgresql. Available as [services.synapse-auto-compressor](#opt-services.synapse-auto-compressor.enable). - [mqtt-exporter](https://github.com/kpetremann/mqtt-exporter/), a Prometheus exporter for exposing messages from MQTT. Available as [services.prometheus.exporters.mqtt](#opt-services.prometheus.exporters.mqtt.enable). 
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index daad3727a341..9ac296973411 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -1038,6 +1038,7 @@ ./services/networking/adguardhome.nix ./services/networking/alice-lg.nix ./services/networking/amuled.nix + ./services/networking/anubis.nix ./services/networking/aria2.nix ./services/networking/asterisk.nix ./services/networking/atftpd.nix diff --git a/nixos/modules/services/networking/anubis.md b/nixos/modules/services/networking/anubis.md new file mode 100644 index 000000000000..8a9a2ea76aa6 --- /dev/null +++ b/nixos/modules/services/networking/anubis.md @@ -0,0 +1,61 @@ +# Anubis {#module-services-anubis} + +[Anubis](https://anubis.techaro.lol) is a scraper defense software that blocks AI scrapers. It is designed to sit +between a reverse proxy and the service to be protected. + +## Quickstart {#module-services-anubis-quickstart} + +This module is designed to use Unix domain sockets as the socket paths can be automatically configured for multiple +instances, but TCP sockets are also supported. + +A minimal configuration with [nginx](#opt-services.nginx.enable) may look like the following: + +```nix +{ config, ... }: { + services.anubis.instances.default.settings.TARGET = "http://localhost:8000"; + + # required due to unix socket permissions + users.users.nginx.extraGroups = [ config.users.groups.anubis.name ]; + services.nginx.virtualHosts."example.com" = { + locations = { + "/".proxyPass = "http://unix:${config.services.anubis.instances.default.settings.BIND}"; + }; + }; +} +``` + +If Unix domain sockets are not needed or desired, this module supports operating with only TCP sockets. 
+ +```nix +{ + services.anubis = { + instances.default = { + settings = { + TARGET = "http://localhost:8080"; + BIND = ":9000"; + BIND_NETWORK = "tcp"; + METRICS_BIND = "127.0.0.1:9001"; + METRICS_BIND_NETWORK = "tcp"; + }; + }; + }; +} +``` + +## Configuration {#module-services-anubis-configuration} + +It is possible to configure default settings for all instances of Anubis, via {option}`services.anubis.defaultOptions`. + +```nix +{ + services.anubis.defaultOptions = { + botPolicy = { dnsbl = false; }; + settings.DIFFICULTY = 3; + }; +} +``` + +Note that at the moment, a custom bot policy is not merged with the baked-in one. That means to only override a setting +like `dnsbl`, copying the entire bot policy is required. Check +[the upstream repository](https://github.com/TecharoHQ/anubis/blob/1509b06cb921aff842e71fbb6636646be6ed5b46/cmd/anubis/botPolicies.json) +for the policy. diff --git a/nixos/modules/services/networking/anubis.nix b/nixos/modules/services/networking/anubis.nix new file mode 100644 index 000000000000..e2d9fdc0f290 --- /dev/null +++ b/nixos/modules/services/networking/anubis.nix @@ -0,0 +1,314 @@ +{ + config, + lib, + pkgs, + ... +}: +let + inherit (lib) types; + jsonFormat = pkgs.formats.json { }; + + cfg = config.services.anubis; + enabledInstances = lib.filterAttrs (_: conf: conf.enable) cfg.instances; + instanceName = name: if name == "" then "anubis" else "anubis-${name}"; + + commonSubmodule = + isDefault: + let + mkDefaultOption = + path: opts: + lib.mkOption ( + opts + // lib.optionalAttrs (!isDefault && opts ? default) { + default = + lib.attrByPath (lib.splitString "." path) + (throw "This is a bug in the Anubis module. Please report this as an issue.") + cfg.defaultOptions; + defaultText = lib.literalExpression "config.services.anubis.defaultOptions.${path}"; + } + ); + in + { name, ... 
}: + { + options = { + enable = lib.mkEnableOption "this instance of Anubis" // { + default = true; + }; + user = mkDefaultOption "user" { + default = "anubis"; + description = '' + The user under which Anubis is run. + + This module utilizes systemd's DynamicUser feature. See the corresponding section in + {manpage}`systemd.exec(5)` for more details. + ''; + type = types.str; + }; + group = mkDefaultOption "group" { + default = "anubis"; + description = '' + The group under which Anubis is run. + + This module utilizes systemd's DynamicUser feature. See the corresponding section in + {manpage}`systemd.exec(5)` for more details. + ''; + type = types.str; + }; + # Declared through mkDefaultOption so that instances inherit defaultOptions.botPolicy like the other options. + botPolicy = mkDefaultOption "botPolicy" { + default = null; + description = '' + Anubis policy configuration in Nix syntax. Set to `null` to use the baked-in policy which should be + sufficient for most use-cases. + + This option has no effect if `settings.POLICY_FNAME` is set to a different value, which is useful for + importing an existing configuration. + + See [the documentation](https://anubis.techaro.lol/docs/admin/policies) for details. + ''; + type = types.nullOr jsonFormat.type; + }; + + extraFlags = mkDefaultOption "extraFlags" { + default = [ ]; + description = "A list of extra flags to be passed to Anubis."; + example = [ "-metrics-bind \"\"" ]; + type = types.listOf types.str; + }; + + settings = lib.mkOption { + default = { }; + description = '' + Freeform configuration via environment variables for Anubis. + + See [the documentation](https://anubis.techaro.lol/docs/admin/installation) for a complete list of + available environment variables. 
+ ''; + type = types.submodule [ + { + freeformType = + with types; + attrsOf ( + nullOr (oneOf [ + str + int + bool + ]) + ); + + options = { + # BIND and METRICS_BIND are defined in instance specific options, since global defaults don't make sense + BIND_NETWORK = mkDefaultOption "settings.BIND_NETWORK" { + default = "unix"; + description = '' + The network family that Anubis should bind to. + + Accepts anything supported by Go's [`net.Listen`](https://pkg.go.dev/net#Listen). + + Common values are `tcp` and `unix`. + ''; + example = "tcp"; + type = types.str; + }; + METRICS_BIND_NETWORK = mkDefaultOption "settings.METRICS_BIND_NETWORK" { + default = "unix"; + description = '' + The network family that the metrics server should bind to. + + Accepts anything supported by Go's [`net.Listen`](https://pkg.go.dev/net#Listen). + + Common values are `tcp` and `unix`. + ''; + example = "tcp"; + type = types.str; + }; + SOCKET_MODE = mkDefaultOption "settings.SOCKET_MODE" { + default = "0770"; + description = "The permissions on the Unix domain sockets created."; + example = "0700"; + type = types.str; + }; + DIFFICULTY = mkDefaultOption "settings.DIFFICULTY" { + default = 4; + description = '' + The difficulty required for clients to solve the challenge. + + Currently, this means the amount of leading zeros in a successful response. + ''; + type = types.int; + example = 5; + }; + SERVE_ROBOTS_TXT = mkDefaultOption "settings.SERVE_ROBOTS_TXT" { + default = false; + description = '' + Whether to serve a default robots.txt that denies access to common AI bots by name and all other + bots by wildcard. + ''; + type = types.bool; + }; + + # generated by default + POLICY_FNAME = mkDefaultOption "settings.POLICY_FNAME" { + default = null; + description = '' + The bot policy file to use. Leave this as `null` to respect the value set in + {option}`services.anubis.instances.<name>.botPolicy`. 
+ ''; + type = types.nullOr types.path; + }; + }; + } + (lib.optionalAttrs (!isDefault) (instanceSpecificOptions name)) + ]; + }; + }; + }; + + instanceSpecificOptions = name: { + options = { + # see other options above + BIND = lib.mkOption { + default = "/run/anubis/${instanceName name}.sock"; + description = '' + The address that Anubis listens to. See Go's [`net.Listen`](https://pkg.go.dev/net#Listen) for syntax. + + Defaults to Unix domain sockets. To use TCP sockets, set this to a TCP address and `BIND_NETWORK` to `"tcp"`. + ''; + example = ":8080"; + type = types.str; + }; + METRICS_BIND = lib.mkOption { + default = "/run/anubis/${instanceName name}-metrics.sock"; + description = '' + The address Anubis' metrics server listens to. See Go's [`net.Listen`](https://pkg.go.dev/net#Listen) for + syntax. + + The metrics server is enabled by default and may be disabled. However, due to implementation details, this is + only possible by setting a command line flag. See {option}`services.anubis.defaultOptions.extraFlags` for an + example. + + Defaults to Unix domain sockets. To use TCP sockets, set this to a TCP address and `METRICS_BIND_NETWORK` to + `"tcp"`. + ''; + example = "127.0.0.1:8081"; + type = types.str; + }; + TARGET = lib.mkOption { + description = '' + The reverse proxy target that Anubis is protecting. This is a required option. + + The usage of Unix domain sockets is supported by the following syntax: `unix:///path/to/socket.sock`. + ''; + example = "http://127.0.0.1:8000"; + type = types.str; + }; + }; + }; +in +{ + options.services.anubis = { + package = lib.mkPackageOption pkgs "anubis" { }; + + defaultOptions = lib.mkOption { + default = { }; + description = "Default options for all instances of Anubis."; + type = types.submodule (commonSubmodule true); + }; + + instances = lib.mkOption { + default = { }; + description = '' + An attribute set of Anubis instances. 
+ + The attribute name may be an empty string, in which case the `-<name>` suffix is not added to the service name + and socket paths. + ''; + type = types.attrsOf (types.submodule (commonSubmodule false)); + }; + }; + + config = lib.mkIf (enabledInstances != { }) { + users.users = lib.mkIf (cfg.defaultOptions.user == "anubis") { + anubis = { + isSystemUser = true; + group = cfg.defaultOptions.group; + }; + }; + + users.groups = lib.mkIf (cfg.defaultOptions.group == "anubis") { + anubis = { }; + }; + + systemd.services = lib.mapAttrs' ( + name: instance: + lib.nameValuePair "${instanceName name}" { + description = "Anubis (${if name == "" then "default" else name} instance)"; + wantedBy = [ "multi-user.target" ]; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + # botPolicy is rendered to a JSON file and exported as POLICY_FNAME, unless POLICY_FNAME was set explicitly. + environment = lib.mapAttrs (lib.const (lib.generators.mkValueStringDefault { })) ( + lib.filterAttrs (_: v: v != null) (instance.settings // lib.optionalAttrs (instance.settings.POLICY_FNAME == null && instance.botPolicy != null) { POLICY_FNAME = "${jsonFormat.generate "${instanceName name}-botPolicy.json" instance.botPolicy}"; }) + ); + + serviceConfig = { + User = instance.user; + Group = instance.group; + DynamicUser = true; + + ExecStart = lib.concatStringsSep " " ( + (lib.singleton (lib.getExe cfg.package)) ++ instance.extraFlags + ); + RuntimeDirectory = + if + lib.any (lib.hasPrefix "/run/anubis") ( + with instance.settings; + [ + BIND + METRICS_BIND + ] + ) + then + "anubis" + else + null; + + # hardening + NoNewPrivileges = true; + CapabilityBoundingSet = null; + SystemCallFilter = [ + "@system-service" + "~@privileged" + ]; + SystemCallArchitectures = "native"; + MemoryDenyWriteExecute = true; + + PrivateUsers = true; + PrivateTmp = true; + PrivateDevices = true; + ProtectHome = true; + ProtectClock = true; + ProtectHostname = true; + ProtectKernelLogs = true; + ProtectKernelModules = true; + ProtectKernelTunables = true; + ProtectProc = "invisible"; + ProtectSystem = "strict"; + ProtectControlGroups = "strict"; + LockPersonality = true; + RestrictRealtime = true; + RestrictSUIDSGID = true; + RestrictNamespaces = true; + RestrictAddressFamilies = [ + "AF_UNIX" + 
"AF_INET" + "AF_INET6" + ]; + }; + } + ) enabledInstances; + }; + + meta.maintainers = with lib.maintainers; [ soopyc ]; + meta.doc = ./anubis.md; +} diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index d1d13a452708..82deac582e5b 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -174,6 +174,7 @@ in { amd-sev = runTest ./amd-sev.nix; angie-api = runTest ./angie-api.nix; anki-sync-server = runTest ./anki-sync-server.nix; + anubis = runTest ./anubis.nix; anuko-time-tracker = runTest ./anuko-time-tracker.nix; apcupsd = runTest ./apcupsd.nix; apfs = runTest ./apfs.nix; diff --git a/nixos/tests/anubis.nix b/nixos/tests/anubis.nix new file mode 100644 index 000000000000..f00a2a827326 --- /dev/null +++ b/nixos/tests/anubis.nix @@ -0,0 +1,98 @@ +{ lib, ... }: +{ + name = "anubis"; + meta.maintainers = [ lib.maintainers.soopyc ]; + + nodes.machine = + { + config, + pkgs, + ... + }: + { + services.anubis.instances = { + "".settings.TARGET = "http://localhost:8080"; + + "tcp" = { + user = "anubis-tcp"; + group = "anubis-tcp"; + settings = { + TARGET = "http://localhost:8080"; + BIND = ":9000"; + BIND_NETWORK = "tcp"; + METRICS_BIND = ":9001"; + METRICS_BIND_NETWORK = "tcp"; + }; + }; + + "unix-upstream" = { + group = "nginx"; + settings.TARGET = "unix:///run/nginx/nginx.sock"; + }; + }; + + # support + users.users.nginx.extraGroups = [ config.users.groups.anubis.name ]; + services.nginx = { + enable = true; + recommendedProxySettings = true; + virtualHosts."basic.localhost".locations = { + "/".proxyPass = "http://unix:${config.services.anubis.instances."".settings.BIND}"; + "/metrics".proxyPass = "http://unix:${config.services.anubis.instances."".settings.METRICS_BIND}"; + }; + + virtualHosts."tcp.localhost".locations = { + "/".proxyPass = "http://localhost:9000"; + "/metrics".proxyPass = "http://localhost:9001"; + }; + + virtualHosts."unix.localhost".locations = { + "/".proxyPass = 
"http://unix:${config.services.anubis.instances.unix-upstream.settings.BIND}"; + }; + + # emulate an upstream with nginx, listening on tcp and unix sockets. + virtualHosts."upstream.localhost" = { + default = true; # make nginx match this vhost for `localhost` + listen = [ + { addr = "unix:/run/nginx/nginx.sock"; } + { + addr = "localhost"; + port = 8080; + } + ]; + locations."/" = { + tryFiles = "$uri $uri/index.html =404"; + root = pkgs.runCommand "anubis-test-upstream" { } '' + mkdir $out + echo "it works" >> $out/index.html + ''; + }; + }; + }; + }; + + testScript = '' + for unit in ["nginx", "anubis", "anubis-tcp", "anubis-unix-upstream"]: + machine.wait_for_unit(unit + ".service") + + for port in [9000, 9001]: + machine.wait_for_open_port(port) + + for instance in ["anubis", "anubis-unix-upstream"]: + machine.wait_for_open_unix_socket(f"/run/anubis/{instance}.sock") + machine.wait_for_open_unix_socket(f"/run/anubis/{instance}-metrics.sock") + + # Default unix socket mode + machine.succeed('curl -f http://basic.localhost | grep "it works"') + machine.succeed('curl -f http://basic.localhost -H "User-Agent: Mozilla" | grep anubis') + machine.succeed('curl -f http://basic.localhost/metrics | grep anubis_challenges_issued') + machine.succeed('curl -f -X POST http://basic.localhost/.within.website/x/cmd/anubis/api/make-challenge | grep challenge') + + # TCP mode + machine.succeed('curl -f http://tcp.localhost -H "User-Agent: Mozilla" | grep anubis') + machine.succeed('curl -f http://tcp.localhost/metrics | grep anubis_challenges_issued') + + # Upstream is a unix socket mode + machine.succeed('curl -f http://unix.localhost/index.html | grep "it works"') + ''; +}