nixos/anubis: init module

This commit is contained in:
Cassie Cheung 2025-03-22 14:03:09 +08:00
parent 3bfdc93bc3
commit cadf4cfc83
No known key found for this signature in database
7 changed files with 486 additions and 0 deletions

View file

@ -2,6 +2,15 @@
"book-nixos-manual": [
"index.html#book-nixos-manual"
],
"module-services-anubis": [
"index.html#module-services-anubis"
],
"module-services-anubis-configuration": [
"index.html#module-services-anubis-configuration"
],
"module-services-anubis-quickstart": [
"index.html#module-services-anubis-quickstart"
],
"module-services-crab-hole": [
"index.html#module-services-crab-hole"
],

View file

@ -168,6 +168,8 @@
- [PDS](https://github.com/bluesky-social/pds), Personal Data Server for [bsky](https://bsky.social/). Available as [services.pds](option.html#opt-services.pds).
- [Anubis](https://github.com/TecharoHQ/anubis), scraper defense software. Available as [services.anubis](#opt-services.anubis.instances).
- [synapse-auto-compressor](https://github.com/matrix-org/rust-synapse-compress-state?tab=readme-ov-file#automated-tool-synapse_auto_compressor), a rust-based matrix-synapse state compressor for postgresql. Available as [services.synapse-auto-compressor](#opt-services.synapse-auto-compressor.enable).
- [mqtt-exporter](https://github.com/kpetremann/mqtt-exporter/), a Prometheus exporter for exposing messages from MQTT. Available as [services.prometheus.exporters.mqtt](#opt-services.prometheus.exporters.mqtt.enable).

View file

@ -1038,6 +1038,7 @@
./services/networking/adguardhome.nix
./services/networking/alice-lg.nix
./services/networking/amuled.nix
./services/networking/anubis.nix
./services/networking/aria2.nix
./services/networking/asterisk.nix
./services/networking/atftpd.nix

View file

@ -0,0 +1,61 @@
# Anubis {#module-services-anubis}
[Anubis](https://anubis.techaro.lol) is scraper defense software that blocks AI scrapers. It is designed to sit
between a reverse proxy and the service to be protected.
## Quickstart {#module-services-anubis-quickstart}
By default, this module uses Unix domain sockets, because their paths can be configured automatically for multiple
instances. TCP sockets are also supported.
A minimal configuration with [nginx](#opt-services.nginx.enable) may look like the following:
```nix
{ config, ... }: {
services.anubis.instances.default.settings.TARGET = "http://localhost:8000";
# required due to unix socket permissions
users.users.nginx.extraGroups = [ config.users.groups.anubis.name ];
services.nginx.virtualHosts."example.com" = {
locations = {
"/".proxyPass = "http://unix:${config.services.anubis.instances.default.settings.BIND}";
};
};
}
```
If Unix domain sockets are not needed or desired, the module can also operate with only TCP sockets:
```nix
{
services.anubis = {
instances.default = {
settings = {
TARGET = "http://localhost:8080";
BIND = ":9000";
BIND_NETWORK = "tcp";
METRICS_BIND = "127.0.0.1:9001";
METRICS_BIND_NETWORK = "tcp";
};
};
};
}
```
## Configuration {#module-services-anubis-configuration}
Default settings for all instances of Anubis can be configured via {option}`services.anubis.defaultOptions`:
```nix
{
services.anubis.defaultOptions = {
botPolicy = { dnsbl = false; };
settings.DIFFICULTY = 3;
};
}
```
Note that, at the moment, a custom bot policy is not merged with the baked-in one; it replaces it entirely. To override
only a single setting such as `dnsbl`, you therefore have to copy the entire bot policy and change that setting. See
[the upstream repository](https://github.com/TecharoHQ/anubis/blob/1509b06cb921aff842e71fbb6636646be6ed5b46/cmd/anubis/botPolicies.json)
for the baked-in policy.

View file

@ -0,0 +1,314 @@
{
config,
lib,
pkgs,
...
}:
let
inherit (lib) types;

  # JSON format helper; its `type` is used for the `botPolicy` option.
  jsonFormat = pkgs.formats.json { };

  # Shorthand for this module's own configuration subtree.
  cfg = config.services.anubis;

  # Only the instances whose `enable` flag is set.
  enabledInstances = lib.filterAttrs (_name: instance: instance.enable) cfg.instances;

  # Maps an instance's attribute name to the base name used for its systemd unit and socket paths.
  # The empty attribute name yields plain "anubis"; any other name yields "anubis-<name>".
  instanceName = name: "anubis" + lib.optionalString (name != "") "-${name}";
# Builds the submodule shared by `services.anubis.defaultOptions` (`isDefault = true`) and by every entry of
  # `services.anubis.instances` (`isDefault = false`).
  #
  # For instances, the settings submodule additionally receives the per-instance options from
  # `instanceSpecificOptions`.
  commonSubmodule =
    isDefault:
    let
      # Declares an option via `lib.mkOption`.
      #
      # When building the instance submodule, an option that carries a `default` gets that default replaced by the
      # value found at `path` (dot-separated) inside `cfg.defaultOptions`, so instances inherit the global defaults.
      # When building `defaultOptions` itself, `opts` is used unchanged.
      mkDefaultOption =
        path: opts:
        lib.mkOption (
          opts
          // lib.optionalAttrs (!isDefault && opts ? default) {
            default =
              lib.attrByPath (lib.splitString "." path)
                # Only reachable if `path` does not name an option declared below.
                (throw "This is a bug in the Anubis module. Please report this as an issue.")
                cfg.defaultOptions;
            defaultText = lib.literalExpression "config.services.anubis.defaultOptions.${path}";
          }
        );
    in
    { name, ... }:
    {
      options = {
        enable = lib.mkEnableOption "this instance of Anubis" // {
          default = true;
        };

        user = mkDefaultOption "user" {
          default = "anubis";
          description = ''
            The user under which Anubis is run.
            This module utilizes systemd's DynamicUser feature. See the corresponding section in
            {manpage}`systemd.exec(5)` for more details.
          '';
          type = types.str;
        };

        group = mkDefaultOption "group" {
          default = "anubis";
          description = ''
            The group under which Anubis is run.
            This module utilizes systemd's DynamicUser feature. See the corresponding section in
            {manpage}`systemd.exec(5)` for more details.
          '';
          type = types.str;
        };

        # Declared through mkDefaultOption so that a policy set in `services.anubis.defaultOptions.botPolicy`
        # is inherited by every instance that does not set its own.
        botPolicy = mkDefaultOption "botPolicy" {
          default = null;
          description = ''
            Anubis policy configuration in Nix syntax. Set to `null` to use the baked-in policy which should be
            sufficient for most use-cases.
            This option has no effect if `settings.POLICY_FNAME` is set to a different value, which is useful for
            importing an existing configuration.
            See [the documentation](https://anubis.techaro.lol/docs/admin/policies) for details.
          '';
          type = types.nullOr jsonFormat.type;
        };

        extraFlags = mkDefaultOption "extraFlags" {
          default = [ ];
          description = "A list of extra flags to be passed to Anubis.";
          example = [ "-metrics-bind \"\"" ];
          type = types.listOf types.str;
        };

        settings = lib.mkOption {
          default = { };
          description = ''
            Freeform configuration via environment variables for Anubis.
            See [the documentation](https://anubis.techaro.lol/docs/admin/installation) for a complete list of
            available environment variables.
          '';
          type = types.submodule [
            {
              # Any other variable may be set freely; `null` values are dropped when the environment is built.
              freeformType =
                with types;
                attrsOf (
                  nullOr (oneOf [
                    str
                    int
                    bool
                  ])
                );

              options = {
                # BIND and METRICS_BIND are defined in instance specific options, since global defaults don't make sense
                BIND_NETWORK = mkDefaultOption "settings.BIND_NETWORK" {
                  default = "unix";
                  description = ''
                    The network family that Anubis should bind to.
                    Accepts anything supported by Go's [`net.Listen`](https://pkg.go.dev/net#Listen).
                    Common values are `tcp` and `unix`.
                  '';
                  example = "tcp";
                  type = types.str;
                };

                METRICS_BIND_NETWORK = mkDefaultOption "settings.METRICS_BIND_NETWORK" {
                  default = "unix";
                  description = ''
                    The network family that the metrics server should bind to.
                    Accepts anything supported by Go's [`net.Listen`](https://pkg.go.dev/net#Listen).
                    Common values are `tcp` and `unix`.
                  '';
                  example = "tcp";
                  type = types.str;
                };

                SOCKET_MODE = mkDefaultOption "settings.SOCKET_MODE" {
                  default = "0770";
                  description = "The permissions on the Unix domain sockets created.";
                  example = "0700";
                  type = types.str;
                };

                DIFFICULTY = mkDefaultOption "settings.DIFFICULTY" {
                  default = 4;
                  description = ''
                    The difficulty required for clients to solve the challenge.
                    Currently, this means the amount of leading zeros in a successful response.
                  '';
                  type = types.int;
                  example = 5;
                };

                SERVE_ROBOTS_TXT = mkDefaultOption "settings.SERVE_ROBOTS_TXT" {
                  default = false;
                  description = ''
                    Whether to serve a default robots.txt that denies access to common AI bots by name and all other
                    bots by wildcard.
                  '';
                  type = types.bool;
                };

                # generated by default
                POLICY_FNAME = mkDefaultOption "settings.POLICY_FNAME" {
                  default = null;
                  description = ''
                    The bot policy file to use. Leave this as `null` to respect the value set in
                    {option}`services.anubis.instances.<name>.botPolicy`.
                  '';
                  type = types.nullOr types.path;
                };
              };
            }
            # Instances additionally get BIND, METRICS_BIND and TARGET; `defaultOptions` gets an empty module.
            (lib.optionalAttrs (!isDefault) (instanceSpecificOptions name))
          ];
        };
      };
    };
# Settings that only make sense per instance, so they are not offered under `defaultOptions`.
  # `name` is the instance's attribute name; it determines the default socket paths via `instanceName`.
  instanceSpecificOptions = name: {
    options = {
      # see other options above

      # Listening address of the main Anubis server.
      BIND = lib.mkOption {
        type = types.str;
        default = "/run/anubis/${instanceName name}.sock";
        example = ":8080";
        description = ''
          The address that Anubis listens to. See Go's [`net.Listen`](https://pkg.go.dev/net#Listen) for syntax.
          Defaults to Unix domain sockets. To use TCP sockets, set this to a TCP address and `BIND_NETWORK` to `"tcp"`.
        '';
      };

      # Listening address of the metrics server.
      METRICS_BIND = lib.mkOption {
        type = types.str;
        default = "/run/anubis/${instanceName name}-metrics.sock";
        example = "127.0.0.1:8081";
        description = ''
          The address Anubis' metrics server listens to. See Go's [`net.Listen`](https://pkg.go.dev/net#Listen) for
          syntax.
          The metrics server is enabled by default and may be disabled. However, due to implementation details, this is
          only possible by setting a command line flag. See {option}`services.anubis.defaultOptions.extraFlags` for an
          example.
          Defaults to Unix domain sockets. To use TCP sockets, set this to a TCP address and `METRICS_BIND_NETWORK` to
          `"tcp"`.
        '';
      };

      # No default is given, so every instance must set this explicitly.
      TARGET = lib.mkOption {
        type = types.str;
        example = "http://127.0.0.1:8000";
        description = ''
          The reverse proxy target that Anubis is protecting. This is a required option.
          The usage of Unix domain sockets is supported by the following syntax: `unix:///path/to/socket.sock`.
        '';
      };
    };
  };
in
{
  options.services.anubis = {
    package = lib.mkPackageOption pkgs "anubis" { };

    defaultOptions = lib.mkOption {
      default = { };
      description = "Default options for all instances of Anubis.";
      type = types.submodule (commonSubmodule true);
    };

    instances = lib.mkOption {
      default = { };
      description = ''
        An attribute set of Anubis instances.
        The attribute name may be an empty string, in which case the `-<name>` suffix is not added to the service name
        and socket paths.
      '';
      type = types.attrsOf (types.submodule (commonSubmodule false));
    };
  };

  # Nothing is configured unless at least one instance is enabled.
  config = lib.mkIf (enabledInstances != { }) {
    # A static `anubis` user/group is only declared while the global defaults still use that name.
    users.users = lib.mkIf (cfg.defaultOptions.user == "anubis") {
      anubis = {
        isSystemUser = true;
        group = cfg.defaultOptions.group;
      };
    };

    users.groups = lib.mkIf (cfg.defaultOptions.group == "anubis") {
      anubis = { };
    };

    # One systemd service per enabled instance, named by `instanceName`.
    systemd.services = lib.mapAttrs' (
      name: instance:
      let
        unitName = instanceName name;

        # An explicitly configured POLICY_FNAME always wins. Otherwise, a policy given in Nix syntax through
        # `botPolicy` is rendered to a JSON file; if neither is set, the variable stays unset and Anubis uses its
        # baked-in policy.
        policyFile =
          if instance.settings.POLICY_FNAME != null then
            instance.settings.POLICY_FNAME
          else if instance.botPolicy != null then
            jsonFormat.generate "${unitName}-botPolicy.json" instance.botPolicy
          else
            null;

        # Settings as passed to the service, with the resolved policy file; `null` entries are omitted.
        effectiveSettings = lib.filterAttrs (_: v: v != null) (
          instance.settings // { POLICY_FNAME = policyFile; }
        );

        # True if either socket lives inside the module-managed runtime directory. The trailing slash keeps
        # sibling paths such as `/run/anubis-foo/...` or `/run/anubis.sock` from matching.
        usesRuntimeDirectory = lib.any (lib.hasPrefix "/run/anubis/") [
          instance.settings.BIND
          instance.settings.METRICS_BIND
        ];
      in
      lib.nameValuePair unitName {
        description = "Anubis (${if name == "" then "default" else name} instance)";
        wantedBy = [ "multi-user.target" ];
        after = [ "network-online.target" ];
        wants = [ "network-online.target" ];

        environment = lib.mapAttrs (lib.const (lib.generators.mkValueStringDefault { })) effectiveSettings;

        serviceConfig = {
          User = instance.user;
          Group = instance.group;
          DynamicUser = true;

          # Flags are joined verbatim with spaces, so any quoting must already be part of each flag string.
          ExecStart = lib.concatStringsSep " " (
            (lib.singleton (lib.getExe cfg.package)) ++ instance.extraFlags
          );

          # NOTE(review): all instances using the default socket paths share this one runtime directory.
          # systemd manages the directory per unit, so stopping or restarting one instance may remove the
          # sockets of the others — confirm, and consider a per-instance directory.
          RuntimeDirectory = if usesRuntimeDirectory then "anubis" else null;

          # hardening
          NoNewPrivileges = true;
          CapabilityBoundingSet = null;
          SystemCallFilter = [
            "@system-service"
            "~@privileged"
          ];
          SystemCallArchitectures = "native";
          MemoryDenyWriteExecute = true;
          PrivateUsers = true;
          PrivateTmp = true;
          PrivateDevices = true;
          ProtectHome = true;
          ProtectClock = true;
          ProtectHostname = true;
          ProtectKernelLogs = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
          ProtectProc = "invisible";
          ProtectSystem = "strict";
          ProtectControlGroups = "strict";
          LockPersonality = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          RestrictNamespaces = true;
          RestrictAddressFamilies = [
            "AF_UNIX"
            "AF_INET"
            "AF_INET6"
          ];
        };
      }
    ) enabledInstances;
  };

  meta.maintainers = with lib.maintainers; [ soopyc ];
  meta.doc = ./anubis.md;
}

View file

@ -174,6 +174,7 @@ in {
amd-sev = runTest ./amd-sev.nix;
angie-api = runTest ./angie-api.nix;
anki-sync-server = runTest ./anki-sync-server.nix;
anubis = runTest ./anubis.nix;
anuko-time-tracker = runTest ./anuko-time-tracker.nix;
apcupsd = runTest ./apcupsd.nix;
apfs = runTest ./apfs.nix;

98
nixos/tests/anubis.nix Normal file
View file

@ -0,0 +1,98 @@
# NixOS VM test for the Anubis module. It runs three instances (default Unix sockets, TCP sockets, and a
# Unix-socket upstream) behind nginx, with nginx also acting as the protected upstream.
{ lib, ... }:
{
  name = "anubis";
  meta.maintainers = [ lib.maintainers.soopyc ];

  nodes.machine =
    {
      config,
      pkgs,
      ...
    }:
    {
      services.anubis.instances = {
        # Unnamed instance: service `anubis`, default Unix socket paths.
        "".settings.TARGET = "http://localhost:8080";

        # TCP-only instance running under its own user and group.
        "tcp" = {
          user = "anubis-tcp";
          group = "anubis-tcp";
          settings = {
            TARGET = "http://localhost:8080";
            BIND = ":9000";
            BIND_NETWORK = "tcp";
            METRICS_BIND = ":9001";
            METRICS_BIND_NETWORK = "tcp";
          };
        };

        # Instance whose upstream is reached through a Unix domain socket.
        "unix-upstream" = {
          group = "nginx";
          settings.TARGET = "unix:///run/nginx/nginx.sock";
        };
      };

      # support
      users.users.nginx.extraGroups = [ config.users.groups.anubis.name ];

      services.nginx = {
        enable = true;
        recommendedProxySettings = true;

        virtualHosts."basic.localhost".locations = {
          "/".proxyPass = "http://unix:${config.services.anubis.instances."".settings.BIND}";
          "/metrics".proxyPass = "http://unix:${config.services.anubis.instances."".settings.METRICS_BIND}";
        };

        virtualHosts."tcp.localhost".locations = {
          "/".proxyPass = "http://localhost:9000";
          "/metrics".proxyPass = "http://localhost:9001";
        };

        virtualHosts."unix.localhost".locations = {
          "/".proxyPass = "http://unix:${config.services.anubis.instances.unix-upstream.settings.BIND}";
        };

        # emulate an upstream with nginx, listening on tcp and unix sockets.
        virtualHosts."upstream.localhost" = {
          default = true; # make nginx match this vhost for `localhost`
          listen = [
            { addr = "unix:/run/nginx/nginx.sock"; }
            {
              addr = "localhost";
              port = 8080;
            }
          ];
          locations."/" = {
            tryFiles = "$uri $uri/index.html =404";
            root = pkgs.runCommand "anubis-test-upstream" { } ''
              mkdir $out
              echo "it works" >> $out/index.html
            '';
          };
        };
      };
    };

  # The loop bodies must be indented relative to their `for` line; otherwise Python rejects the script.
  testScript = ''
    for unit in ["nginx", "anubis", "anubis-tcp", "anubis-unix-upstream"]:
        machine.wait_for_unit(unit + ".service")

    for port in [9000, 9001]:
        machine.wait_for_open_port(port)

    for instance in ["anubis", "anubis-unix-upstream"]:
        machine.wait_for_open_unix_socket(f"/run/anubis/{instance}.sock")
        machine.wait_for_open_unix_socket(f"/run/anubis/{instance}-metrics.sock")

    # Default unix socket mode
    machine.succeed('curl -f http://basic.localhost | grep "it works"')
    machine.succeed('curl -f http://basic.localhost -H "User-Agent: Mozilla" | grep anubis')
    machine.succeed('curl -f http://basic.localhost/metrics | grep anubis_challenges_issued')
    machine.succeed('curl -f -X POST http://basic.localhost/.within.website/x/cmd/anubis/api/make-challenge | grep challenge')

    # TCP mode
    machine.succeed('curl -f http://tcp.localhost -H "User-Agent: Mozilla" | grep anubis')
    machine.succeed('curl -f http://tcp.localhost/metrics | grep anubis_challenges_issued')

    # Upstream is a unix socket mode
    machine.succeed('curl -f http://unix.localhost/index.html | grep "it works"')
  '';
}