mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-07-13 21:50:33 +03:00
Merge pull request #104094 from flokli/systemd-unified-cgroup-hierarchy
systemd: switch to unified cgroup hierarchy by default
This commit is contained in:
commit
c76891314d
8 changed files with 78 additions and 23 deletions
|
@ -277,6 +277,19 @@
|
||||||
<literal>unbound-control</literal> without passing a custom configuration location.
|
<literal>unbound-control</literal> without passing a custom configuration location.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
NixOS now defaults to the unified cgroup hierarchy (cgroupsv2).
|
||||||
|
See the <link xlink:href="https://www.redhat.com/sysadmin/fedora-31-control-group-v2">Fedora 31 article</link>
|
||||||
|
for details on why this is desirable, and how it impacts containers.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If you want to run containers with a runtime that does not yet support cgroupsv2,
|
||||||
|
you can switch back to the old behaviour by setting
|
||||||
|
<xref linkend="opt-systemd.enableUnifiedCgroupHierarchy"/> = <literal>false</literal>;
|
||||||
|
and rebooting.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</section>
|
</section>
|
||||||
</section>
|
</section>
|
||||||
|
|
|
@ -23,5 +23,9 @@ with lib;
|
||||||
|
|
||||||
boot.specialFileSystems."/proc".options = [ "hidepid=2" "gid=${toString config.ids.gids.proc}" ];
|
boot.specialFileSystems."/proc".options = [ "hidepid=2" "gid=${toString config.ids.gids.proc}" ];
|
||||||
systemd.services.systemd-logind.serviceConfig.SupplementaryGroups = [ "proc" ];
|
systemd.services.systemd-logind.serviceConfig.SupplementaryGroups = [ "proc" ];
|
||||||
|
|
||||||
|
# Disable cgroupsv2, which doesn't work with hidepid.
|
||||||
|
# https://github.com/NixOS/nixpkgs/pull/104094#issuecomment-729996203
|
||||||
|
systemd.enableUnifiedCgroupHierarchy = false;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,6 +76,10 @@ in
|
||||||
enable = mkDefault true;
|
enable = mkDefault true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# TODO: remove this override once k3s supports cgroupsv2, either by docker
|
||||||
|
# supporting it, or their bundled containerd
|
||||||
|
systemd.enableUnifiedCgroupHierarchy = false;
|
||||||
|
|
||||||
systemd.services.k3s = {
|
systemd.services.k3s = {
|
||||||
description = "k3s service";
|
description = "k3s service";
|
||||||
after = mkIf cfg.docker [ "docker.service" ];
|
after = mkIf cfg.docker [ "docker.service" ];
|
||||||
|
|
|
@ -550,6 +550,14 @@ in
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
systemd.enableUnifiedCgroupHierarchy = mkOption {
|
||||||
|
default = true;
|
||||||
|
type = types.bool;
|
||||||
|
description = ''
|
||||||
|
Whether to enable the unified cgroup hierarchy (cgroupsv2).
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
systemd.coredump.enable = mkOption {
|
systemd.coredump.enable = mkOption {
|
||||||
default = true;
|
default = true;
|
||||||
type = types.bool;
|
type = types.bool;
|
||||||
|
@ -1178,6 +1186,7 @@ in
|
||||||
boot.kernel.sysctl = mkIf (!cfg.coredump.enable) {
|
boot.kernel.sysctl = mkIf (!cfg.coredump.enable) {
|
||||||
"kernel.core_pattern" = "core";
|
"kernel.core_pattern" = "core";
|
||||||
};
|
};
|
||||||
|
boot.kernelParams = optional (!cfg.enableUnifiedCgroupHierarchy) "systemd.unified_cgroup_hierarchy=0";
|
||||||
};
|
};
|
||||||
|
|
||||||
# FIXME: Remove these eventually.
|
# FIXME: Remove these eventually.
|
||||||
|
|
|
@ -155,6 +155,9 @@ in
|
||||||
users.groups.docker.gid = config.ids.gids.docker;
|
users.groups.docker.gid = config.ids.gids.docker;
|
||||||
systemd.packages = [ cfg.package ];
|
systemd.packages = [ cfg.package ];
|
||||||
|
|
||||||
|
# TODO: remove once docker 20.10 is released
|
||||||
|
systemd.enableUnifiedCgroupHierarchy = false;
|
||||||
|
|
||||||
systemd.services.docker = {
|
systemd.services.docker = {
|
||||||
wantedBy = optional cfg.enableOnBoot "multi-user.target";
|
wantedBy = optional cfg.enableOnBoot "multi-user.target";
|
||||||
environment = proxy_env;
|
environment = proxy_env;
|
||||||
|
|
|
@ -34,7 +34,6 @@ import ./make-test-python.nix (
|
||||||
podman.wait_for_unit("sockets.target")
|
podman.wait_for_unit("sockets.target")
|
||||||
start_all()
|
start_all()
|
||||||
|
|
||||||
|
|
||||||
with subtest("Run container as root with runc"):
|
with subtest("Run container as root with runc"):
|
||||||
podman.succeed("tar cv --files-from /dev/null | podman import - scratchimg")
|
podman.succeed("tar cv --files-from /dev/null | podman import - scratchimg")
|
||||||
podman.succeed(
|
podman.succeed(
|
||||||
|
@ -53,16 +52,14 @@ import ./make-test-python.nix (
|
||||||
podman.succeed("podman stop sleeping")
|
podman.succeed("podman stop sleeping")
|
||||||
podman.succeed("podman rm sleeping")
|
podman.succeed("podman rm sleeping")
|
||||||
|
|
||||||
with subtest("Run container rootless with runc"):
|
with subtest("Run container as root with the default backend"):
|
||||||
podman.succeed(su_cmd("tar cv --files-from /dev/null | podman import - scratchimg"))
|
podman.succeed("tar cv --files-from /dev/null | podman import - scratchimg")
|
||||||
podman.succeed(
|
podman.succeed(
|
||||||
su_cmd(
|
"podman run -d --name=sleeping -v /nix/store:/nix/store -v /run/current-system/sw/bin:/bin scratchimg /bin/sleep 10"
|
||||||
"podman run --runtime=runc -d --name=sleeping -v /nix/store:/nix/store -v /run/current-system/sw/bin:/bin scratchimg /bin/sleep 10"
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
podman.succeed(su_cmd("podman ps | grep sleeping"))
|
podman.succeed("podman ps | grep sleeping")
|
||||||
podman.succeed(su_cmd("podman stop sleeping"))
|
podman.succeed("podman stop sleeping")
|
||||||
podman.succeed(su_cmd("podman rm sleeping"))
|
podman.succeed("podman rm sleeping")
|
||||||
|
|
||||||
with subtest("Run container rootless with crun"):
|
with subtest("Run container rootless with crun"):
|
||||||
podman.succeed(su_cmd("tar cv --files-from /dev/null | podman import - scratchimg"))
|
podman.succeed(su_cmd("tar cv --files-from /dev/null | podman import - scratchimg"))
|
||||||
|
@ -74,6 +71,18 @@ import ./make-test-python.nix (
|
||||||
podman.succeed(su_cmd("podman ps | grep sleeping"))
|
podman.succeed(su_cmd("podman ps | grep sleeping"))
|
||||||
podman.succeed(su_cmd("podman stop sleeping"))
|
podman.succeed(su_cmd("podman stop sleeping"))
|
||||||
podman.succeed(su_cmd("podman rm sleeping"))
|
podman.succeed(su_cmd("podman rm sleeping"))
|
||||||
|
# As of 2020-11-20, the runc backend doesn't work rootless with cgroupsv2 yet, so we don't run the rootless runc test.
|
||||||
|
|
||||||
|
with subtest("Run container rootless with the default backend"):
|
||||||
|
podman.succeed(su_cmd("tar cv --files-from /dev/null | podman import - scratchimg"))
|
||||||
|
podman.succeed(
|
||||||
|
su_cmd(
|
||||||
|
"podman run -d --name=sleeping -v /nix/store:/nix/store -v /run/current-system/sw/bin:/bin scratchimg /bin/sleep 10"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
podman.succeed(su_cmd("podman ps | grep sleeping"))
|
||||||
|
podman.succeed(su_cmd("podman stop sleeping"))
|
||||||
|
podman.succeed(su_cmd("podman rm sleeping"))
|
||||||
'';
|
'';
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -82,6 +82,10 @@ import ./make-test-python.nix ({ pkgs, ... }: {
|
||||||
"systemd-run --pty --property=Type=oneshot --property=DynamicUser=yes --property=User=iamatest whoami"
|
"systemd-run --pty --property=Type=oneshot --property=DynamicUser=yes --property=User=iamatest whoami"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with subtest("regression test for https://bugs.freedesktop.org/show_bug.cgi?id=77507"):
|
||||||
|
retcode, output = machine.execute("systemctl status testservice1.service")
|
||||||
|
assert retcode in [0, 3] # https://bugs.freedesktop.org/show_bug.cgi?id=77507
|
||||||
|
|
||||||
# Regression test for https://github.com/NixOS/nixpkgs/issues/35268
|
# Regression test for https://github.com/NixOS/nixpkgs/issues/35268
|
||||||
with subtest("file system with x-initrd.mount is not unmounted"):
|
with subtest("file system with x-initrd.mount is not unmounted"):
|
||||||
machine.succeed("mountpoint -q /test-x-initrd-mount")
|
machine.succeed("mountpoint -q /test-x-initrd-mount")
|
||||||
|
@ -122,17 +126,6 @@ import ./make-test-python.nix ({ pkgs, ... }: {
|
||||||
machine.wait_for_unit("multi-user.target")
|
machine.wait_for_unit("multi-user.target")
|
||||||
assert "fq_codel" in machine.succeed("sysctl net.core.default_qdisc")
|
assert "fq_codel" in machine.succeed("sysctl net.core.default_qdisc")
|
||||||
|
|
||||||
# Test cgroup accounting is enabled
|
|
||||||
with subtest("systemd cgroup accounting is enabled"):
|
|
||||||
machine.wait_for_unit("multi-user.target")
|
|
||||||
assert "yes" in machine.succeed(
|
|
||||||
"systemctl show testservice1.service -p IOAccounting"
|
|
||||||
)
|
|
||||||
|
|
||||||
retcode, output = machine.execute("systemctl status testservice1.service")
|
|
||||||
assert retcode in [0, 3] # https://bugs.freedesktop.org/show_bug.cgi?id=77507
|
|
||||||
assert "CPU:" in output
|
|
||||||
|
|
||||||
# Test systemd is configured to manage a watchdog
|
# Test systemd is configured to manage a watchdog
|
||||||
with subtest("systemd manages hardware watchdog"):
|
with subtest("systemd manages hardware watchdog"):
|
||||||
machine.wait_for_unit("multi-user.target")
|
machine.wait_for_unit("multi-user.target")
|
||||||
|
@ -168,5 +161,25 @@ import ./make-test-python.nix ({ pkgs, ... }: {
|
||||||
machine.succeed("systemctl status systemd-cryptsetup@luks1.service")
|
machine.succeed("systemctl status systemd-cryptsetup@luks1.service")
|
||||||
machine.succeed("mkdir -p /tmp/luks1")
|
machine.succeed("mkdir -p /tmp/luks1")
|
||||||
machine.succeed("mount /dev/mapper/luks1 /tmp/luks1")
|
machine.succeed("mount /dev/mapper/luks1 /tmp/luks1")
|
||||||
|
|
||||||
|
# Do some IP traffic
|
||||||
|
output_ping = machine.succeed(
|
||||||
|
"systemd-run --wait -- /run/wrappers/bin/ping -c 1 127.0.0.1 2>&1"
|
||||||
|
)
|
||||||
|
|
||||||
|
with subtest("systemd reports accounting data on system.slice"):
|
||||||
|
output = machine.succeed("systemctl status system.slice")
|
||||||
|
assert "CPU:" in output
|
||||||
|
assert "Memory:" in output
|
||||||
|
|
||||||
|
assert "IP:" in output
|
||||||
|
assert "0B in, 0B out" not in output
|
||||||
|
|
||||||
|
assert "IO:" in output
|
||||||
|
assert "0B read, 0B written" not in output
|
||||||
|
|
||||||
|
with subtest("systemd per-unit accounting works"):
|
||||||
|
assert "IP traffic received: 84B" in output_ping
|
||||||
|
assert "IP traffic sent: 84B" in output_ping
|
||||||
'';
|
'';
|
||||||
})
|
})
|
||||||
|
|
|
@ -281,9 +281,9 @@ stdenv.mkDerivation {
|
||||||
"-Dmount-path=${utillinux}/bin/mount"
|
"-Dmount-path=${utillinux}/bin/mount"
|
||||||
"-Dumount-path=${utillinux}/bin/umount"
|
"-Dumount-path=${utillinux}/bin/umount"
|
||||||
"-Dcreate-log-dirs=false"
|
"-Dcreate-log-dirs=false"
|
||||||
# Upstream uses cgroupsv2 by default. To support docker and other
|
|
||||||
# container managers we still need v1.
|
# Use cgroupsv2. This is already the upstream default, but it is better to be explicit.
|
||||||
"-Ddefault-hierarchy=hybrid"
|
"-Ddefault-hierarchy=unified"
|
||||||
# Upstream defaulted to disable manpages since they optimize for the much
|
# Upstream defaulted to disable manpages since they optimize for the much
|
||||||
# more frequent development builds
|
# more frequent development builds
|
||||||
"-Dman=true"
|
"-Dman=true"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue