diff --git a/changelog/changes/2024-04-22-systemd-sysext.md b/changelog/changes/2024-04-22-systemd-sysext.md new file mode 100644 index 00000000000..8f67f640481 --- /dev/null +++ b/changelog/changes/2024-04-22-systemd-sysext.md @@ -0,0 +1 @@ +- Backported systemd-sysext mutable overlays functionality from yet-unreleased systemd v256. ([flatcar/scripts#1753](https://github.com/flatcar/scripts/pull/1753)) diff --git a/changelog/updates/2024-04-22-systemd-sysext.md b/changelog/updates/2024-04-22-systemd-sysext.md new file mode 100644 index 00000000000..910e1186788 --- /dev/null +++ b/changelog/updates/2024-04-22-systemd-sysext.md @@ -0,0 +1 @@ +- systemd ([255.4](https://github.com/systemd/systemd-stable/commits/v255.4/)) diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/Manifest b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/Manifest index 867980ea7b5..ad2990d127f 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/Manifest +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/Manifest @@ -1,6 +1 @@ -DIST systemd-stable-254.5.tar.gz 14334696 BLAKE2B 2f63d79ae93add69ac0b56dda9f67019340f84692de4da200557b9f5f1f16bebbad42a9a7e2d6ef7420aa37746d2ede0481fd8e39f03a31576c7e4e48e259ce3 SHA512 cac713670216add9e5473e2c86f04da441015e7cc0ac1500b9e1489a435f9b80c4c6ee24e9b22e4c4213a495bc1a0a908925df2045e344a2170d5aea6aafa16c -DIST systemd-stable-254.7.tar.gz 14411955 BLAKE2B 1213237a001fb0aef8912637f31d7d77888bc2505e1e8d8d295642a547bdebbc3a786eed095694e6a6fe2665d6e8e45e98cd883186eedeb1b4fd73daf2520dcf SHA512 2e859813f1f52fa693631ce43466875ac2ac42e09872011ee52fe4e44727663c3de9f128a47776899423188c1e99ce73a69059426a9356c930e275037d001685 -DIST systemd-stable-254.8.tar.gz 14418468 BLAKE2B e5a151ece86e57c7224fc95bda1b4ede1277fce4a2ba28d3605ab0431a2aafe1088f90c49a20e3b53a5b56aeef7c0f1f5da0601db740150f5efdf6eae7bbde80 SHA512 
a3f35d9fcafcccd8d9c33ab1047241f226146017be95562a67c7dcc9eeb4b77bded92ad80e92f4767f2bf2009df0172a621d4c54a805e07ed5a5ed03940ec28e -DIST systemd-stable-254.9.tar.gz 14423806 BLAKE2B ab39c0a00b8451b24b40e39f4bf7ecb912ff23d9cd6f8d30fd0545e895936baa635b1ff63c02a83761682b72f44244aac8338bf6506885c9b07cd0c5247b6693 SHA512 a0300693a044cfe4c76deb0e3e48a927125eb97c3952c07ba68936f1e093c93506d8044b249b534b8e778ade6143b43194f8d6b721a8cd520bc7bb4cb3d3e5c1 -DIST systemd-stable-255.2.tar.gz 14864388 BLAKE2B 101da82a5d63eaa48c2dc4bad5ab713b4e8b544134de8216f315a97736eb699eaf756aef2d9a4e2126f0d248b3a7e28bc986ccc2154d5d110db733d114072eec SHA512 0a9a43adc6d23f52349d298cdff3f3ae6accd7e43a33253608f7a9d241699c7cba3c9f6a0fa6da3ae3cba0e246e272076bfa2cdf5bade7bc019406f407be0bb9 -DIST systemd-stable-255.3.tar.gz 14873273 BLAKE2B e22ef391c691fcf1e765c5112e1a55096d3bba61a9dae3ea1a3958add4e355892a97d5214e63c516ba3b70e2a83bb5d21254812d870f06c16c74a58d4f957d75 SHA512 c2868a53df2176649b0d0c94e5d451c46ba783bcdbc89ce12434ed2d11dba44b4854ffe4c2430f3f64eef2e214cbb51d5f740170afbd9edd66761a8851157453 +DIST systemd-stable-255.4.tar.gz 14952427 BLAKE2B 27f5080f83a9e870fbe8e3ebcb500a63c42022f1f96f26f35c76eeeea85dab691291c31ee716cab330b76df5e576910a6a82f51267eff4f766b1d4c304d815c9 SHA512 8a2bde11a55f7f788ba7751789a5e9be6ce9634e88d54e49f6e832c4c49020c6cacaf2a610fe26f92998b0cbf43c6c2150a96b2c0953d23261009f57d71ea979 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch index d72a506d4f8..7171955041f 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0001-wait-online-set-any-by-default.patch @@ -1,7 +1,7 @@ -From 02ebe43df912c7090a155484fbd1b422c4f438f4 Mon Sep 17 00:00:00 2001 +From 
98cbd0a4576464478f0f9fcd2066efc08bef9491 Mon Sep 17 00:00:00 2001 From: David Michael Date: Tue, 16 Apr 2019 02:44:51 +0000 -Subject: [PATCH 1/7] wait-online: set --any by default +Subject: [PATCH 1/8] wait-online: set --any by default The systemd-networkd-wait-online command would normally continue waiting after a network interface is usable if other interfaces are @@ -15,10 +15,10 @@ earlier) for the original implementation. 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/network/wait-online/wait-online.c b/src/network/wait-online/wait-online.c -index a679b858fa..3b6dad8d1d 100644 +index 5328bba2d8..95294df607 100644 --- a/src/network/wait-online/wait-online.c +++ b/src/network/wait-online/wait-online.c -@@ -20,7 +20,7 @@ static Hashmap *arg_interfaces = NULL; +@@ -21,7 +21,7 @@ static Hashmap *arg_interfaces = NULL; static char **arg_ignore = NULL; static LinkOperationalStateRange arg_required_operstate = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID }; static AddressFamily arg_required_family = ADDRESS_FAMILY_NO; @@ -28,5 +28,5 @@ index a679b858fa..3b6dad8d1d 100644 STATIC_DESTRUCTOR_REGISTER(arg_interfaces, hashmap_free_free_freep); STATIC_DESTRUCTOR_REGISTER(arg_ignore, strv_freep); -- -2.25.1 +2.34.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch index a6fde47f6f8..de0955b8018 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0002-networkd-default-to-kernel-IPForwarding-setting.patch @@ -1,24 +1,24 @@ -From e124d3716ada4fc7c34278435a61d51b07b61024 Mon Sep 17 00:00:00 2001 +From e3fd50ec704b5d48e9d756c1cc5c40e72b7d1fa4 Mon Sep 17 00:00:00 2001 From: Nick Owens 
Date: Tue, 2 Jun 2015 18:22:32 -0700 -Subject: [PATCH 2/7] networkd: default to "kernel" IPForwarding setting +Subject: [PATCH 2/8] networkd: default to "kernel" IPForwarding setting --- src/network/networkd-network.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c -index a6c5b44238..54f9d12fec 100644 +index dcd3e5ae12..2ae481d1ec 100644 --- a/src/network/networkd-network.c +++ b/src/network/networkd-network.c -@@ -465,6 +465,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi +@@ -461,6 +461,7 @@ int network_load_one(Manager *manager, OrderedHashmap **networks, const char *fi .link_local = _ADDRESS_FAMILY_INVALID, .ipv6ll_address_gen_mode = _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_INVALID, + .ip_forward = _ADDRESS_FAMILY_INVALID, .ipv4_accept_local = -1, .ipv4_route_localnet = -1, - .ipv6_privacy_extensions = IPV6_PRIVACY_EXTENSIONS_NO, + .ipv6_privacy_extensions = _IPV6_PRIVACY_EXTENSIONS_INVALID, -- -2.25.1 +2.34.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch index 96f030a3cf1..400cb96e05d 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0003-needs-update-don-t-require-strictly-newer-usr.patch @@ -1,7 +1,7 @@ -From a8366f0ddffabef08c010064ea62e64d7276a0f3 Mon Sep 17 00:00:00 2001 +From 0be1b5367c24427e3285d33fb87aa4acdf3c4dce Mon Sep 17 00:00:00 2001 From: Alex Crawford Date: Wed, 2 Mar 2016 10:46:33 -0800 -Subject: [PATCH 3/7] needs-update: don't require strictly newer usr +Subject: [PATCH 3/8] needs-update: don't require strictly newer usr Updates should be triggered whenever usr changes, not 
only when it is newer. --- @@ -23,10 +23,10 @@ index 3393010ff6..5478baca25 100644 This requires that updates to /usr/ are always followed by an update of the modification time of diff --git a/src/shared/condition.c b/src/shared/condition.c -index a23d6a3e45..8ca1f4606f 100644 +index d3446e8a9d..3f7cc9ea58 100644 --- a/src/shared/condition.c +++ b/src/shared/condition.c -@@ -792,7 +792,7 @@ static int condition_test_needs_update(Condition *c, char **env) { +@@ -793,7 +793,7 @@ static int condition_test_needs_update(Condition *c, char **env) { * First, compare seconds as they are always accurate... */ if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec) @@ -35,7 +35,7 @@ index a23d6a3e45..8ca1f4606f 100644 /* * ...then compare nanoseconds. -@@ -803,7 +803,7 @@ static int condition_test_needs_update(Condition *c, char **env) { +@@ -804,7 +804,7 @@ static int condition_test_needs_update(Condition *c, char **env) { * (otherwise the filesystem supports nsec timestamps, see stat(2)). */ if (usr.st_mtim.tv_nsec == 0 || other.st_mtim.tv_nsec > 0) @@ -44,7 +44,7 @@ index a23d6a3e45..8ca1f4606f 100644 _cleanup_free_ char *timestamp_str = NULL; r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str); -@@ -823,7 +823,7 @@ static int condition_test_needs_update(Condition *c, char **env) { +@@ -824,7 +824,7 @@ static int condition_test_needs_update(Condition *c, char **env) { return true; } @@ -52,7 +52,7 @@ index a23d6a3e45..8ca1f4606f 100644 + return timespec_load_nsec(&usr.st_mtim) != timestamp; } - static int condition_test_first_boot(Condition *c, char **env) { + static bool in_first_boot(void) { -- -2.25.1 +2.34.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch index dfd6dbd492a..a25e8f17ac3 100644 ---
a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0004-core-use-max-for-DefaultTasksMax.patch @@ -1,7 +1,7 @@ -From 4cdbcf5df9a2fd165385465bd5be9b8cdb78f83a Mon Sep 17 00:00:00 2001 +From d21ebfcf17ffc1dba635389193f10d2b93eba730 Mon Sep 17 00:00:00 2001 From: Adrian Vladu Date: Fri, 16 Feb 2024 11:22:08 +0000 -Subject: [PATCH] [PATCH 4/7] core: use max for DefaultTasksMax +Subject: [PATCH 4/8] core: use max for DefaultTasksMax Since systemd v228, systemd has a DefaultTasksMax which defaulted to 512, later 15% of the system's maximum number of PIDs. This @@ -21,10 +21,10 @@ Signed-off-by: Adrian Vladu 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml -index 31b6421399..52819ae8b7 100644 +index 3c06b65f93..71f38692b6 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml -@@ -515,7 +515,7 @@ +@@ -501,7 +501,7 @@ Configure the default value for the per-unit TasksMax= setting. See systemd.resource-control5 for details. This setting applies to all unit types that support resource control settings, with the exception @@ -34,7 +34,7 @@ index 31b6421399..52819ae8b7 100644 Kernel has a default value for kernel.pid_max= and an algorithm of counting in case of more than 32 cores. 
For example, with the default kernel.pid_max=, DefaultTasksMax= defaults to 4915, diff --git a/src/core/manager.c b/src/core/manager.c -index e8c747d96d..df9269aab8 100644 +index 88eebfc626..8992c8c3e3 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -114,7 +114,7 @@ @@ -47,10 +47,10 @@ index e8c747d96d..df9269aab8 100644 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); diff --git a/src/core/system.conf.in b/src/core/system.conf.in -index 9b89a6aa77..5a7e92ab5a 100644 +index 05eb681270..94d0365244 100644 --- a/src/core/system.conf.in +++ b/src/core/system.conf.in -@@ -59,7 +59,7 @@ +@@ -58,7 +58,7 @@ #DefaultIPAccounting=no #DefaultMemoryAccounting={{ 'yes' if MEMORY_ACCOUNTING_DEFAULT else 'no' }} #DefaultTasksAccounting=yes diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch index 75d421ea089..648e3fa3082 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0005-systemd-Disable-SELinux-permissions-checks.patch @@ -1,7 +1,7 @@ -From 0a5e52f5511cd7a5312d06abff12bc432bdedc96 Mon Sep 17 00:00:00 2001 +From 374cca5b2f9aea1c506352cf58b09db5c216a0d3 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 20 Dec 2016 16:43:22 +0000 -Subject: [PATCH 5/7] systemd: Disable SELinux permissions checks +Subject: [PATCH 5/8] systemd: Disable SELinux permissions checks We don't care about the interaction between systemd and SELinux policy, so let's just disable these checks rather than having to incorporate policy @@ -12,7 +12,7 @@ to limit 
containers and not anything running directly on the host. 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c -index 11dbf4640e..c839a4f39e 100644 +index 62181a6309..448f9211d6 100644 --- a/src/core/selinux-access.c +++ b/src/core/selinux-access.c @@ -2,7 +2,7 @@ @@ -25,5 +25,5 @@ index 11dbf4640e..c839a4f39e 100644 #include #include -- -2.25.1 +2.34.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch index 6c4f35cff46..7baa379d346 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch @@ -1,7 +1,7 @@ -From ede353ea720f07b7b19fa638d5a59a7471237e2d Mon Sep 17 00:00:00 2001 +From bffb2a48796a2736d7fb7328d2a88b1cbb812b12 Mon Sep 17 00:00:00 2001 From: Sayan Chowdhury Date: Fri, 16 Dec 2022 16:28:26 +0530 -Subject: [PATCH 6/7] Revert "getty: Pass tty to use by agetty via stdin" +Subject: [PATCH 6/8] Revert "getty: Pass tty to use by agetty via stdin" This reverts commit b4bf9007cbee7dc0b1356897344ae2a7890df84c. 
@@ -17,7 +17,7 @@ Signed-off-by: Sayan Chowdhury 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/units/console-getty.service.in b/units/console-getty.service.in -index 606b7dbe16..54fd7c292d 100644 +index d64112be5e..b908708d8c 100644 --- a/units/console-getty.service.in +++ b/units/console-getty.service.in @@ -22,12 +22,10 @@ ConditionPathExists=/dev/console @@ -35,7 +35,7 @@ index 606b7dbe16..54fd7c292d 100644 TTYReset=yes TTYVHangup=yes diff --git a/units/container-getty@.service.in b/units/container-getty@.service.in -index 8d7e20d5ec..5f095f48b0 100644 +index 8847d735fb..8be25663f5 100644 --- a/units/container-getty@.service.in +++ b/units/container-getty@.service.in @@ -27,13 +27,11 @@ Before=rescue.service @@ -54,7 +54,7 @@ index 8d7e20d5ec..5f095f48b0 100644 TTYReset=yes TTYVHangup=yes diff --git a/units/getty@.service.in b/units/getty@.service.in -index 21d66f9367..78deb7cffe 100644 +index 80b8f3e922..b57666c123 100644 --- a/units/getty@.service.in +++ b/units/getty@.service.in @@ -38,13 +38,11 @@ ConditionPathExists=/dev/tty0 @@ -73,7 +73,7 @@ index 21d66f9367..78deb7cffe 100644 TTYReset=yes TTYVHangup=yes diff --git a/units/serial-getty@.service.in b/units/serial-getty@.service.in -index 2433124c55..bb7af3105d 100644 +index 6bf101eac9..479b8759a9 100644 --- a/units/serial-getty@.service.in +++ b/units/serial-getty@.service.in @@ -33,12 +33,10 @@ Before=rescue.service @@ -91,5 +91,5 @@ index 2433124c55..bb7af3105d 100644 TTYReset=yes TTYVHangup=yes -- -2.25.1 +2.34.1 diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch index a6734671fa2..8471991893d 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch +++ 
b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0007-units-Keep-using-old-journal-file-format.patch @@ -1,7 +1,7 @@ -From 44374d98fb65ff5fdbc2a7d07a076b50b8f2b003 Mon Sep 17 00:00:00 2001 +From 6a4c6f97742afc9ca5de40335b2d041095990aa2 Mon Sep 17 00:00:00 2001 From: Adrian Vladu Date: Fri, 16 Feb 2024 11:29:04 +0000 -Subject: [PATCH] [PATCH 7/7] units: Keep using old journal file format +Subject: [PATCH 7/8] units: Keep using old journal file format Systemd 252 made an incompatible change in journal file format. Temporarily force journald to use the old journal format to give logging containers more diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-Revert-core-service-when-resetting-PID-also-reset-known.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-Revert-core-service-when-resetting-PID-also-reset-known.patch deleted file mode 100644 index b7938941090..00000000000 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-Revert-core-service-when-resetting-PID-also-reset-known.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 34e834f496338fdc2a8a8cc771cba4082079cf9a Mon Sep 17 00:00:00 2001 -From: msizanoen -Date: Mon, 12 Jun 2023 10:30:12 +0700 -Subject: [PATCH] Revert "core/service: when resetting PID also reset known - flag" - -This reverts commit ff32060f2ed37b68dc26256b05e2e69013b0ecfe. - -This change is incorrect as we don't want to mark the PID as invalid but -only mark it as dead. - -The change in question also breaks user level socket activation for -`podman.service` as the termination of the main `podman system service` -process is not properly handled, causing any application accessing the -socket to hang. - -This is because the user-level `podman.service` unit also hosts two -non-main processes: `rootlessport` and `rootlessport-child` which causes -the `cgroup_good` check to still succeed. 
- -The original submitter of this commit is recommended to find another -more correct way to fix the cgroupsv1 issue on CentOS 8. - -(cherry picked from commit f29f0877c5abfd03060838d1812ea6fdff3b0b37) ---- - src/core/service.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/core/service.c b/src/core/service.c -index c05f13c765..211f72900e 100644 ---- a/src/core/service.c -+++ b/src/core/service.c -@@ -3529,7 +3529,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { - return; - - s->main_pid = 0; -- s->main_pid_known = false; - exec_status_exit(&s->main_exec_status, &s->exec_context, pid, code, status); - - if (s->main_command) { diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-sysext-Mutable-overlays.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-sysext-Mutable-overlays.patch new file mode 100644 index 00000000000..f6fb957cd2f --- /dev/null +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/0008-sysext-Mutable-overlays.patch @@ -0,0 +1,1310 @@ +From b3e3257bfa07ae9ff63f5a139a1f7b72353a456b Mon Sep 17 00:00:00 2001 +From: Krzesimir Nowak +Date: Mon, 22 Apr 2024 16:43:38 +0200 +Subject: [PATCH 8/8] sysext: Mutable overlays + +--- + src/basic/path-util.c | 12 + + src/basic/path-util.h | 3 +- + src/shared/mount-util.c | 10 + + src/shared/mount-util.h | 1 + + src/sysext/sysext.c | 954 +++++++++++++++++++++++++++++++++++++--- + 5 files changed, 913 insertions(+), 67 deletions(-) + +diff --git a/src/basic/path-util.c b/src/basic/path-util.c +index 6810bf66aa..b21c7b66a3 100644 +--- a/src/basic/path-util.c ++++ b/src/basic/path-util.c +@@ -525,6 +525,18 @@ int path_compare_filename(const char *a, const char *b) { + return strcmp(fa, fb); + } + ++int path_equal_or_inode_same_full(const char *a, const char *b, int flags) { ++ /* Returns true if paths are of the same entry, false if not, <0 on error. 
*/ ++ ++ if (path_equal(a, b)) ++ return 1; ++ ++ if (!a || !b) ++ return 0; ++ ++ return inode_same(a, b, flags); ++} ++ + char* path_extend_internal(char **x, ...) { + size_t sz, old_sz; + char *q, *nx; +diff --git a/src/basic/path-util.h b/src/basic/path-util.h +index 6d943e967f..19d42c56bc 100644 +--- a/src/basic/path-util.h ++++ b/src/basic/path-util.h +@@ -68,8 +68,9 @@ static inline bool path_equal_filename(const char *a, const char *b) { + return path_compare_filename(a, b) == 0; + } + ++int path_equal_or_inode_same_full(const char *a, const char *b, int flags); + static inline bool path_equal_or_inode_same(const char *a, const char *b, int flags) { +- return path_equal(a, b) || inode_same(a, b, flags) > 0; ++ return path_equal_or_inode_same_full(a, b, flags) > 0; + } + + char* path_extend_internal(char **x, ...); +diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c +index 4f2acce513..dd9a995fb6 100644 +--- a/src/shared/mount-util.c ++++ b/src/shared/mount-util.c +@@ -453,6 +453,16 @@ int bind_remount_one_with_mountinfo( + return 0; + } + ++int bind_remount_one(const char *path, unsigned long new_flags, unsigned long flags_mask) { ++ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; ++ ++ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); ++ if (!proc_self_mountinfo) ++ return log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m"); ++ ++ return bind_remount_one_with_mountinfo(path, new_flags, flags_mask, proc_self_mountinfo); ++} ++ + static int mount_switch_root_pivot(int fd_newroot, const char *path) { + assert(fd_newroot >= 0); + assert(path); +diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h +index ef31104900..679c94c950 100644 +--- a/src/shared/mount-util.h ++++ b/src/shared/mount-util.h +@@ -26,6 +26,7 @@ static inline int bind_remount_recursive(const char *prefix, unsigned long new_f + } + + int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE 
*proc_self_mountinfo); ++int bind_remount_one(const char *path, unsigned long new_flags, unsigned long flags_mask); + + int mount_switch_root_full(const char *path, unsigned long mount_propagation_flag, bool force_ms_move); + static inline int mount_switch_root(const char *path, unsigned long mount_propagation_flag) { +diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c +index 8dc515e4d5..7c364e5bc7 100644 +--- a/src/sysext/sysext.c ++++ b/src/sysext/sysext.c +@@ -39,15 +39,41 @@ + #include "pager.h" + #include "parse-argument.h" + #include "parse-util.h" ++#include "path-util.h" + #include "pretty-print.h" + #include "process-util.h" ++#include "rm-rf.h" + #include "sort-util.h" ++#include "string-table.h" ++#include "string-util.h" + #include "terminal-util.h" + #include "user-util.h" + #include "varlink.h" + #include "varlink-io.systemd.sysext.h" + #include "verbs.h" + ++typedef enum MutableMode { ++ MUTABLE_NO, ++ MUTABLE_YES, ++ MUTABLE_AUTO, ++ MUTABLE_IMPORT, ++ MUTABLE_EPHEMERAL, ++ MUTABLE_EPHEMERAL_IMPORT, ++ _MUTABLE_MAX, ++ _MUTABLE_INVALID = -EINVAL, ++} MutableMode; ++ ++static const char* const mutable_mode_table[_MUTABLE_MAX] = { ++ [MUTABLE_NO] = "no", ++ [MUTABLE_YES] = "yes", ++ [MUTABLE_AUTO] = "auto", ++ [MUTABLE_IMPORT] = "import", ++ [MUTABLE_EPHEMERAL] = "ephemeral", ++ [MUTABLE_EPHEMERAL_IMPORT] = "ephemeral-import", ++}; ++ ++DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(mutable_mode, MutableMode, MUTABLE_YES); ++ + static char **arg_hierarchies = NULL; /* "/usr" + "/opt" by default for sysext and /etc by default for confext */ + static char *arg_root = NULL; + static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF; +@@ -58,10 +84,13 @@ static bool arg_no_reload = false; + static int arg_noexec = -1; + static ImagePolicy *arg_image_policy = NULL; + static bool arg_varlink = false; ++static MutableMode arg_mutable = MUTABLE_NO; + + /* Is set to IMAGE_CONFEXT when systemd is called with the confext functionality 
instead of the default */ + static ImageClass arg_image_class = IMAGE_SYSEXT; + ++#define MUTABLE_EXTENSIONS_BASE_DIR "/var/lib/extensions.mutable" ++ + STATIC_DESTRUCTOR_REGISTER(arg_hierarchies, strv_freep); + STATIC_DESTRUCTOR_REGISTER(arg_root, freep); + STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep); +@@ -75,6 +104,7 @@ static const struct { + const char *level_env; + const char *scope_env; + const char *name_env; ++ const char *mode_env; + const ImagePolicy *default_image_policy; + unsigned long default_mount_flags; + } image_class_info[_IMAGE_CLASS_MAX] = { +@@ -86,6 +116,7 @@ static const struct { + .level_env = "SYSEXT_LEVEL", + .scope_env = "SYSEXT_SCOPE", + .name_env = "SYSTEMD_SYSEXT_HIERARCHIES", ++ .mode_env = "SYSTEMD_SYSEXT_MUTABLE_MODE", + .default_image_policy = &image_policy_sysext, + .default_mount_flags = MS_RDONLY|MS_NODEV, + }, +@@ -97,11 +128,16 @@ static const struct { + .level_env = "CONFEXT_LEVEL", + .scope_env = "CONFEXT_SCOPE", + .name_env = "SYSTEMD_CONFEXT_HIERARCHIES", ++ .mode_env = "SYSTEMD_CONFEXT_MUTABLE_MODE", + .default_image_policy = &image_policy_confext, + .default_mount_flags = MS_RDONLY|MS_NODEV|MS_NOSUID|MS_NOEXEC, + } + }; + ++static int parse_mutable_mode(const char *p) { ++ return mutable_mode_from_string(p); ++} ++ + static int is_our_mount_point( + ImageClass image_class, + const char *p) { +@@ -150,7 +186,7 @@ static int is_our_mount_point( + return log_error_errno(r, "Failed to parse device major/minor stored in '%s/dev' file on '%s': %m", image_class_info[image_class].dot_directory_name, p); + + if (lstat(p, &st) < 0) +- return log_error_errno(r, "Failed to stat %s: %m", p); ++ return log_error_errno(errno, "Failed to stat %s: %m", p); + + if (st.st_dev != dev) { + log_debug("Hierarchy '%s' reports a different device major/minor than what we are seeing, assuming offline copy.", p); +@@ -248,11 +284,22 @@ static int unmerge_hierarchy( + ImageClass image_class, + const char *p) { + ++ 
_cleanup_free_ char *dot_dir = NULL, *work_dir_info_file = NULL; + int r; + + assert(p); + ++ dot_dir = path_join(p, image_class_info[image_class].dot_directory_name); ++ if (!dot_dir) ++ return log_oom(); ++ ++ work_dir_info_file = path_join(dot_dir, "work_dir"); ++ if (!work_dir_info_file) ++ return log_oom(); ++ + for (;;) { ++ _cleanup_free_ char *escaped_work_dir_in_root = NULL, *work_dir = NULL; ++ + /* We only unmount /usr/ if it is a mount point and really one of ours, in order not to break + * systems where /usr/ is a mount point of its own already. */ + +@@ -262,9 +309,40 @@ static int unmerge_hierarchy( + if (r == 0) + break; + ++ r = read_one_line_file(work_dir_info_file, &escaped_work_dir_in_root); ++ if (r < 0) { ++ if (r != -ENOENT) ++ return log_error_errno(r, "Failed to read '%s': %m", work_dir_info_file); ++ } else { ++ _cleanup_free_ char *work_dir_in_root = NULL; ++ ssize_t l; ++ ++ l = cunescape_length(escaped_work_dir_in_root, r, 0, &work_dir_in_root); ++ if (l < 0) ++ return log_error_errno(l, "Failed to unescape work directory path: %m"); ++ work_dir = path_join(arg_root, work_dir_in_root); ++ if (!work_dir) ++ return log_oom(); ++ } ++ ++ r = umount_verbose(LOG_DEBUG, dot_dir, MNT_DETACH|UMOUNT_NOFOLLOW); ++ if (r < 0) { ++ /* EINVAL is possibly "not a mount point". Let it slide as it's expected to occur if ++ * the whole hierarchy was read-only, so the dot directory inside it was not ++ * bind-mounted as read-only. 
*/ ++ if (r != -EINVAL) ++ return log_error_errno(r, "Failed to unmount '%s': %m", dot_dir); ++ } ++ + r = umount_verbose(LOG_ERR, p, MNT_DETACH|UMOUNT_NOFOLLOW); + if (r < 0) +- return log_error_errno(r, "Failed to unmount file system '%s': %m", p); ++ return r; ++ ++ if (work_dir) { ++ r = rm_rf(work_dir, REMOVE_ROOT | REMOVE_MISSING_OK | REMOVE_PHYSICAL); ++ if (r < 0) ++ return log_error_errno(r, "Failed to remove '%s': %m", work_dir); ++ } + + log_info("Unmerged '%s'.", p); + } +@@ -448,7 +526,7 @@ static int verb_status(int argc, char **argv, void *userdata) { + return log_oom(); + + if (stat(*p, &st) < 0) +- return log_error_errno(r, "Failed to stat() '%s': %m", *p); ++ return log_error_errno(errno, "Failed to stat() '%s': %m", *p); + + r = table_add_many( + t, +@@ -474,11 +552,38 @@ static int verb_status(int argc, char **argv, void *userdata) { + return ret; + } + ++static int append_overlayfs_path_option( ++ char **options, ++ const char *separator, ++ const char *option, ++ const char *path) { ++ ++ _cleanup_free_ char *escaped = NULL; ++ ++ assert(options); ++ assert(separator); ++ assert(path); ++ ++ escaped = shell_escape(path, ",:"); ++ if (!escaped) ++ return log_oom(); ++ ++ if (option) { ++ if (!strextend(options, separator, option, "=", escaped)) ++ return log_oom(); ++ } else if (!strextend(options, separator, escaped)) ++ return log_oom(); ++ ++ return 0; ++} ++ + static int mount_overlayfs( + ImageClass image_class, + int noexec, + const char *where, +- char **layers) { ++ char **layers, ++ const char *upper_dir, ++ const char *work_dir) { + + _cleanup_free_ char *options = NULL; + bool separator = false; +@@ -486,20 +591,16 @@ static int mount_overlayfs( + int r; + + assert(where); ++ assert((upper_dir && work_dir) || (!upper_dir && !work_dir)); + + options = strdup("lowerdir="); + if (!options) + return log_oom(); + + STRV_FOREACH(l, layers) { +- _cleanup_free_ char *escaped = NULL; +- +- escaped = shell_escape(*l, ",:"); +- if (!escaped) +- 
return log_oom(); +- +- if (!strextend(&options, separator ? ":" : "", escaped)) +- return log_oom(); ++ r = append_overlayfs_path_option(&options, separator ? ":" : "", NULL, *l); ++ if (r < 0) ++ return r; + + separator = true; + } +@@ -508,6 +609,22 @@ static int mount_overlayfs( + if (noexec >= 0) + SET_FLAG(flags, MS_NOEXEC, noexec); + ++ if (upper_dir && work_dir) { ++ r = append_overlayfs_path_option(&options, ",", "upperdir", upper_dir); ++ if (r < 0) ++ return r; ++ ++ flags &= ~MS_RDONLY; ++ ++ r = append_overlayfs_path_option(&options, ",", "workdir", work_dir); ++ if (r < 0) ++ return r; ++ /* redirect_dir=on and noatime prevent unnecessary upcopies, metacopy=off prevents broken ++ * files from partial upcopies after umount. */ ++ if (!strextend(&options, ",redirect_dir=on,noatime,metacopy=off")) ++ return log_oom(); ++ } ++ + /* Now mount the actual overlayfs */ + r = mount_nofollow_verbose(LOG_ERR, image_class_info[image_class].short_identifier, where, "overlay", flags, options); + if (r < 0) +@@ -516,62 +633,277 @@ static int mount_overlayfs( + return 0; + } + +-static int merge_hierarchy( +- ImageClass image_class, ++static char *hierarchy_as_single_path_component(const char *hierarchy) { ++ /* We normally expect hierarchy to be /usr, /opt or /etc, but for debugging purposes the hierarchy ++ * could very well be like /foo/bar/baz/. So for a given hierarchy we generate a directory name by ++ * stripping the leading and trailing separators and replacing the rest of separators with dots. This ++ * makes the generated name to be the same for /foo/bar/baz and for /foo/bar.baz, but, again, ++ * specifying a different hierarchy is a debugging feature, so non-unique mapping should not be an ++ * issue in general case.
*/ ++ const char *stripped = hierarchy; ++ _cleanup_free_ char *dir_name = NULL; ++ ++ assert(hierarchy); ++ ++ stripped += strspn(stripped, "/"); ++ ++ dir_name = strdup(stripped); ++ if (!dir_name) ++ return NULL; ++ delete_trailing_chars(dir_name, "/"); ++ string_replace_char(dir_name, '/', '.'); ++ return TAKE_PTR(dir_name); ++} ++ ++static int paths_on_same_fs(const char *path1, const char *path2) { ++ struct stat st1, st2; ++ ++ assert(path1); ++ assert(path2); ++ ++ if (stat(path1, &st1) < 0) ++ return log_error_errno(errno, "Failed to stat '%s': %m", path1); ++ ++ if (stat(path2, &st2) < 0) ++ return log_error_errno(errno, "Failed to stat '%s': %m", path2); ++ ++ return st1.st_dev == st2.st_dev; ++} ++ ++static int work_dir_for_hierarchy( + const char *hierarchy, +- int noexec, +- char **extensions, +- char **paths, +- const char *meta_path, +- const char *overlay_path) { ++ const char *resolved_upper_dir, ++ char **ret_work_dir) { ++ ++ _cleanup_free_ char *parent = NULL; ++ int r; ++ ++ assert(hierarchy); ++ assert(resolved_upper_dir); ++ assert(ret_work_dir); ++ ++ r = path_extract_directory(resolved_upper_dir, &parent); ++ if (r < 0) ++ return log_error_errno(r, "Failed to get parent directory of upperdir '%s': %m", resolved_upper_dir); ++ ++ /* TODO: paths_in_same_superblock? partition? device? 
*/ ++ r = paths_on_same_fs(resolved_upper_dir, parent); ++ if (r < 0) ++ return r; ++ if (!r) ++ return log_error_errno(SYNTHETIC_ERRNO(EXDEV), "Unable to find a suitable workdir location for upperdir '%s' for host hierarchy '%s' - parent directory of the upperdir is in a different filesystem", resolved_upper_dir, hierarchy); ++ ++ _cleanup_free_ char *f = NULL, *dir_name = NULL; ++ ++ f = hierarchy_as_single_path_component(hierarchy); ++ if (!f) ++ return log_oom(); ++ dir_name = strjoin(".systemd-", f, "-workdir"); ++ if (!dir_name) ++ return log_oom(); ++ ++ free(f); ++ f = path_join(parent, dir_name); ++ if (!f) ++ return log_oom(); ++ ++ *ret_work_dir = TAKE_PTR(f); ++ return 0; ++} ++ ++typedef struct OverlayFSPaths { ++ char *hierarchy; ++ mode_t hierarchy_mode; ++ char *resolved_hierarchy; ++ char *resolved_mutable_directory; ++ ++ /* NULL if merged fs is read-only */ ++ char *upper_dir; ++ /* NULL if merged fs is read-only */ ++ char *work_dir; ++ /* lowest index is top lowerdir, highest index is bottom lowerdir */ ++ char **lower_dirs; ++} OverlayFSPaths; ++ ++static OverlayFSPaths *overlayfs_paths_free(OverlayFSPaths *op) { ++ if (!op) ++ return NULL; ++ ++ free(op->hierarchy); ++ free(op->resolved_hierarchy); ++ free(op->resolved_mutable_directory); ++ ++ free(op->upper_dir); ++ free(op->work_dir); ++ strv_free(op->lower_dirs); ++ ++ free(op); ++ return NULL; ++} ++DEFINE_TRIVIAL_CLEANUP_FUNC(OverlayFSPaths *, overlayfs_paths_free); + +- _cleanup_free_ char *resolved_hierarchy = NULL, *f = NULL, *buf = NULL; +- _cleanup_strv_free_ char **layers = NULL; ++static int resolve_hierarchy(const char *hierarchy, char **ret_resolved_hierarchy) { ++ _cleanup_free_ char *resolved_path = NULL; ++ int r; ++ ++ assert(hierarchy); ++ assert(ret_resolved_hierarchy); ++ ++ r = chase(hierarchy, arg_root, CHASE_PREFIX_ROOT, &resolved_path, NULL); ++ if (r < 0 && r != -ENOENT) ++ return log_error_errno(r, "Failed to resolve hierarchy '%s': %m", hierarchy); ++ ++ 
*ret_resolved_hierarchy = TAKE_PTR(resolved_path); ++ return 0; ++} ++ ++static int mutable_directory_mode_matches_hierarchy( ++ const char *root_or_null, ++ const char *path, ++ mode_t hierarchy_mode) { ++ ++ _cleanup_free_ char *path_in_root = NULL; + struct stat st; ++ mode_t actual_mode; ++ ++ assert(path); ++ ++ path_in_root = path_join(root_or_null, path); ++ if (!path_in_root) ++ return log_oom(); ++ ++ if (stat(path_in_root, &st) < 0) { ++ if (errno == ENOENT) ++ return 0; ++ return log_error_errno(errno, "Failed to stat mutable directory '%s': %m", path_in_root); ++ } ++ ++ actual_mode = st.st_mode & 0777; ++ if (actual_mode != hierarchy_mode) ++ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Mutable directory '%s' has mode %04o, ought to have mode %04o", path_in_root, actual_mode, hierarchy_mode); ++ ++ return 0; ++} ++ ++static int resolve_mutable_directory( ++ const char *hierarchy, ++ mode_t hierarchy_mode, ++ const char *workspace, ++ char **ret_resolved_mutable_directory) { ++ ++ _cleanup_free_ char *path = NULL, *resolved_path = NULL, *dir_name = NULL; ++ const char *root = arg_root, *base = MUTABLE_EXTENSIONS_BASE_DIR; + int r; + + assert(hierarchy); +- assert(meta_path); +- assert(overlay_path); ++ assert(ret_resolved_mutable_directory); + +- /* Resolve the path of the host's version of the hierarchy, i.e. what we want to use as lowest layer +- * in the overlayfs stack. 
*/ +- r = chase(hierarchy, arg_root, CHASE_PREFIX_ROOT, &resolved_hierarchy, NULL); +- if (r == -ENOENT) +- log_debug_errno(r, "Hierarchy '%s' on host doesn't exist, not merging.", hierarchy); +- else if (r < 0) +- return log_error_errno(r, "Failed to resolve host hierarchy '%s': %m", hierarchy); +- else { +- r = dir_is_empty(resolved_hierarchy, /* ignore_hidden_or_backup= */ false); +- if (r < 0) +- return log_error_errno(r, "Failed to check if host hierarchy '%s' is empty: %m", resolved_hierarchy); +- if (r > 0) { +- log_debug("Host hierarchy '%s' is empty, not merging.", resolved_hierarchy); +- resolved_hierarchy = mfree(resolved_hierarchy); +- } ++ if (arg_mutable == MUTABLE_NO) { ++ log_debug("Mutability for hierarchy '%s' is disabled, not resolving mutable directory.", hierarchy); ++ *ret_resolved_mutable_directory = NULL; ++ return 0; + } + +- /* Let's generate a metadata file that lists all extensions we took into account for this +- * hierarchy. We include this in the final fs, to make things nicely discoverable and +- * recognizable. */ +- f = path_join(meta_path, image_class_info[image_class].dot_directory_name, image_class_info[image_class].short_identifier_plural); +- if (!f) ++ if (IN_SET(arg_mutable, MUTABLE_EPHEMERAL, MUTABLE_EPHEMERAL_IMPORT)) { ++ /* We create mutable directory inside the temporary tmpfs workspace, which is a fixed ++ * location that ignores arg_root. */ ++ root = NULL; ++ base = workspace; ++ } ++ ++ dir_name = hierarchy_as_single_path_component(hierarchy); ++ if (!dir_name) + return log_oom(); + +- buf = strv_join(extensions, "\n"); +- if (!buf) ++ path = path_join(base, dir_name); ++ if (!path) + return log_oom(); + +- r = write_string_file(f, buf, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755); ++ if (IN_SET(arg_mutable, MUTABLE_YES, MUTABLE_AUTO)) { ++ /* If there already is a mutable directory, check if its mode matches hierarchy. 
Merged ++ * hierarchy will have the same mode as the mutable directory, so we want no surprising mode ++ * changes here. */ ++ r = mutable_directory_mode_matches_hierarchy(root, path, hierarchy_mode); ++ if (r < 0) ++ return r; ++ } ++ ++ if (IN_SET(arg_mutable, MUTABLE_YES, MUTABLE_EPHEMERAL, MUTABLE_EPHEMERAL_IMPORT)) { ++ _cleanup_free_ char *path_in_root = NULL; ++ ++ path_in_root = path_join(root, path); ++ if (!path_in_root) ++ return log_oom(); ++ ++ r = mkdir_p(path_in_root, 0700); ++ if (r < 0) ++ return log_error_errno(r, "Failed to create a directory '%s': %m", path_in_root); ++ } ++ ++ r = chase(path, root, CHASE_PREFIX_ROOT, &resolved_path, NULL); ++ if (r < 0 && r != -ENOENT) ++ return log_error_errno(r, "Failed to resolve mutable directory '%s': %m", path); ++ ++ *ret_resolved_mutable_directory = TAKE_PTR(resolved_path); ++ return 0; ++} ++ ++static int overlayfs_paths_new(const char *hierarchy, const char *workspace_path, OverlayFSPaths **ret_op) { ++ _cleanup_free_ char *hierarchy_copy = NULL, *resolved_hierarchy = NULL, *resolved_mutable_directory = NULL; ++ mode_t hierarchy_mode; ++ ++ int r; ++ ++ assert (hierarchy); ++ assert (ret_op); ++ ++ hierarchy_copy = strdup(hierarchy); ++ if (!hierarchy_copy) ++ return log_oom(); ++ ++ r = resolve_hierarchy(hierarchy, &resolved_hierarchy); + if (r < 0) +- return log_error_errno(r, "Failed to write extension meta file '%s': %m", f); ++ return r; + +- /* Put the meta path (i.e. 
our synthesized stuff) at the top of the layer stack */ +- layers = strv_new(meta_path); +- if (!layers) ++ if (resolved_hierarchy) { ++ struct stat st; ++ ++ if (stat(resolved_hierarchy, &st) < 0) ++ return log_error_errno(errno, "Failed to stat '%s': %m", resolved_hierarchy); ++ hierarchy_mode = st.st_mode & 0777; ++ } else ++ hierarchy_mode = 0755; ++ ++ r = resolve_mutable_directory(hierarchy, hierarchy_mode, workspace_path, &resolved_mutable_directory); ++ if (r < 0) ++ return r; ++ ++ OverlayFSPaths *op; ++ op = new(OverlayFSPaths, 1); ++ if (!op) + return log_oom(); + +- /* Put the extensions in the middle */ ++ *op = (OverlayFSPaths) { ++ .hierarchy = TAKE_PTR(hierarchy_copy), ++ .hierarchy_mode = hierarchy_mode, ++ .resolved_hierarchy = TAKE_PTR(resolved_hierarchy), ++ .resolved_mutable_directory = TAKE_PTR(resolved_mutable_directory), ++ }; ++ ++ *ret_op = TAKE_PTR(op); ++ return 0; ++} ++ ++static int determine_used_extensions(const char *hierarchy, char **paths, char ***ret_used_paths, size_t *ret_extensions_used) { ++ _cleanup_strv_free_ char **used_paths = NULL; ++ size_t n = 0; ++ int r; ++ ++ assert(hierarchy); ++ assert(paths); ++ assert(ret_used_paths); ++ assert(ret_extensions_used); ++ + STRV_FOREACH(p, paths) { + _cleanup_free_ char *resolved = NULL; + +@@ -591,54 +923,514 @@ static int merge_hierarchy( + continue; + } + +- r = strv_consume(&layers, TAKE_PTR(resolved)); ++ r = strv_consume_with_size (&used_paths, &n, TAKE_PTR(resolved)); + if (r < 0) + return log_oom(); + } + +- if (!layers[1]) /* No extension with files in this hierarchy? Then don't do anything. 
*/ ++ *ret_used_paths = TAKE_PTR(used_paths); ++ *ret_extensions_used = n; ++ return 0; ++} ++ ++static int maybe_import_mutable_directory(OverlayFSPaths *op) { ++ int r; ++ ++ assert(op); ++ ++ /* If importing mutable layer and it actually exists and is not a hierarchy itself, add it just below ++ * the meta path */ ++ ++ if (arg_mutable != MUTABLE_IMPORT || !op->resolved_mutable_directory) + return 0; + +- if (resolved_hierarchy) { +- /* Add the host hierarchy as last (lowest) layer in the stack */ +- r = strv_consume(&layers, TAKE_PTR(resolved_hierarchy)); ++ r = path_equal_or_inode_same_full(op->resolved_hierarchy, op->resolved_mutable_directory, 0); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, op->resolved_mutable_directory); ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(ELOOP), "Not importing mutable directory for hierarchy %s as a lower dir, because it points to the hierarchy itself", op->hierarchy); ++ ++ r = strv_extend(&op->lower_dirs, op->resolved_mutable_directory); ++ if (r < 0) ++ return log_oom(); ++ ++ return 0; ++} ++ ++static int maybe_import_ignored_mutable_directory(OverlayFSPaths *op) { ++ _cleanup_free_ char *dir_name = NULL, *path = NULL, *resolved_path = NULL; ++ int r; ++ ++ assert(op); ++ ++ /* If importing the ignored mutable layer and it actually exists and is not a hierarchy itself, add ++ * it just below the meta path */ ++ if (arg_mutable != MUTABLE_EPHEMERAL_IMPORT) ++ return 0; ++ ++ dir_name = hierarchy_as_single_path_component(op->hierarchy); ++ if (!dir_name) ++ return log_oom(); ++ ++ path = path_join(MUTABLE_EXTENSIONS_BASE_DIR, dir_name); ++ if (!path) ++ return log_oom(); ++ ++ r = chase(path, arg_root, CHASE_PREFIX_ROOT, &resolved_path, NULL); ++ if (r == -ENOENT) { ++ log_debug("Mutable directory for %s does not exist, not importing", op->hierarchy); ++ return 0; ++ } ++ if (r < 0) ++ return log_error_errno(r, 
"Failed to resolve mutable directory '%s': %m", path); ++ ++ r = path_equal_or_inode_same_full(op->resolved_hierarchy, resolved_path, 0); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, op->resolved_mutable_directory); ++ ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(ELOOP), "Not importing mutable directory for hierarchy %s as a lower dir, because it points to the hierarchy itself", op->hierarchy); ++ ++ r = strv_consume(&op->lower_dirs, TAKE_PTR(resolved_path)); ++ if (r < 0) ++ return log_oom(); ++ ++ return 0; ++} ++ ++static int determine_top_lower_dirs(OverlayFSPaths *op, const char *meta_path) { ++ int r; ++ ++ assert(op); ++ assert(meta_path); ++ ++ /* Put the meta path (i.e. our synthesized stuff) at the top of the layer stack */ ++ r = strv_extend(&op->lower_dirs, meta_path); ++ if (r < 0) ++ return log_oom(); ++ ++ r = maybe_import_mutable_directory(op); ++ if (r < 0) ++ return r; ++ ++ r = maybe_import_ignored_mutable_directory(op); ++ if (r < 0) ++ return r; ++ ++ return 0; ++} ++ ++static int determine_middle_lower_dirs(OverlayFSPaths *op, char **paths) { ++ int r; ++ ++ assert(op); ++ assert(paths); ++ ++ /* The paths were already determined in determine_used_extensions, so we just take them as is. 
*/ ++ r = strv_extend_strv(&op->lower_dirs, paths, false); ++ if (r < 0) ++ return log_oom (); ++ ++ return 0; ++} ++ ++static int hierarchy_as_lower_dir(OverlayFSPaths *op) { ++ int r; ++ ++ /* return 0 if hierarchy should be used as lower dir, >0, if not */ ++ ++ assert(op); ++ ++ if (!op->resolved_hierarchy) { ++ log_debug("Host hierarchy '%s' does not exist, will not be used as lowerdir", op->hierarchy); ++ return 1; ++ } ++ ++ r = dir_is_empty(op->resolved_hierarchy, /* ignore_hidden_or_backup= */ false); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check if host hierarchy '%s' is empty: %m", op->resolved_hierarchy); ++ if (r > 0) { ++ log_debug("Host hierarchy '%s' is empty, will not be used as lower dir.", op->resolved_hierarchy); ++ return 1; ++ } ++ ++ if (arg_mutable == MUTABLE_IMPORT) { ++ log_debug("Mutability for host hierarchy '%s' is disabled, so host hierarchy will be a lowerdir", op->resolved_hierarchy); ++ return 0; ++ } ++ ++ if (arg_mutable == MUTABLE_EPHEMERAL_IMPORT) { ++ log_debug("Mutability for host hierarchy '%s' is ephemeral, so host hierarchy will be a lowerdir", op->resolved_hierarchy); ++ return 0; ++ } ++ ++ if (!op->resolved_mutable_directory) { ++ log_debug("No mutable directory found, so host hierarchy '%s' will be used as lowerdir", op->resolved_hierarchy); ++ return 0; ++ } ++ ++ r = path_equal_or_inode_same_full(op->resolved_hierarchy, op->resolved_mutable_directory, 0); ++ if (r < 0) ++ return log_error_errno(r, "Failed to check equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, op->resolved_mutable_directory); ++ if (r > 0) { ++ log_debug("Host hierarchy '%s' will serve as upperdir.", op->resolved_hierarchy); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int determine_bottom_lower_dirs(OverlayFSPaths *op) { ++ int r; ++ ++ assert(op); ++ ++ r = hierarchy_as_lower_dir(op); ++ if (r < 0) ++ return r; ++ if (!r) { ++ r = strv_extend(&op->lower_dirs, op->resolved_hierarchy); + if (r < 
0) + return log_oom(); + } + ++ return 0; ++} ++ ++static int determine_lower_dirs( ++ OverlayFSPaths *op, ++ char **paths, ++ const char *meta_path) { ++ ++ int r; ++ ++ assert(op); ++ assert(paths); ++ assert(meta_path); ++ ++ r = determine_top_lower_dirs(op, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = determine_middle_lower_dirs(op, paths); ++ if (r < 0) ++ return r; ++ ++ r = determine_bottom_lower_dirs(op); ++ if (r < 0) ++ return r; ++ ++ return 0; ++} ++ ++static int determine_upper_dir(OverlayFSPaths *op) { ++ int r; ++ ++ assert(op); ++ assert(!op->upper_dir); ++ ++ if (arg_mutable == MUTABLE_IMPORT) { ++ log_debug("Mutability is disabled, there will be no upperdir for host hierarchy '%s'", op->hierarchy); ++ return 0; ++ } ++ ++ if (!op->resolved_mutable_directory) { ++ log_debug("No mutable directory found for host hierarchy '%s', there will be no upperdir", op->hierarchy); ++ return 0; ++ } ++ ++ /* Require upper dir to be on writable filesystem if it's going to be used as an actual overlayfs ++ * upperdir, instead of a lowerdir as an imported path. 
*/ ++ r = path_is_read_only_fs(op->resolved_mutable_directory); ++ if (r < 0) ++ return log_error_errno(r, "Failed to determine if mutable directory '%s' is on read-only filesystem: %m", op->resolved_mutable_directory); ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(EROFS), "Can't use '%s' as an upperdir as it is read-only.", op->resolved_mutable_directory); ++ ++ op->upper_dir = strdup(op->resolved_mutable_directory); ++ if (!op->upper_dir) ++ return log_oom(); ++ ++ return 0; ++} ++ ++static int determine_work_dir(OverlayFSPaths *op) { ++ _cleanup_free_ char *work_dir = NULL; ++ int r; ++ ++ assert(op); ++ assert(!op->work_dir); ++ ++ if (!op->upper_dir) ++ return 0; ++ ++ if (arg_mutable == MUTABLE_IMPORT) ++ return 0; ++ ++ r = work_dir_for_hierarchy(op->hierarchy, op->upper_dir, &work_dir); ++ if (r < 0) ++ return r; ++ ++ op->work_dir = TAKE_PTR(work_dir); ++ return 0; ++} ++ ++static int mount_overlayfs_with_op( ++ OverlayFSPaths *op, ++ ImageClass image_class, ++ int noexec, ++ const char *overlay_path, ++ const char *meta_path) { ++ ++ int r; ++ const char *top_layer = NULL; ++ ++ assert(op); ++ assert(overlay_path); ++ + r = mkdir_p(overlay_path, 0700); + if (r < 0) + return log_error_errno(r, "Failed to make directory '%s': %m", overlay_path); + +- r = mount_overlayfs(image_class, noexec, overlay_path, layers); ++ r = mkdir_p(meta_path, 0700); ++ if (r < 0) ++ return log_error_errno(r, "Failed to make directory '%s': %m", meta_path); ++ ++ if (op->upper_dir && op->work_dir) { ++ r = mkdir_p(op->work_dir, 0700); ++ if (r < 0) ++ return log_error_errno(r, "Failed to make directory '%s': %m", op->work_dir); ++ top_layer = op->upper_dir; ++ } else { ++ assert(!strv_isempty(op->lower_dirs)); ++ top_layer = op->lower_dirs[0]; ++ } ++ ++ /* Overlayfs merged directory has the same mode as the top layer (either first lowerdir in options in ++ * read-only case, or upperdir for mutable case. 
Set up top overlayfs layer to the same mode as the ++ * unmerged hierarchy, otherwise we might end up with merged hierarchy owned by root and with mode ++ * being 0700. */ ++ if (chmod(top_layer, op->hierarchy_mode) < 0) ++ return log_error_errno(errno, "Failed to set permissions of '%s' to %04o: %m", top_layer, op->hierarchy_mode); ++ ++ r = mount_overlayfs(image_class, noexec, overlay_path, op->lower_dirs, op->upper_dir, op->work_dir); + if (r < 0) + return r; + +- /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. Extra turbo safety 😎 */ +- r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL); ++ return 0; ++} ++ ++static int write_extensions_file(ImageClass image_class, char **extensions, const char *meta_path) { ++ _cleanup_free_ char *f = NULL, *buf = NULL; ++ int r; ++ ++ assert(extensions); ++ assert(meta_path); ++ ++ /* Let's generate a metadata file that lists all extensions we took into account for this ++ * hierarchy. We include this in the final fs, to make things nicely discoverable and ++ * recognizable. */ ++ f = path_join(meta_path, image_class_info[image_class].dot_directory_name, image_class_info[image_class].short_identifier_plural); ++ if (!f) ++ return log_oom(); ++ ++ buf = strv_join(extensions, "\n"); ++ if (!buf) ++ return log_oom(); ++ ++ r = write_string_file(f, buf, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755); + if (r < 0) +- return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path); ++ return log_error_errno(r, "Failed to write extension meta file '%s': %m", f); ++ ++ return 0; ++} ++ ++static int write_dev_file(ImageClass image_class, const char *meta_path, const char *overlay_path) { ++ _cleanup_free_ char *f = NULL; ++ struct stat st; ++ int r; ++ ++ assert(meta_path); ++ assert(overlay_path); + + /* Now we have mounted the new file system. Let's now figure out its .st_dev field, and make that + * available in the metadata directory. 
This is useful to detect whether the metadata dir actually + * belongs to the fs it is found on: if .st_dev of the top-level mount matches it, it's pretty likely + * we are looking at a live tree, and not an unpacked tar or so of one. */ + if (stat(overlay_path, &st) < 0) +- return log_error_errno(r, "Failed to stat mount '%s': %m", overlay_path); ++ return log_error_errno(errno, "Failed to stat mount '%s': %m", overlay_path); + +- free(f); + f = path_join(meta_path, image_class_info[image_class].dot_directory_name, "dev"); + if (!f) + return log_oom(); + ++ /* Modifying the underlying layers while the overlayfs is mounted is technically undefined, but at ++ * least it won't crash or deadlock, as per the kernel docs about overlayfs: ++ * https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html#changes-to-underlying-filesystems */ + r = write_string_file(f, FORMAT_DEVNUM(st.st_dev), WRITE_STRING_FILE_CREATE); + if (r < 0) + return log_error_errno(r, "Failed to write '%s': %m", f); + ++ return 0; ++} ++ ++static int write_work_dir_file(ImageClass image_class, const char *meta_path, const char *work_dir) { ++ _cleanup_free_ char *escaped_work_dir_in_root = NULL, *f = NULL; ++ char *work_dir_in_root = NULL; ++ int r; ++ ++ assert(meta_path); ++ ++ if (!work_dir) ++ return 0; ++ ++ /* Do not store work dir path for ephemeral mode, it will be gone once this process is done. */ ++ if (IN_SET(arg_mutable, MUTABLE_EPHEMERAL, MUTABLE_EPHEMERAL_IMPORT)) ++ return 0; ++ ++ work_dir_in_root = path_startswith(work_dir, empty_to_root(arg_root)); ++ if (!work_dir_in_root) ++ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Workdir '%s' must not be outside root '%s'", work_dir, empty_to_root(arg_root)); ++ ++ f = path_join(meta_path, image_class_info[image_class].dot_directory_name, "work_dir"); ++ if (!f) ++ return log_oom(); ++ ++ /* Paths can have newlines for whatever reason, so better escape them to really get a single ++ * line file. 
*/ ++ escaped_work_dir_in_root = cescape(work_dir_in_root); ++ if (!escaped_work_dir_in_root) ++ return log_oom(); ++ r = write_string_file(f, escaped_work_dir_in_root, WRITE_STRING_FILE_CREATE); ++ if (r < 0) ++ return log_error_errno(r, "Failed to write '%s': %m", f); ++ ++ return 0; ++} ++ ++static int store_info_in_meta( ++ ImageClass image_class, ++ char **extensions, ++ const char *meta_path, ++ const char *overlay_path, ++ const char *work_dir) { ++ ++ int r; ++ ++ assert(extensions); ++ assert(meta_path); ++ assert(overlay_path); ++ /* work_dir may be NULL */ ++ ++ r = write_extensions_file(image_class, extensions, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = write_dev_file(image_class, meta_path, overlay_path); ++ if (r < 0) ++ return r; ++ ++ r = write_work_dir_file(image_class, meta_path, work_dir); ++ if (r < 0) ++ return r; ++ + /* Make sure the top-level dir has an mtime marking the point we established the merge */ + if (utimensat(AT_FDCWD, meta_path, NULL, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(r, "Failed fix mtime of '%s': %m", meta_path); + ++ return 0; ++} ++ ++static int make_mounts_read_only(ImageClass image_class, const char *overlay_path, bool mutable) { ++ int r; ++ ++ assert(overlay_path); ++ ++ if (mutable) { ++ /* Bind mount the meta path as read-only on mutable overlays to avoid accidental ++ * modifications of the contents of meta directory, which could lead to systemd thinking that ++ * this hierarchy is not our mount. */ ++ _cleanup_free_ char *f = NULL; ++ ++ f = path_join(overlay_path, image_class_info[image_class].dot_directory_name); ++ if (!f) ++ return log_oom(); ++ ++ r = mount_nofollow_verbose(LOG_ERR, f, f, NULL, MS_BIND, NULL); ++ if (r < 0) ++ return r; ++ ++ r = bind_remount_one(f, MS_RDONLY, MS_RDONLY); ++ if (r < 0) ++ return log_error_errno(r, "Failed to remount '%s' as read-only: %m", f); ++ } else { ++ /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. 
Extra ++ * turbo safety 😎 */ ++ r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL); ++ if (r < 0) ++ return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path); ++ } ++ ++ return 0; ++} ++ ++static int merge_hierarchy( ++ ImageClass image_class, ++ const char *hierarchy, ++ int noexec, ++ char **extensions, ++ char **paths, ++ const char *meta_path, ++ const char *overlay_path, ++ const char *workspace_path) { ++ ++ _cleanup_(overlayfs_paths_freep) OverlayFSPaths *op = NULL; ++ _cleanup_strv_free_ char **used_paths = NULL; ++ size_t extensions_used = 0; ++ int r; ++ ++ assert(hierarchy); ++ assert(extensions); ++ assert(paths); ++ assert(meta_path); ++ assert(overlay_path); ++ assert(workspace_path); ++ ++ r = determine_used_extensions(hierarchy, paths, &used_paths, &extensions_used); ++ if (r < 0) ++ return r; ++ ++ if (extensions_used == 0) /* No extension with files in this hierarchy? Then don't do anything. */ ++ return 0; ++ ++ r = overlayfs_paths_new(hierarchy, workspace_path, &op); ++ if (r < 0) ++ return r; ++ ++ r = determine_lower_dirs(op, used_paths, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = determine_upper_dir(op); ++ if (r < 0) ++ return r; ++ ++ r = determine_work_dir(op); ++ if (r < 0) ++ return r; ++ ++ r = mount_overlayfs_with_op(op, image_class, noexec, overlay_path, meta_path); ++ if (r < 0) ++ return r; ++ ++ r = store_info_in_meta(image_class, extensions, meta_path, overlay_path, op->work_dir); ++ if (r < 0) ++ return r; ++ ++ r = make_mounts_read_only(image_class, overlay_path, op->upper_dir && op->work_dir); ++ if (r < 0) ++ return r; ++ + return 1; + } + +@@ -908,7 +1700,7 @@ static int merge_subprocess( + + /* Create overlayfs mounts for all hierarchies */ + STRV_FOREACH(h, hierarchies) { +- _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL; ++ _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL, *merge_hierarchy_workspace = NULL; + + meta_path = path_join(workspace, 
"meta", *h); /* The place where to store metadata about this instance */ + if (!meta_path) +@@ -918,6 +1710,11 @@ static int merge_subprocess( + if (!overlay_path) + return log_oom(); + ++ /* Temporary directory for merge_hierarchy needs, like ephemeral directories. */ ++ merge_hierarchy_workspace = path_join(workspace, "mh_workspace", *h); ++ if (!merge_hierarchy_workspace) ++ return log_oom(); ++ + r = merge_hierarchy( + image_class, + *h, +@@ -925,7 +1722,8 @@ static int merge_subprocess( + extensions, + paths, + meta_path, +- overlay_path); ++ overlay_path, ++ merge_hierarchy_workspace); + if (r < 0) + return r; + } +@@ -954,7 +1752,8 @@ static int merge_subprocess( + if (r < 0) + return log_error_errno(r, "Failed to create hierarchy mount point '%s': %m", resolved); + +- r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND, NULL); ++ /* Using MS_REC to potentially bring in our read-only bind mount of metadata. */ ++ r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + +@@ -992,9 +1791,10 @@ static int merge(ImageClass image_class, + r = wait_for_terminate_and_check("(sd-merge)", pid, WAIT_LOG_ABNORMAL); + if (r < 0) + return r; +- + if (r == 123) /* exit code 123 means: didn't do anything */ + return 0; ++ if (r > 0) ++ return log_error_errno(SYNTHETIC_ERRNO(EPROTO), "Failed to merge hierarchies"); + + r = need_reload(image_class, hierarchies, no_reload); + if (r < 0) +@@ -1373,6 +2173,8 @@ static int verb_help(int argc, char **argv, void *userdata) { + " -h --help Show this help\n" + " --version Show package version\n" + "\n%3$sOptions:%4$s\n" ++ " --mutable=yes|no|auto|import|ephemeral|ephemeral-import\n" ++ " Specify a mutability mode of the merged hierarchy\n" + " --no-pager Do not pipe output into a pager\n" + " --no-legend Do not show the headers and footers\n" + " --root=PATH Operate relative to root path\n" +@@ -1406,6 +2208,7 @@ static int parse_argv(int argc, char *argv[]) { + 
ARG_IMAGE_POLICY, + ARG_NOEXEC, + ARG_NO_RELOAD, ++ ARG_MUTABLE, + }; + + static const struct option options[] = { +@@ -1419,6 +2222,7 @@ static int parse_argv(int argc, char *argv[]) { + { "image-policy", required_argument, NULL, ARG_IMAGE_POLICY }, + { "noexec", required_argument, NULL, ARG_NOEXEC }, + { "no-reload", no_argument, NULL, ARG_NO_RELOAD }, ++ { "mutable", required_argument, NULL, ARG_MUTABLE }, + {} + }; + +@@ -1482,6 +2286,13 @@ static int parse_argv(int argc, char *argv[]) { + arg_no_reload = true; + break; + ++ case ARG_MUTABLE: ++ r = parse_mutable_mode(optarg); ++ if (r < 0) ++ return log_error_errno(r, "Failed to parse argument to --mutable=: %s", optarg); ++ arg_mutable = r; ++ break; ++ + case '?': + return -EINVAL; + +@@ -1514,12 +2325,23 @@ static int sysext_main(int argc, char *argv[]) { + } + + static int run(int argc, char *argv[]) { ++ const char *env_var; + int r; + + log_setup(); + + arg_image_class = invoked_as(argv, "systemd-confext") ? IMAGE_CONFEXT : IMAGE_SYSEXT; + ++ env_var = getenv(image_class_info[arg_image_class].mode_env); ++ if (env_var) { ++ r = parse_mutable_mode(env_var); ++ if (r < 0) ++ log_warning("Failed to parse %s environment variable value '%s'. 
Ignoring.", ++ image_class_info[arg_image_class].mode_env, env_var); ++ else ++ arg_mutable = r; ++ } ++ + r = parse_argv(argc, argv); + if (r <= 0) + return r; +-- +2.34.1 + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/254-PrivateDevices-userdbd.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/254-PrivateDevices-userdbd.patch deleted file mode 100644 index 115c831c275..00000000000 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/254-PrivateDevices-userdbd.patch +++ /dev/null @@ -1,242 +0,0 @@ -https://bugs.gentoo.org/920331 -https://github.com/systemd/systemd/issues/30535 - -From 4a9e03aa6bb2cbd23dac00f2b2a7642cc79eaade Mon Sep 17 00:00:00 2001 -From: Daan De Meyer -Date: Wed, 27 Sep 2023 11:55:59 +0200 -Subject: [PATCH 1/2] core: Make private /dev read-only after populating it - ---- - src/core/namespace.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/core/namespace.c b/src/core/namespace.c -index e2304f5d066da..d1153f7690140 100644 ---- a/src/core/namespace.c -+++ b/src/core/namespace.c -@@ -995,6 +995,11 @@ static int mount_private_dev(MountEntry *m) { - if (r < 0) - log_debug_errno(r, "Failed to set up basic device tree at '%s', ignoring: %m", temporary_mount); - -+ /* Make the bind mount read-only. */ -+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, dev, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL); -+ if (r < 0) -+ return r; -+ - /* Create the /dev directory if missing. It is more likely to be missing when the service is started - * with RootDirectory. This is consistent with mount units creating the mount points when missing. 
*/ - (void) mkdir_p_label(mount_entry_path(m), 0755); - -From cd7f3702eb47c82a50bf74c2b7c15c2e4e1f5c79 Mon Sep 17 00:00:00 2001 -From: Daan De Meyer -Date: Wed, 27 Sep 2023 10:52:50 +0200 -Subject: [PATCH 2/2] core: Use a subdirectory of /run/ for PrivateDevices= - -When we're starting early boot services such as systemd-userdbd.service, -/tmp might not yet be mounted, so let's use a directory in /run instead -which is guaranteed to be available. ---- - src/core/execute.c | 1 + - src/core/namespace.c | 61 +++++++++++++++++++++++++++++---------- - src/core/namespace.h | 2 ++ - src/test/test-namespace.c | 1 + - src/test/test-ns.c | 1 + - 5 files changed, 50 insertions(+), 16 deletions(-) - -diff --git a/src/core/execute.c b/src/core/execute.c -index a52df64d01081..89c3868d55f6c 100644 ---- a/src/core/execute.c -+++ b/src/core/execute.c -@@ -3307,6 +3307,7 @@ static int apply_mount_namespace( - extension_dir, - root_dir || root_image ? params->notify_socket : NULL, - host_os_release_stage, -+ params->runtime_scope, - error_path); - - /* If we couldn't set up the namespace this is probably due to a missing capability. 
setup_namespace() reports -diff --git a/src/core/namespace.c b/src/core/namespace.c -index d1153f7690140..a0471ac8884bf 100644 ---- a/src/core/namespace.c -+++ b/src/core/namespace.c -@@ -909,7 +909,19 @@ static int clone_device_node( - return 0; - } - --static int mount_private_dev(MountEntry *m) { -+static char *settle_runtime_dir(RuntimeScope scope) { -+ char *runtime_dir; -+ -+ if (scope != RUNTIME_SCOPE_USER) -+ return strdup("/run/"); -+ -+ if (asprintf(&runtime_dir, "/run/user/" UID_FMT, geteuid()) < 0) -+ return NULL; -+ -+ return runtime_dir; -+} -+ -+static int mount_private_dev(MountEntry *m, RuntimeScope scope) { - static const char devnodes[] = - "/dev/null\0" - "/dev/zero\0" -@@ -918,13 +930,21 @@ static int mount_private_dev(MountEntry *m) { - "/dev/urandom\0" - "/dev/tty\0"; - -- char temporary_mount[] = "/tmp/namespace-dev-XXXXXX"; -+ _cleanup_free_ char *runtime_dir = NULL, *temporary_mount = NULL; - const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL; - bool can_mknod = true; - int r; - - assert(m); - -+ runtime_dir = settle_runtime_dir(scope); -+ if (!runtime_dir) -+ return log_oom_debug(); -+ -+ temporary_mount = path_join(runtime_dir, "systemd/namespace-dev-XXXXXX"); -+ if (!temporary_mount) -+ return log_oom_debug(); -+ - if (!mkdtemp(temporary_mount)) - return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount); - -@@ -1364,7 +1384,8 @@ static int apply_one_mount( - MountEntry *m, - const ImagePolicy *mount_image_policy, - const ImagePolicy *extension_image_policy, -- const NamespaceInfo *ns_info) { -+ const NamespaceInfo *ns_info, -+ RuntimeScope scope) { - - _cleanup_free_ char *inaccessible = NULL; - bool rbind = true, make = false; -@@ -1379,8 +1400,7 @@ static int apply_one_mount( - switch (m->mode) { - - case INACCESSIBLE: { -- _cleanup_free_ char *tmp = NULL; -- const char *runtime_dir; -+ _cleanup_free_ char *runtime_dir 
= NULL; - struct stat target; - - /* First, get rid of everything that is below if there -@@ -1396,14 +1416,14 @@ static int apply_one_mount( - mount_entry_path(m)); - } - -- if (geteuid() == 0) -- runtime_dir = "/run"; -- else { -- if (asprintf(&tmp, "/run/user/" UID_FMT, geteuid()) < 0) -- return -ENOMEM; -- -- runtime_dir = tmp; -- } -+ /* We don't pass the literal runtime scope through here but one based purely on our UID. This -+ * means that the root user's --user services will use the host's inaccessible inodes rather -+ * then root's private ones. This is preferable since it means device nodes that are -+ * overmounted to make them inaccessible will be overmounted with a device node, rather than -+ * an AF_UNIX socket inode. */ -+ runtime_dir = settle_runtime_dir(geteuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER); -+ if (!runtime_dir) -+ return log_oom_debug(); - - r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible); - if (r < 0) -@@ -1523,7 +1543,7 @@ static int apply_one_mount( - break; - - case PRIVATE_DEV: -- return mount_private_dev(m); -+ return mount_private_dev(m, scope); - - case BIND_DEV: - return mount_bind_dev(m); -@@ -1824,6 +1844,7 @@ static int apply_mounts( - const NamespaceInfo *ns_info, - MountEntry *mounts, - size_t *n_mounts, -+ RuntimeScope scope, - char **symlinks, - char **error_path) { - -@@ -1875,7 +1896,7 @@ static int apply_mounts( - break; - } - -- r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info); -+ r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info, scope); - if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); -@@ -2030,6 +2051,7 @@ int setup_namespace( - const char *extension_dir, - const char *notify_socket, - const char *host_os_release_stage, -+ RuntimeScope scope, - char **error_path) { - - _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; -@@ -2490,7 +2512,14 @@ int 
setup_namespace( - (void) base_filesystem_create(root, UID_INVALID, GID_INVALID); - - /* Now make the magic happen */ -- r = apply_mounts(root, mount_image_policy, extension_image_policy, ns_info, mounts, &n_mounts, symlinks, error_path); -+ r = apply_mounts(root, -+ mount_image_policy, -+ extension_image_policy, -+ ns_info, -+ mounts, &n_mounts, -+ scope, -+ symlinks, -+ error_path); - if (r < 0) - goto finish; - -diff --git a/src/core/namespace.h b/src/core/namespace.h -index b6132154c5132..581403d89826d 100644 ---- a/src/core/namespace.h -+++ b/src/core/namespace.h -@@ -16,6 +16,7 @@ typedef struct MountImage MountImage; - #include "fs-util.h" - #include "macro.h" - #include "namespace-util.h" -+#include "runtime-scope.h" - #include "string-util.h" - - typedef enum ProtectHome { -@@ -134,6 +135,7 @@ int setup_namespace( - const char *extension_dir, - const char *notify_socket, - const char *host_os_release_stage, -+ RuntimeScope scope, - char **error_path); - - #define RUN_SYSTEMD_EMPTY "/run/systemd/empty" -diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c -index 25aafc35ca837..42ac65d08c87a 100644 ---- a/src/test/test-namespace.c -+++ b/src/test/test-namespace.c -@@ -206,6 +206,7 @@ TEST(protect_kernel_logs) { - NULL, - NULL, - NULL, -+ RUNTIME_SCOPE_SYSTEM, - NULL); - assert_se(r == 0); - -diff --git a/src/test/test-ns.c b/src/test/test-ns.c -index 77afd2f6b9eb8..eb3afed9e1c66 100644 ---- a/src/test/test-ns.c -+++ b/src/test/test-ns.c -@@ -108,6 +108,7 @@ int main(int argc, char *argv[]) { - NULL, - NULL, - NULL, -+ RUNTIME_SCOPE_SYSTEM, - NULL); - if (r < 0) { - log_error_errno(r, "Failed to set up namespace: %m"); diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/255-install-format-overflow.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/255-install-format-overflow.patch new file mode 100644 index 00000000000..3dca7d8e8ec --- /dev/null +++ 
b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/255-install-format-overflow.patch @@ -0,0 +1,43 @@ +https://github.com/systemd/systemd-stable/commit/f85d2c6d1023b1fe558142440b1d63c4fc5f7c98 +https://github.com/systemd/systemd/issues/30448 +https://bugs.gentoo.org/916518 + +From f85d2c6d1023b1fe558142440b1d63c4fc5f7c98 Mon Sep 17 00:00:00 2001 +From: Luca Boccassi +Date: Sat, 24 Feb 2024 12:05:44 +0000 +Subject: [PATCH] install: fix compiler warning about empty directive argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On ppc64el with gcc 13.2 on Ubuntu 24.04: + +3s In file included from ../src/basic/macro.h:386, +483s from ../src/basic/alloc-util.h:10, +483s from ../src/shared/install.c:12: +483s ../src/shared/install.c: In function ‘install_changes_dump’: +483s ../src/shared/install.c:432:64: error: ‘%s’ directive argument is null [-Werror=format-overflow=] +483s 432 | err = log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", +483s | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +483s ../src/shared/install.c:432:75: note: format string is defined here +483s 432 | err = log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", + +(cherry picked from commit 8040fa55a1cbc34dede3205a902095ecd26c21e3) +--- a/src/shared/install.c ++++ b/src/shared/install.c +@@ -340,9 +340,12 @@ void install_changes_dump(int r, const char *verb, const InstallChange *changes, + assert(verb || r >= 0); + + for (size_t i = 0; i < n_changes; i++) { +- if (changes[i].type < 0) +- assert(verb); + assert(changes[i].path); ++ /* This tries to tell the compiler that it's safe to use 'verb' in a string format if there ++ * was an error, but the compiler doesn't care and fails anyway, so strna(verb) is used ++ * too. */ ++ assert(verb || changes[i].type >= 0); ++ verb = strna(verb); + + /* When making changes here, make sure to also change install_error() in dbus-manager.c. 
*/ + + diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-253-initrd-generators.patch b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-253-initrd-generators.patch deleted file mode 100644 index 60e7b29d7a1..00000000000 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/files/systemd-253-initrd-generators.patch +++ /dev/null @@ -1,34 +0,0 @@ -https://bugs.gentoo.org/896364 - -Workaround for bug in sys-kernel/dracut. - -From 6b25470ee28843a49c50442e9d8a98edc842ceca Mon Sep 17 00:00:00 2001 -From: Yu Watanabe -Date: Mon, 20 Feb 2023 12:00:30 +0900 -Subject: [PATCH] core/manager: run generators directly when we are in initrd - -Some initrd system write files at ourside of /run, /etc, or other -allowed places. This is a kind of workaround, but in most cases, such -sandboxing is not necessary as the filesystem is on ramfs when we are in -initrd. - -Fixes #26488. ---- - src/core/manager.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/core/manager.c b/src/core/manager.c -index 7b394794b0d4..306477c6e6c2 100644 ---- a/src/core/manager.c -+++ b/src/core/manager.c -@@ -3822,8 +3822,8 @@ static int manager_run_generators(Manager *m) { - /* If we are the system manager, we fork and invoke the generators in a sanitized mount namespace. If - * we are the user manager, let's just execute the generators directly. We might not have the - * necessary privileges, and the system manager has already mounted /tmp/ and everything else for us. -- */ -- if (MANAGER_IS_USER(m)) { -+ * If we are in initrd, let's also execute the generators directly, as we are in ramfs. 
*/ -+ if (MANAGER_IS_USER(m) || in_initrd()) { - r = manager_execute_generators(m, paths, /* remount_ro= */ false); - goto finish; - } diff --git a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.3.ebuild b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.4.ebuild similarity index 94% rename from sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.3.ebuild rename to sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.4.ebuild index 5ac6f2b40f8..0a4840ed3f3 100644 --- a/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.3.ebuild +++ b/sdk_container/src/third_party/coreos-overlay/sys-apps/systemd/systemd-255.4.ebuild @@ -24,8 +24,10 @@ else S=${WORKDIR}/${MY_P} SRC_URI="https://github.com/systemd/${MY_PN}/archive/v${MY_PV}/${MY_P}.tar.gz" - # Flatcar: mark as stable - KEYWORDS="~alpha amd64 ~arm arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86" + if [[ ${PV} != *rc* ]] ; then + # Flatcar: mark as stable + KEYWORDS="~alpha amd64 ~arm arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86" + fi fi inherit bash-completion-r1 linux-info meson-multilib optfeature pam python-single-r1 @@ -105,6 +107,9 @@ PEFILE_DEPEND='dev-python/pefile[${PYTHON_USEDEP}]' # flag). The image stage fails with "Failed to resolve # typeattributeset statement at # /var/lib/selinux/mcs/tmp/modules/400/ntp/cil:120" +# +# Flatcar: Added a dep on sys-apps/kbd. It provides a loadkeys binary +# needed by dracut's systemd-vconsole-setup module. RDEPEND="${COMMON_DEPEND} >=acct-group/adm-0-r1 >=acct-group/wheel-0-r1 @@ -188,6 +193,12 @@ QA_FLAGS_IGNORED="usr/lib/systemd/boot/efi/.*" QA_EXECSTACK="usr/lib/systemd/boot/efi/*" pkg_pretend() { + # Flatcar: We keep using split-usr for SDK. + # if use split-usr; then + # eerror "Please complete the migration to merged-usr." 
+ # eerror "https://wiki.gentoo.org/wiki/Merge-usr" + # die "systemd no longer supports split-usr" + # fi if [[ ${MERGE_TYPE} != buildonly ]]; then if use test && has pid-sandbox ${FEATURES}; then ewarn "Tests are known to fail with PID sandboxing enabled." @@ -247,6 +258,7 @@ src_unpack() { src_prepare() { local PATCHES=( + "${FILESDIR}"/255-install-format-overflow.patch # Flatcar: Adding our own patches here. "${FILESDIR}/0001-wait-online-set-any-by-default.patch" "${FILESDIR}/0002-networkd-default-to-kernel-IPForwarding-setting.patch" @@ -255,6 +267,8 @@ src_prepare() { "${FILESDIR}/0005-systemd-Disable-SELinux-permissions-checks.patch" "${FILESDIR}/0006-Revert-getty-Pass-tty-to-use-by-agetty-via-stdin.patch" "${FILESDIR}/0007-units-Keep-using-old-journal-file-format.patch" + # Flatcar: This can be dropped when updating to 256. + "${FILESDIR}/0008-sysext-Mutable-overlays.patch" ) if ! use vanilla; then @@ -297,14 +311,18 @@ src_configure() { get_rootprefix() { usex split-usr "${EPREFIX:-/}" "${EPREFIX}/usr" } + multilib_src_configure() { local myconf=( --localstatedir="${EPREFIX}/var" + # default is developer, bug 918671 + -Dmode=release # Flatcar: Point to our user mailing list. -Dsupport-url="https://groups.google.com/forum/#!forum/flatcar-linux-user" -Dpamlibdir="$(getpam_mod_dir)" # avoid bash-completion dep -Dbashcompletiondir="$(get_bashcompdir)" + # Flatcar: We keep using split-usr in SDK. $(meson_use split-usr) # Flatcar: Always set split-bin to true, we always # have separate bin and sbin directories @@ -312,6 +330,9 @@ multilib_src_configure() { # Flatcar: Use get_rootprefix. No functional change # from upstream, just refactoring the common code used # in some places. 
+ # + # TODO: Drop -Drootprefix and -Drootlibdir once we get rid + # of split-usr in SDK -Drootprefix="$(get_rootprefix)" -Drootlibdir="${EPREFIX}/usr/$(get_libdir)" # Disable compatibility with sysvinit @@ -322,6 +343,9 @@ multilib_src_configure() { # no deps -Dima=true -Ddefault-hierarchy=$(usex cgroup-hybrid hybrid unified) + # Match /etc/shells, bug 919749 + -Ddebug-shell="${EPREFIX}/bin/sh" + -Ddefault-user-shell="${EPREFIX}/bin/bash" # Optional components/dependencies $(meson_native_use_bool acl) $(meson_native_use_bool apparmor) @@ -436,9 +460,6 @@ multilib_src_test() { } multilib_src_install_all() { - # Flatcar: We always have bin separate from sbin - # local sbin=$(usex split-usr sbin bin) - local sbin='sbin' # meson doesn't know about docdir mv "${ED}"/usr/share/doc/{systemd,${PF}} || die @@ -480,7 +501,7 @@ multilib_src_install_all() { # keepdir /var/log/journal # if use pam; then - # newpamd "${FILESDIR}"/systemd-user.pam systemd-user + # newpamd "${FILESDIR}"/systemd-user.pam systemd-user # fi if use kernel-install; then @@ -651,6 +672,8 @@ pkg_preinst() { dosym ../../../etc/sysctl.conf /usr/lib/sysctl.d/99-sysctl.conf fi + # Flatcar: This used to be in upstream ebuild, but now it's + # gone. We should drop it once we get rid of split-usr in SDK. if ! use split-usr; then local dir # Flatcar: We still use separate bin and sbin, so drop usr/sbin from the list. @@ -713,11 +736,11 @@ pkg_postinst() { fi if use boot; then - optfeature "automatically installing the kernels in systemd-boot's native layout and updating the bootloader configuration" \ + optfeature "installing kernels in systemd-boot's native layout and update loader entries" \ "sys-kernel/installkernel[systemd-boot]" fi if use ukify; then - optfeature "automatically generating an unified kernel image on each kernel installation" \ + optfeature "generating unified kernel image on each kernel installation" \ "sys-kernel/installkernel[ukify]" fi }