bootc_initramfs_setup/
lib.rs

1//! Mount helpers for bootc-initramfs
2
3use std::{
4    ffi::{CString, OsString},
5    fmt::Debug,
6    io::ErrorKind,
7    os::fd::{AsFd, AsRawFd, OwnedFd},
8    path::{Path, PathBuf},
9};
10
11use anyhow::{Context, Result};
12use cap_std_ext::cap_std::fs::Dir;
13use cap_std_ext::dirext::CapStdExtDirExt;
14use clap::Parser;
15use rustix::{
16    fs::{CWD, Mode, OFlags, major, minor, mkdirat, openat, stat, symlink},
17    io::Errno,
18    mount::{
19        FsMountFlags, MountAttrFlags, OpenTreeFlags, UnmountFlags, fsconfig_create,
20        fsconfig_set_string, fsmount, open_tree, unmount,
21    },
22    path,
23};
24
25use serde::Deserialize;
26
27use cfsctl::composefs;
28use cfsctl::composefs_boot;
29use composefs::{
30    fsverity::{FsVerityHashValue, Sha512HashValue},
31    mount::FsHandle,
32    mountcompat::{overlayfs_set_fd, overlayfs_set_lower_and_data_fds, prepare_mount},
33    repository::Repository,
34};
35use composefs_boot::cmdline::get_cmdline_composefs;
36
37use fn_error_context::context;
38
39use bootc_kernel_cmdline::utf8::Cmdline;
40
// Raw definitions needed to invoke the `mount_setattr(2)` syscall directly.
/// Attribute bit that marks a mount read-only; placed in `MountAttr::attr_set`
/// by `set_mount_readonly`.
const MOUNT_ATTR_RDONLY: u64 = 0x1;
43
/// Argument block passed by pointer to the `mount_setattr` syscall.
///
/// The field names and order follow the kernel's `struct mount_attr` as described
/// in mount_setattr(2). `#[repr(C)]` pins the layout, because the syscall receives
/// a raw pointer to this struct together with its `size_of`.
#[repr(C)]
struct MountAttr {
    /// Mount attribute bits to turn on (e.g. `MOUNT_ATTR_RDONLY`).
    attr_set: u64,
    /// Mount attribute bits to turn off.
    attr_clr: u64,
    /// Mount propagation setting; this file only ever passes 0.
    propagation: u64,
    /// User namespace fd field; this file only ever passes 0.
    userns_fd: u64,
}
51
52/// Set mount attributes using mount_setattr syscall
53#[context("Setting mount attributes")]
54#[allow(unsafe_code)]
55fn mount_setattr(fd: impl AsFd, flags: libc::c_int, attr: &MountAttr) -> Result<()> {
56    let ret = unsafe {
57        libc::syscall(
58            libc::SYS_mount_setattr,
59            fd.as_fd().as_raw_fd(),
60            c"".as_ptr(),
61            flags,
62            attr as *const MountAttr,
63            std::mem::size_of::<MountAttr>(),
64        )
65    };
66    if ret == -1 {
67        Err(std::io::Error::last_os_error())?;
68    }
69    Ok(())
70}
71
72/// Set mount to readonly
73#[context("Setting mount readonly")]
74fn set_mount_readonly(fd: impl AsFd) -> Result<()> {
75    let attr = MountAttr {
76        attr_set: MOUNT_ATTR_RDONLY,
77        attr_clr: 0,
78        propagation: 0,
79        userns_fd: 0,
80    };
81    mount_setattr(fd, libc::AT_EMPTY_PATH, &attr)
82}
83
84/// Types of mounts supported by the configuration
85#[derive(Clone, Copy, Debug, Deserialize)]
86#[serde(rename_all = "lowercase")]
87pub enum MountType {
88    /// No mount
89    None,
90    /// Bind mount
91    Bind,
92    /// Overlay mount
93    Overlay,
94    /// Transient mount
95    Transient,
96}
97
98#[derive(Debug, Default, Deserialize)]
99struct RootConfig {
100    #[serde(default)]
101    transient: bool,
102}
103
104/// Configuration for mount operations
105#[derive(Debug, Default, Deserialize)]
106pub struct MountConfig {
107    /// The type of mount to use
108    pub mount: Option<MountType>,
109    #[serde(default)]
110    /// Whether this mount should be transient (temporary)
111    pub transient: bool,
112}
113
114#[derive(Deserialize, Default)]
115struct Config {
116    #[serde(default)]
117    etc: MountConfig,
118    #[serde(default)]
119    var: MountConfig,
120    #[serde(default)]
121    root: RootConfig,
122}
123
124/// Command-line arguments
125#[derive(Parser, Debug)]
126#[command(version)]
127pub struct Args {
128    #[arg(help = "Execute this command (for testing)")]
129    /// Execute this command (for testing)
130    pub cmd: Vec<OsString>,
131
132    #[arg(
133        long,
134        default_value = "/sysroot",
135        help = "sysroot directory in initramfs"
136    )]
137    /// sysroot directory in initramfs
138    pub sysroot: PathBuf,
139
140    #[arg(
141        long,
142        default_value = "/usr/lib/composefs/setup-root-conf.toml",
143        help = "Config path (for testing)"
144    )]
145    /// Config path (for testing)
146    pub config: PathBuf,
147
148    // we want to test in a userns, but can't mount erofs there
149    #[arg(long, help = "Bind mount root-fs from (for testing)")]
150    /// Bind mount root-fs from (for testing)
151    pub root_fs: Option<PathBuf>,
152
153    #[arg(long, help = "Kernel commandline args (for testing)")]
154    /// Kernel commandline args (for testing)
155    pub cmdline: Option<Cmdline<'static>>,
156
157    #[arg(long, help = "Mountpoint (don't replace sysroot, for testing)")]
158    /// Mountpoint (don't replace sysroot, for testing)
159    pub target: Option<PathBuf>,
160}
161
162/// Wrapper around [`composefs::mount::mount_at`]
163pub fn mount_at_wrapper(
164    fs_fd: impl AsFd,
165    dirfd: impl AsFd,
166    path: impl path::Arg + Debug + Clone,
167) -> Result<()> {
168    composefs::mount::mount_at(fs_fd, dirfd, path.clone())
169        .with_context(|| format!("Mounting at path {path:?}"))
170}
171
172/// Wrapper around [`rustix::fs::openat`]
173#[context("Opening dir {name:?}")]
174pub fn open_dir(dirfd: impl AsFd, name: impl AsRef<Path> + Debug) -> Result<OwnedFd> {
175    let res = openat(
176        dirfd,
177        name.as_ref(),
178        OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC,
179        Mode::empty(),
180    );
181
182    Ok(res?)
183}
184
185#[context("Ensure dir")]
186fn ensure_dir(dirfd: impl AsFd, name: &str, mode: Option<rustix::fs::Mode>) -> Result<OwnedFd> {
187    match mkdirat(dirfd.as_fd(), name, mode.unwrap_or(0o700.into())) {
188        Ok(()) | Err(Errno::EXIST) => {}
189        Err(err) => Err(err).with_context(|| format!("Creating dir {name}"))?,
190    }
191
192    open_dir(dirfd, name)
193}
194
195#[context("Bind mounting to path {path}")]
196fn bind_mount(fd: impl AsFd, path: &str) -> Result<OwnedFd> {
197    let res = open_tree(
198        fd.as_fd(),
199        path,
200        OpenTreeFlags::OPEN_TREE_CLONE
201            | OpenTreeFlags::OPEN_TREE_CLOEXEC
202            | OpenTreeFlags::AT_EMPTY_PATH,
203    );
204
205    Ok(res?)
206}
207
208/// Mount a tmpfs, inheriting the SELinux label from the base filesystem
209/// if provided. See <https://github.com/containers/bootc/issues/1992>.
210#[context("Mounting tmpfs for overlay")]
211fn mount_tmpfs_for_overlay(base: Option<impl AsFd>) -> Result<OwnedFd> {
212    let tmpfs = FsHandle::open("tmpfs")?;
213
214    if let Some(base_fd) = base {
215        let base_dir = Dir::reopen_dir(&base_fd.as_fd())?;
216        if let Some(label) = base_dir.getxattr(".", "security.selinux")? {
217            if let Ok(cstr) = CString::new(label) {
218                fsconfig_set_string(tmpfs.as_fd(), "rootcontext", &cstr)?;
219            }
220        }
221    }
222
223    fsconfig_create(tmpfs.as_fd())?;
224    Ok(fsmount(
225        tmpfs.as_fd(),
226        FsMountFlags::FSMOUNT_CLOEXEC,
227        MountAttrFlags::empty(),
228    )?)
229}
230
231#[context("Mounting state as overlay")]
232fn overlay_state(
233    base: impl AsFd,
234    state: impl AsFd,
235    source: &str,
236    mode: Option<rustix::fs::Mode>,
237    mount_attr_flags: Option<MountAttrFlags>,
238) -> Result<()> {
239    let upper = ensure_dir(state.as_fd(), "upper", mode)?;
240    let work = ensure_dir(state.as_fd(), "work", mode)?;
241
242    let overlayfs = FsHandle::open("overlay")?;
243    fsconfig_set_string(overlayfs.as_fd(), "source", source)?;
244    overlayfs_set_fd(overlayfs.as_fd(), "workdir", work.as_fd())?;
245    overlayfs_set_fd(overlayfs.as_fd(), "upperdir", upper.as_fd())?;
246    overlayfs_set_lower_and_data_fds(&overlayfs, base.as_fd(), None::<OwnedFd>)?;
247    fsconfig_create(overlayfs.as_fd())?;
248    let fs = fsmount(
249        overlayfs.as_fd(),
250        FsMountFlags::FSMOUNT_CLOEXEC,
251        mount_attr_flags.unwrap_or(MountAttrFlags::empty()),
252    )?;
253
254    mount_at_wrapper(fs, base, ".").context("Moving mount")
255}
256
257/// Mounts a transient overlayfs with passed in fd as the lowerdir.
258///
259/// The tmpfs used for the overlay upper layer inherits the SELinux label
260/// from the base filesystem to prevent label mismatches (see #1992).
261#[context("Mounting transient overlayfs")]
262pub fn overlay_transient(
263    base: impl AsFd,
264    mode: Option<rustix::fs::Mode>,
265    mount_attr_flags: Option<MountAttrFlags>,
266) -> Result<()> {
267    let tmpfs = mount_tmpfs_for_overlay(Some(&base))?;
268    overlay_state(
269        base,
270        prepare_mount(tmpfs)?,
271        "transient",
272        mode,
273        mount_attr_flags,
274    )
275}
276
277#[context("Opening rootfs")]
278fn open_root_fs(path: &Path) -> Result<OwnedFd> {
279    let rootfs = open_tree(
280        CWD,
281        path,
282        OpenTreeFlags::OPEN_TREE_CLONE | OpenTreeFlags::OPEN_TREE_CLOEXEC,
283    )?;
284
285    set_mount_readonly(&rootfs)?;
286
287    Ok(rootfs)
288}
289
290/// Prepares a floating mount for composefs and returns the fd
291///
292/// # Arguments
293/// * sysroot                - fd for /sysroot
294/// * name                   - Name of the EROFS image to be mounted
295/// * allow_missing_fsverity - Whether to allow mount without fsverity support
296#[context("Mounting composefs image")]
297pub fn mount_composefs_image(
298    sysroot: &OwnedFd,
299    name: &str,
300    allow_missing_fsverity: bool,
301) -> Result<OwnedFd> {
302    let mut repo = Repository::<Sha512HashValue>::open_path(sysroot, "composefs")?;
303    repo.set_insecure(allow_missing_fsverity);
304    let rootfs = repo
305        .mount(name)
306        .context("Failed to mount composefs image")?;
307
308    set_mount_readonly(&rootfs)?;
309
310    Ok(rootfs)
311}
312
313/// Mounts a subdirectory with the specified configuration
314#[context("Mounting subdirectory")]
315pub fn mount_subdir(
316    new_root: impl AsFd,
317    state: impl AsFd,
318    subdir: &str,
319    config: MountConfig,
320    default: MountType,
321) -> Result<()> {
322    let mount_type = match config.mount {
323        Some(mt) => mt,
324        None => match config.transient {
325            true => MountType::Transient,
326            false => default,
327        },
328    };
329
330    match mount_type {
331        MountType::None => Ok(()),
332        MountType::Bind => Ok(mount_at_wrapper(
333            bind_mount(&state, subdir)?,
334            &new_root,
335            subdir,
336        )?),
337        MountType::Overlay => overlay_state(
338            open_dir(&new_root, subdir)?,
339            open_dir(&state, subdir)?,
340            "overlay",
341            None,
342            None,
343        ),
344        MountType::Transient => overlay_transient(open_dir(&new_root, subdir)?, None, None),
345    }
346}
347
348#[context("GPT workaround")]
349/// Workaround for /dev/gpt-auto-root
350pub fn gpt_workaround() -> Result<()> {
351    // https://github.com/systemd/systemd/issues/35017
352    let rootdev = stat("/dev/gpt-auto-root");
353
354    let rootdev = match rootdev {
355        Ok(r) => r,
356        Err(e) if e.kind() == ErrorKind::NotFound => return Ok(()),
357        Err(e) => Err(e)?,
358    };
359
360    let target = format!(
361        "/dev/block/{}:{}",
362        major(rootdev.st_rdev),
363        minor(rootdev.st_rdev)
364    );
365    symlink(target, "/run/systemd/volatile-root")?;
366    Ok(())
367}
368
369/// Sets up /sysroot for switch-root
370#[context("Setting up /sysroot")]
371pub fn setup_root(args: Args) -> Result<()> {
372    let config = match std::fs::read_to_string(args.config) {
373        Ok(text) => toml::from_str(&text)?,
374        Err(err) if err.kind() == ErrorKind::NotFound => Config::default(),
375        Err(err) => Err(err)?,
376    };
377
378    let sysroot = open_dir(CWD, &args.sysroot)
379        .with_context(|| format!("Failed to open sysroot {:?}", args.sysroot))?;
380
381    let cmdline = args
382        .cmdline
383        .unwrap_or(Cmdline::from_proc().context("Failed to read cmdline")?);
384
385    let (image, insecure) = get_cmdline_composefs::<Sha512HashValue>(&cmdline)?;
386
387    let new_root = match args.root_fs {
388        Some(path) => open_root_fs(&path).context("Failed to clone specified root fs")?,
389        None => mount_composefs_image(&sysroot, &image.to_hex(), insecure)?,
390    };
391
392    // we need to clone this before the next step to make sure we get the old one
393    let sysroot_clone = bind_mount(&sysroot, "")?;
394
395    set_mount_readonly(&sysroot_clone)?;
396
397    let mount_target = args.target.unwrap_or(args.sysroot.clone());
398
399    // Ideally we build the new root filesystem together before we mount it, but that only works on
400    // 6.15 and later.  Before 6.15 we can't mount into a floating tree, so mount it first.  This
401    // will leave an abandoned clone of the sysroot mounted under it, but that's OK for now.
402    if cfg!(feature = "pre-6.15") {
403        mount_at_wrapper(&new_root, CWD, &mount_target)?;
404    }
405
406    if config.root.transient {
407        overlay_transient(&new_root, None, None)?;
408    }
409
410    match composefs::mount::mount_at(&sysroot_clone, &new_root, "sysroot") {
411        Ok(()) | Err(Errno::NOENT) => {}
412        Err(err) => Err(err)?,
413    }
414
415    // etc + var
416    let state = open_dir(open_dir(&sysroot, "state/deploy")?, image.to_hex())?;
417    mount_subdir(&new_root, &state, "etc", config.etc, MountType::Bind)?;
418    mount_subdir(&new_root, &state, "var", config.var, MountType::Bind)?;
419
420    if cfg!(not(feature = "pre-6.15")) {
421        // Replace the /sysroot with the new composed root filesystem
422        unmount(&args.sysroot, UnmountFlags::DETACH)?;
423        mount_at_wrapper(&new_root, CWD, &mount_target)?;
424    }
425
426    Ok(())
427}