bootc_internal_mount/
mount.rs

1//! Helpers for interacting with mountpoints
2
3use std::{
4    fs,
5    mem::MaybeUninit,
6    os::fd::{AsFd, OwnedFd},
7    process::Command,
8};
9
10use anyhow::{Context, Result, anyhow};
11use bootc_utils::CommandRunExt;
12use camino::Utf8Path;
13use cap_std_ext::{cap_std::fs::Dir, cmdext::CapStdExtCommandExt};
14use fn_error_context::context;
15use rustix::{
16    mount::{MoveMountFlags, OpenTreeFlags},
17    net::{
18        AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
19        SocketFlags, SocketType,
20    },
21    process::WaitOptions,
22    thread::Pid,
23};
24use serde::Deserialize;
25
26/// Temporary mount management with automatic cleanup.
27pub mod tempmount;
28
29/// Well known identifier for pid 1
30pub const PID1: Pid = const {
31    match Pid::from_raw(1) {
32        Some(v) => v,
33        None => panic!("Expected to parse pid1"),
34    }
35};
36
37/// Deserialized information about a mounted filesystem from `findmnt`.
38#[derive(Deserialize, Debug)]
39#[serde(rename_all = "kebab-case")]
40#[allow(dead_code)]
41pub struct Filesystem {
42    // Note if you add an entry to this list, you need to change the --output invocation below too
43    /// The source device or path.
44    pub source: String,
45    /// The mount target path.
46    pub target: String,
47    /// Major:minor device numbers.
48    #[serde(rename = "maj:min")]
49    pub maj_min: String,
50    /// The filesystem type (e.g. ext4, xfs).
51    pub fstype: String,
52    /// Mount options.
53    pub options: String,
54    /// The filesystem UUID, if available.
55    pub uuid: Option<String>,
56    /// Child filesystems, if any.
57    pub children: Option<Vec<Filesystem>>,
58}
59
60/// Deserialized output of `findmnt --json`.
61#[derive(Deserialize, Debug, Default)]
62pub struct Findmnt {
63    /// The list of mounted filesystems.
64    pub filesystems: Vec<Filesystem>,
65}
66
67/// Run `findmnt` with JSON output and parse the result.
68pub fn run_findmnt(args: &[&str], cwd: Option<&Dir>, path: Option<&str>) -> Result<Findmnt> {
69    let mut cmd = Command::new("findmnt");
70    if let Some(cwd) = cwd {
71        cmd.cwd_dir(cwd.try_clone()?);
72    }
73    cmd.args([
74        "-J",
75        "-v",
76        // If you change this you probably also want to change the Filesystem struct above
77        "--output=SOURCE,TARGET,MAJ:MIN,FSTYPE,OPTIONS,UUID",
78    ])
79    .args(args)
80    .args(path);
81    let o: Findmnt = cmd.log_debug().run_and_parse_json()?;
82    Ok(o)
83}
84
85// Retrieve a mounted filesystem from a device given a matching path
86fn findmnt_filesystem(args: &[&str], cwd: Option<&Dir>, path: &str) -> Result<Filesystem> {
87    let o = run_findmnt(args, cwd, Some(path))?;
88    o.filesystems
89        .into_iter()
90        .next()
91        .ok_or_else(|| anyhow!("findmnt returned no data for {path}"))
92}
93
94#[context("Inspecting filesystem {path}")]
95/// Inspect a target which must be a mountpoint root - it is an error
96/// if the target is not the mount root.
97pub fn inspect_filesystem(path: &Utf8Path) -> Result<Filesystem> {
98    findmnt_filesystem(&["--mountpoint"], None, path.as_str())
99}
100
101#[context("Inspecting filesystem")]
102/// Inspect a target which must be a mountpoint root - it is an error
103/// if the target is not the mount root.
104pub fn inspect_filesystem_of_dir(d: &Dir) -> Result<Filesystem> {
105    findmnt_filesystem(&["--mountpoint"], Some(d), ".")
106}
107
108#[context("Inspecting filesystem by UUID {uuid}")]
109/// Inspect a filesystem by partition UUID
110pub fn inspect_filesystem_by_uuid(uuid: &str) -> Result<Filesystem> {
111    findmnt_filesystem(&["--source"], None, &(format!("UUID={uuid}")))
112}
113
114/// Check if a specified device contains an already mounted filesystem
115/// in the root mount namespace.
116pub fn is_mounted_in_pid1_mountns(path: &str) -> Result<bool> {
117    let o = run_findmnt(&["-N"], None, Some("1"))?;
118
119    let mounted = o.filesystems.iter().any(|fs| is_source_mounted(path, fs));
120
121    Ok(mounted)
122}
123
124/// Recursively check a given filesystem to see if it contains an already mounted source.
125pub fn is_source_mounted(path: &str, mounted_fs: &Filesystem) -> bool {
126    if mounted_fs.source.contains(path) {
127        return true;
128    }
129
130    if let Some(ref children) = mounted_fs.children {
131        for child in children {
132            if is_source_mounted(path, child) {
133                return true;
134            }
135        }
136    }
137
138    false
139}
140
141/// Mount a device to the target path.
142pub fn mount(dev: &str, target: &Utf8Path) -> Result<()> {
143    Command::new("mount")
144        .args([dev, target.as_str()])
145        .run_inherited_with_cmd_context()
146}
147
148/// If the fsid of the passed path matches the fsid of the same path rooted
149/// at /proc/1/root, it is assumed that these are indeed the same mounted
150/// filesystem between container and host.
151/// Path should be absolute.
152#[context("Comparing filesystems at {path} and /proc/1/root/{path}")]
153pub fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
154    // Add a leading '/' in case a relative path is passed
155    let path = Utf8Path::new("/").join(path);
156
157    // Using statvfs instead of fs, since rustix will translate the fsid field
158    // for us.
159    let devstat = rustix::fs::statvfs(path.as_std_path())?;
160    let hostpath = Utf8Path::new("/proc/1/root").join(path.strip_prefix("/")?);
161    let hostdevstat = rustix::fs::statvfs(hostpath.as_std_path())?;
162    tracing::trace!(
163        "base mount id {:?}, host mount id {:?}",
164        devstat.f_fsid,
165        hostdevstat.f_fsid
166    );
167    Ok(devstat.f_fsid == hostdevstat.f_fsid)
168}
169
170/// Given a pid, enter its mount namespace and acquire a file descriptor
171/// for a mount from that namespace.
172#[allow(unsafe_code)]
173#[context("Opening mount tree from pid")]
174pub fn open_tree_from_pidns(
175    pid: rustix::process::Pid,
176    path: &Utf8Path,
177    recursive: bool,
178) -> Result<OwnedFd> {
179    // Allocate a socket pair to use for sending file descriptors.
180    let (sock_parent, sock_child) = rustix::net::socketpair(
181        AddressFamily::UNIX,
182        SocketType::STREAM,
183        SocketFlags::CLOEXEC,
184        None,
185    )
186    .context("socketpair")?;
187    const DUMMY_DATA: &[u8] = b"!";
188    match unsafe { libc::fork() } {
189        0 => {
190            // We're in the child. At this point we know we don't have multiple threads, so we
191            // can safely `setns`.
192
193            drop(sock_parent);
194
195            // Open up the namespace of the target process as a file descriptor, and enter it.
196            let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
197            rustix::thread::move_into_link_name_space(
198                pidlink.as_fd(),
199                Some(rustix::thread::LinkNameSpaceType::Mount),
200            )
201            .context("setns")?;
202
203            // Open the target mount path as a file descriptor.
204            let recursive = if recursive {
205                OpenTreeFlags::AT_RECURSIVE
206            } else {
207                OpenTreeFlags::empty()
208            };
209            let fd = rustix::mount::open_tree(
210                rustix::fs::CWD,
211                path.as_std_path(),
212                OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
213            )
214            .context("open_tree")?;
215
216            // And send that file descriptor via fd passing over the socketpair.
217            let fd = fd.as_fd();
218            let fds = [fd];
219            let mut buffer = [MaybeUninit::uninit(); rustix::cmsg_space!(ScmRights(1))];
220            let mut control = SendAncillaryBuffer::new(&mut buffer);
221            let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
222            assert!(pushed);
223            let ios = std::io::IoSlice::new(DUMMY_DATA);
224            rustix::net::sendmsg(sock_child, &[ios], &mut control, SendFlags::empty())?;
225            // Then we're done.
226            std::process::exit(0)
227        }
228        -1 => {
229            // fork failed
230            let e = std::io::Error::last_os_error();
231            anyhow::bail!("failed to fork: {e}");
232        }
233        n => {
234            // We're in the parent; create a pid (checking that n > 0).
235            let pid = rustix::process::Pid::from_raw(n).unwrap();
236            drop(sock_child);
237            // Receive the mount file descriptor from the child
238            let mut cmsg_space = vec![MaybeUninit::uninit(); rustix::cmsg_space!(ScmRights(1))];
239            let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
240            let mut buf = [0u8; DUMMY_DATA.len()];
241            let iov = std::io::IoSliceMut::new(buf.as_mut());
242            let mut iov = [iov];
243            let nread = rustix::net::recvmsg(
244                sock_parent,
245                &mut iov,
246                &mut cmsg_buffer,
247                RecvFlags::CMSG_CLOEXEC,
248            )
249            .context("recvmsg")?
250            .bytes;
251            anyhow::ensure!(nread == DUMMY_DATA.len());
252            assert_eq!(buf, DUMMY_DATA);
253            // And extract the file descriptor
254            let r = cmsg_buffer
255                .drain()
256                .filter_map(|m| match m {
257                    rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
258                    _ => None,
259                })
260                .flatten()
261                .next()
262                .ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
263            // SAFETY: Since we're not setting WNOHANG, this will always return Some().
264            let st = rustix::process::waitpid(Some(pid), WaitOptions::empty())?
265                .expect("Wait status")
266                .1;
267            if let Some(0) = st.exit_status() {
268                Ok(r)
269            } else {
270                anyhow::bail!("forked helper failed: {st:?}");
271            }
272        }
273    }
274}
275
276/// Create a bind mount from the mount namespace of the target pid
277/// into our mount namespace.
278pub fn bind_mount_from_pidns(
279    pid: Pid,
280    src: &Utf8Path,
281    target: &Utf8Path,
282    recursive: bool,
283) -> Result<()> {
284    let src = open_tree_from_pidns(pid, src, recursive)?;
285    rustix::mount::move_mount(
286        src.as_fd(),
287        "",
288        rustix::fs::CWD,
289        target.as_std_path(),
290        MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
291    )
292    .context("Moving mount")?;
293    Ok(())
294}
295
296/// If the target path is not already mirrored from the host (e.g. via `-v /dev:/dev`)
297/// then recursively mount it.
298pub fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
299    let path = path.as_ref();
300    // If we didn't have this in our filesystem already (e.g. for /var/lib/containers)
301    // then create it now.
302    std::fs::create_dir_all(path)?;
303    if is_same_as_host(path)? {
304        tracing::debug!("Already mounted from host: {path}");
305        return Ok(());
306    }
307    tracing::debug!("Propagating host mount: {path}");
308    bind_mount_from_pidns(PID1, path, path, true)
309}