bootc_internal_blockdev/
blockdev.rs

1use std::collections::HashMap;
2use std::env;
3use std::path::Path;
4use std::process::{Command, Stdio};
5use std::sync::OnceLock;
6
7use anyhow::{Context, Result, anyhow};
8use camino::{Utf8Path, Utf8PathBuf};
9use fn_error_context::context;
10use regex::Regex;
11use serde::Deserialize;
12
13use bootc_utils::CommandRunExt;
14
/// MBR partition type IDs that this module treats as an EFI System Partition (ESP).
/// Compared against the hexadecimal `type` of each partition on DOS-labelled disks.
/// Refer to <https://en.wikipedia.org/wiki/Partition_type>
pub const ESP_ID_MBR: &[u8] = &[0x06, 0xEF];
18
/// Partition type UUID of the EFI System Partition (ESP) for UEFI boot on GPT.
/// Stored in lowercase; lookups against it are case-insensitive.
pub const ESP: &str = "c12a7328-f81f-11d2-ba4b-00a0c93ec93b";
21
/// Top-level shape of the JSON document produced by `lsblk -J`.
#[derive(Debug, Deserialize)]
struct DevicesOutput {
    /// Devices listed under the `blockdevices` key.
    blockdevices: Vec<Device>,
}
26
/// A block device (whole disk or partition) deserialized from `lsblk` JSON output.
///
/// Most fields are optional because they may be absent from the lsblk output;
/// see `backfill_missing` for the ones that are recovered from sysfs.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
pub struct Device {
    /// Device name as reported by lsblk.
    pub name: String,
    pub serial: Option<String>,
    pub model: Option<String>,
    pub partlabel: Option<String>,
    pub parttype: Option<String>,
    pub partuuid: Option<String>,
    /// Partition number (1-indexed). None for whole disk devices.
    pub partn: Option<u32>,
    /// Nested devices (e.g. partitions), when lsblk reports any.
    pub children: Option<Vec<Device>>,
    /// Device size; `list_dev` invokes lsblk with `-b`, so presumably bytes.
    pub size: u64,
    /// `major:minor` device number; used to locate the device under `/sys/dev/block`.
    #[serde(rename = "maj:min")]
    pub maj_min: Option<String>,
    // NOTE this one is not available on older util-linux, and
    // will also not exist for whole blockdevs (as opposed to partitions).
    pub start: Option<u64>,

    // Filesystem-related properties
    pub label: Option<String>,
    pub fstype: Option<String>,
    pub uuid: Option<String>,
    /// Device node path, when lsblk reports it (see `Device::path` for the fallback).
    pub path: Option<String>,
}
52
53impl Device {
54    #[allow(dead_code)]
55    // RHEL8's lsblk doesn't have PATH, so we do it
56    pub fn path(&self) -> String {
57        self.path.clone().unwrap_or(format!("/dev/{}", &self.name))
58    }
59
60    #[allow(dead_code)]
61    pub fn has_children(&self) -> bool {
62        self.children.as_ref().is_some_and(|v| !v.is_empty())
63    }
64
65    /// Read a sysfs property for this device and parse it as the target type.
66    fn read_sysfs_property<T>(&self, property: &str) -> Result<Option<T>>
67    where
68        T: std::str::FromStr,
69        T::Err: std::error::Error + Send + Sync + 'static,
70    {
71        let Some(majmin) = self.maj_min.as_deref() else {
72            return Ok(None);
73        };
74        let sysfs_path = format!("/sys/dev/block/{majmin}/{property}");
75        if !Utf8Path::new(&sysfs_path).try_exists()? {
76            return Ok(None);
77        }
78        let value = std::fs::read_to_string(&sysfs_path)
79            .with_context(|| format!("Reading {sysfs_path}"))?;
80        let parsed = value
81            .trim()
82            .parse()
83            .with_context(|| format!("Parsing sysfs {property} property"))?;
84        tracing::debug!("backfilled {property} to {value}");
85        Ok(Some(parsed))
86    }
87
88    /// Older versions of util-linux may be missing some properties. Backfill them if they're missing.
89    pub fn backfill_missing(&mut self) -> Result<()> {
90        // The "start" parameter was only added in a version of util-linux that's only
91        // in Fedora 40 as of this writing.
92        if self.start.is_none() {
93            self.start = self.read_sysfs_property("start")?;
94        }
95        // The "partn" column was added in util-linux 2.39, which is newer than
96        // what CentOS 9 / RHEL 9 ship (2.37). Note: sysfs uses "partition" not "partn".
97        if self.partn.is_none() {
98            self.partn = self.read_sysfs_property("partition")?;
99        }
100        // Recurse to child devices
101        for child in self.children.iter_mut().flatten() {
102            child.backfill_missing()?;
103        }
104        Ok(())
105    }
106}
107
108#[context("Listing device {dev}")]
109pub fn list_dev(dev: &Utf8Path) -> Result<Device> {
110    let mut devs: DevicesOutput = Command::new("lsblk")
111        .args(["-J", "-b", "-O"])
112        .arg(dev)
113        .log_debug()
114        .run_and_parse_json()?;
115    for dev in devs.blockdevices.iter_mut() {
116        dev.backfill_missing()?;
117    }
118    devs.blockdevices
119        .into_iter()
120        .next()
121        .ok_or_else(|| anyhow!("no device output from lsblk for {dev}"))
122}
123
/// Top-level shape of the JSON document produced by `sfdisk -J`.
#[derive(Debug, Deserialize)]
struct SfDiskOutput {
    /// The partition table found under the `partitiontable` key.
    partitiontable: PartitionTable,
}
128
/// A single partition entry from `sfdisk -J` output.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
pub struct Partition {
    /// Device node of the partition (e.g. `/dev/loop0p1`).
    pub node: String,
    /// Start offset as reported by sfdisk (presumably in the table's `unit`; sectors in the test fixtures).
    pub start: u64,
    /// Size as reported by sfdisk (presumably in the same unit as `start`).
    pub size: u64,
    /// Partition type: a type UUID on GPT, or a hexadecimal type ID string on DOS tables.
    #[serde(rename = "type")]
    pub parttype: String,
    pub uuid: Option<String>,
    pub name: Option<String>,
    /// Bootable flag; absent from the JSON when not set.
    pub bootable: Option<bool>,
}
141
142#[derive(Debug, Deserialize, PartialEq, Eq)]
143#[serde(rename_all = "kebab-case")]
144pub enum PartitionType {
145    Dos,
146    Gpt,
147    Unknown(String),
148}
149
/// The partition table of a device, as deserialized from `sfdisk -J` output.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
pub struct PartitionTable {
    /// Kind of partition table (DOS, GPT, ...).
    pub label: PartitionType,
    /// Table identifier as reported by sfdisk.
    pub id: String,
    /// Device node the table was read from (e.g. `/dev/loop0`).
    pub device: String,
    // We're not using these fields
    // pub unit: String,
    // pub firstlba: u64,
    // pub lastlba: u64,
    // pub sectorsize: u64,
    /// Partitions in the order sfdisk listed them.
    pub partitions: Vec<Partition>,
}
163
164impl PartitionTable {
165    /// Find the partition with the given device name
166    #[allow(dead_code)]
167    pub fn find<'a>(&'a self, devname: &str) -> Option<&'a Partition> {
168        self.partitions.iter().find(|p| p.node.as_str() == devname)
169    }
170
171    pub fn path(&self) -> &Utf8Path {
172        self.device.as_str().into()
173    }
174
175    // Find the partition with the given offset (starting at 1)
176    #[allow(dead_code)]
177    pub fn find_partno(&self, partno: u32) -> Result<&Partition> {
178        let r = self
179            .partitions
180            .get(partno.checked_sub(1).expect("1 based partition offset") as usize)
181            .ok_or_else(|| anyhow::anyhow!("Missing partition for index {partno}"))?;
182        Ok(r)
183    }
184
185    /// Find the partition with the given type UUID (case-insensitive).
186    ///
187    /// Partition type UUIDs are compared case-insensitively per the GPT specification,
188    /// as different tools may report them in different cases.
189    pub fn find_partition_of_type(&self, uuid: &str) -> Option<&Partition> {
190        self.partitions.iter().find(|p| p.parttype_matches(uuid))
191    }
192
193    /// Find the partition with bootable is 'true'.
194    pub fn find_partition_of_bootable(&self) -> Option<&Partition> {
195        self.partitions.iter().find(|p| p.is_bootable())
196    }
197
198    /// Find the esp partition.
199    pub fn find_partition_of_esp(&self) -> Result<Option<&Partition>> {
200        match &self.label {
201            PartitionType::Dos => Ok(self.partitions.iter().find(|b| {
202                u8::from_str_radix(&b.parttype, 16)
203                    .map(|pt| ESP_ID_MBR.contains(&pt))
204                    .unwrap_or(false)
205            })),
206            PartitionType::Gpt => Ok(self.find_partition_of_type(ESP)),
207            _ => Err(anyhow::anyhow!("Unsupported partition table type")),
208        }
209    }
210}
211
212impl Partition {
213    #[allow(dead_code)]
214    pub fn path(&self) -> &Utf8Path {
215        self.node.as_str().into()
216    }
217
218    /// Check if this partition's type matches the given UUID (case-insensitive).
219    ///
220    /// Partition type UUIDs are compared case-insensitively per the GPT specification,
221    /// as different tools may report them in different cases.
222    pub fn parttype_matches(&self, uuid: &str) -> bool {
223        self.parttype.eq_ignore_ascii_case(uuid)
224    }
225
226    /// Check this partition's bootable property.
227    pub fn is_bootable(&self) -> bool {
228        self.bootable.unwrap_or(false)
229    }
230}
231
232#[context("Listing partitions of {dev}")]
233pub fn partitions_of(dev: &Utf8Path) -> Result<PartitionTable> {
234    let o: SfDiskOutput = Command::new("sfdisk")
235        .args(["-J", dev.as_str()])
236        .run_and_parse_json()?;
237    Ok(o.partitiontable)
238}
239
/// An allocated loopback block device that is detached again on `close` or drop.
pub struct LoopbackDevice {
    /// Path of the loop device; set to `None` once the device has been detached.
    pub dev: Option<Utf8PathBuf>,
    // Handle to the cleanup helper process
    cleanup_handle: Option<LoopbackCleanupHandle>,
}
245
/// Handle to manage the cleanup helper process for loopback devices
struct LoopbackCleanupHandle {
    /// Child process handle of the spawned helper
    child: std::process::Child,
}
251
252impl LoopbackDevice {
253    // Create a new loopback block device targeting the provided file path.
254    pub fn new(path: &Path) -> Result<Self> {
255        let direct_io = match env::var("BOOTC_DIRECT_IO") {
256            Ok(val) => {
257                if val == "on" {
258                    "on"
259                } else {
260                    "off"
261                }
262            }
263            Err(_e) => "off",
264        };
265
266        let dev = Command::new("losetup")
267            .args([
268                "--show",
269                format!("--direct-io={direct_io}").as_str(),
270                "-P",
271                "--find",
272            ])
273            .arg(path)
274            .run_get_string()?;
275        let dev = Utf8PathBuf::from(dev.trim());
276        tracing::debug!("Allocated loopback {dev}");
277
278        // Try to spawn cleanup helper, but don't fail if it doesn't work
279        let cleanup_handle = match Self::spawn_cleanup_helper(dev.as_str()) {
280            Ok(handle) => Some(handle),
281            Err(e) => {
282                tracing::warn!(
283                    "Failed to spawn loopback cleanup helper for {}: {}. \
284                     Loopback device may not be cleaned up if process is interrupted.",
285                    dev,
286                    e
287                );
288                None
289            }
290        };
291
292        Ok(Self {
293            dev: Some(dev),
294            cleanup_handle,
295        })
296    }
297
298    // Access the path to the loopback block device.
299    pub fn path(&self) -> &Utf8Path {
300        // SAFETY: The option cannot be destructured until we are dropped
301        self.dev.as_deref().unwrap()
302    }
303
304    /// Spawn a cleanup helper process that will clean up the loopback device
305    /// if the parent process dies unexpectedly
306    fn spawn_cleanup_helper(device_path: &str) -> Result<LoopbackCleanupHandle> {
307        // Try multiple strategies to find the bootc binary
308        let bootc_path = bootc_utils::reexec::executable_path()
309            .context("Failed to locate bootc binary for cleanup helper")?;
310
311        // Create the helper process
312        let mut cmd = Command::new(bootc_path);
313        cmd.args([
314            "internals",
315            "loopback-cleanup-helper",
316            "--device",
317            device_path,
318        ]);
319
320        // Set environment variable to indicate this is a cleanup helper
321        cmd.env("BOOTC_LOOPBACK_CLEANUP_HELPER", "1");
322
323        // Set up stdio to redirect to /dev/null
324        cmd.stdin(Stdio::null());
325        cmd.stdout(Stdio::null());
326        // Don't redirect stderr so we can see error messages
327
328        // Spawn the process
329        let child = cmd
330            .spawn()
331            .context("Failed to spawn loopback cleanup helper")?;
332
333        Ok(LoopbackCleanupHandle { child })
334    }
335
336    // Shared backend for our `close` and `drop` implementations.
337    fn impl_close(&mut self) -> Result<()> {
338        // SAFETY: This is the only place we take the option
339        let Some(dev) = self.dev.take() else {
340            tracing::trace!("loopback device already deallocated");
341            return Ok(());
342        };
343
344        // Kill the cleanup helper since we're cleaning up normally
345        if let Some(mut cleanup_handle) = self.cleanup_handle.take() {
346            // Send SIGTERM to the child process and let it do the cleanup
347            let _ = cleanup_handle.child.kill();
348        }
349
350        Command::new("losetup")
351            .args(["-d", dev.as_str()])
352            .run_capture_stderr()
353    }
354
355    /// Consume this device, unmounting it.
356    pub fn close(mut self) -> Result<()> {
357        self.impl_close()
358    }
359}
360
361impl Drop for LoopbackDevice {
362    fn drop(&mut self) {
363        // Best effort to unmount if we're dropped without invoking `close`
364        let _ = self.impl_close();
365    }
366}
367
368/// Main function for the loopback cleanup helper process
369/// This function does not return - it either exits normally or via signal
370pub async fn run_loopback_cleanup_helper(device_path: &str) -> Result<()> {
371    // Check if we're running as a cleanup helper
372    if std::env::var("BOOTC_LOOPBACK_CLEANUP_HELPER").is_err() {
373        anyhow::bail!("This function should only be called as a cleanup helper");
374    }
375
376    // Set up death signal notification - we want to be notified when parent dies
377    rustix::process::set_parent_process_death_signal(Some(rustix::process::Signal::TERM))
378        .context("Failed to set parent death signal")?;
379
380    // Wait for SIGTERM (either from parent death or normal cleanup)
381    tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
382        .expect("Failed to create signal stream")
383        .recv()
384        .await;
385
386    // Clean up the loopback device
387    let output = std::process::Command::new("losetup")
388        .args(["-d", device_path])
389        .output();
390
391    match output {
392        Ok(output) if output.status.success() => {
393            // Log to systemd journal instead of stderr
394            tracing::info!("Cleaned up leaked loopback device {}", device_path);
395            std::process::exit(0);
396        }
397        Ok(output) => {
398            let stderr = String::from_utf8_lossy(&output.stderr);
399            tracing::error!(
400                "Failed to clean up loopback device {}: {}. Stderr: {}",
401                device_path,
402                output.status,
403                stderr.trim()
404            );
405            std::process::exit(1);
406        }
407        Err(e) => {
408            tracing::error!(
409                "Error executing losetup to clean up loopback device {}: {}",
410                device_path,
411                e
412            );
413            std::process::exit(1);
414        }
415    }
416}
417
418/// Parse key-value pairs from lsblk --pairs.
419/// Newer versions of lsblk support JSON but the one in CentOS 7 doesn't.
420fn split_lsblk_line(line: &str) -> HashMap<String, String> {
421    static REGEX: OnceLock<Regex> = OnceLock::new();
422    let regex = REGEX.get_or_init(|| Regex::new(r#"([A-Z-_]+)="([^"]+)""#).unwrap());
423    let mut fields: HashMap<String, String> = HashMap::new();
424    for cap in regex.captures_iter(line) {
425        fields.insert(cap[1].to_string(), cap[2].to_string());
426    }
427    fields
428}
429
430/// This is a bit fuzzy, but... this function will return every block device in the parent
431/// hierarchy of `device` capable of containing other partitions. So e.g. parent devices of type
432/// "part" doesn't match, but "disk" and "mpath" does.
433pub fn find_parent_devices(device: &str) -> Result<Vec<String>> {
434    let output = Command::new("lsblk")
435        // Older lsblk, e.g. in CentOS 7.6, doesn't support PATH, but --paths option
436        .arg("--pairs")
437        .arg("--paths")
438        .arg("--inverse")
439        .arg("--output")
440        .arg("NAME,TYPE")
441        .arg(device)
442        .run_get_string()?;
443    let mut parents = Vec::new();
444    // skip first line, which is the device itself
445    for line in output.lines().skip(1) {
446        let dev = split_lsblk_line(line);
447        let name = dev
448            .get("NAME")
449            .with_context(|| format!("device in hierarchy of {device} missing NAME"))?;
450        let kind = dev
451            .get("TYPE")
452            .with_context(|| format!("device in hierarchy of {device} missing TYPE"))?;
453        if kind == "disk" || kind == "loop" {
454            parents.push(name.clone());
455        } else if kind == "mpath" {
456            parents.push(name.clone());
457            // we don't need to know what disks back the multipath
458            break;
459        }
460    }
461    Ok(parents)
462}
463
464/// Parse a string into mibibytes
465pub fn parse_size_mib(mut s: &str) -> Result<u64> {
466    let suffixes = [
467        ("MiB", 1u64),
468        ("M", 1u64),
469        ("GiB", 1024),
470        ("G", 1024),
471        ("TiB", 1024 * 1024),
472        ("T", 1024 * 1024),
473    ];
474    let mut mul = 1u64;
475    for (suffix, imul) in suffixes {
476        if let Some((sv, rest)) = s.rsplit_once(suffix) {
477            if !rest.is_empty() {
478                anyhow::bail!("Trailing text after size: {rest}");
479            }
480            s = sv;
481            mul = imul;
482        }
483    }
484    let v = s.parse::<u64>()?;
485    Ok(v * mul)
486}
487
488#[cfg(test)]
489mod test {
490    use super::*;
491
492    #[test]
493    fn test_parse_size_mib() {
494        let ident_cases = [0, 10, 9, 1024].into_iter().map(|k| (k.to_string(), k));
495        let cases = [
496            ("0M", 0),
497            ("10M", 10),
498            ("10MiB", 10),
499            ("1G", 1024),
500            ("9G", 9216),
501            ("11T", 11 * 1024 * 1024),
502        ]
503        .into_iter()
504        .map(|(k, v)| (k.to_string(), v));
505        for (s, v) in ident_cases.chain(cases) {
506            assert_eq!(parse_size_mib(&s).unwrap(), v as u64, "Parsing {s}");
507        }
508    }
509
510    #[test]
511    fn test_parse_lsblk() {
512        let fixture = include_str!("../tests/fixtures/lsblk.json");
513        let devs: DevicesOutput = serde_json::from_str(fixture).unwrap();
514        let dev = devs.blockdevices.into_iter().next().unwrap();
515        let children = dev.children.as_deref().unwrap();
516        assert_eq!(children.len(), 3);
517        let first_child = &children[0];
518        assert_eq!(
519            first_child.parttype.as_deref().unwrap(),
520            "21686148-6449-6e6f-744e-656564454649"
521        );
522        assert_eq!(
523            first_child.partuuid.as_deref().unwrap(),
524            "3979e399-262f-4666-aabc-7ab5d3add2f0"
525        );
526    }
527
528    #[test]
529    fn test_parse_sfdisk() -> Result<()> {
530        let fixture = indoc::indoc! { r#"
531        {
532            "partitiontable": {
533               "label": "gpt",
534               "id": "A67AA901-2C72-4818-B098-7F1CAC127279",
535               "device": "/dev/loop0",
536               "unit": "sectors",
537               "firstlba": 34,
538               "lastlba": 20971486,
539               "sectorsize": 512,
540               "partitions": [
541                  {
542                     "node": "/dev/loop0p1",
543                     "start": 2048,
544                     "size": 8192,
545                     "type": "9E1A2D38-C612-4316-AA26-8B49521E5A8B",
546                     "uuid": "58A4C5F0-BD12-424C-B563-195AC65A25DD",
547                     "name": "PowerPC-PReP-boot"
548                  },{
549                     "node": "/dev/loop0p2",
550                     "start": 10240,
551                     "size": 20961247,
552                     "type": "0FC63DAF-8483-4772-8E79-3D69D8477DE4",
553                     "uuid": "F51ABB0D-DA16-4A21-83CB-37F4C805AAA0",
554                     "name": "root"
555                  }
556               ]
557            }
558         }
559        "# };
560        let table: SfDiskOutput = serde_json::from_str(fixture).unwrap();
561        assert_eq!(
562            table.partitiontable.find("/dev/loop0p2").unwrap().size,
563            20961247
564        );
565        Ok(())
566    }
567
568    #[test]
569    fn test_parttype_matches() {
570        let partition = Partition {
571            node: "/dev/loop0p1".to_string(),
572            start: 2048,
573            size: 8192,
574            parttype: "c12a7328-f81f-11d2-ba4b-00a0c93ec93b".to_string(), // lowercase ESP UUID
575            uuid: Some("58A4C5F0-BD12-424C-B563-195AC65A25DD".to_string()),
576            name: Some("EFI System".to_string()),
577            bootable: None,
578        };
579
580        // Test exact match (lowercase)
581        assert!(partition.parttype_matches("c12a7328-f81f-11d2-ba4b-00a0c93ec93b"));
582
583        // Test case-insensitive match (uppercase)
584        assert!(partition.parttype_matches("C12A7328-F81F-11D2-BA4B-00A0C93EC93B"));
585
586        // Test case-insensitive match (mixed case)
587        assert!(partition.parttype_matches("C12a7328-F81f-11d2-Ba4b-00a0C93ec93b"));
588
589        // Test non-match
590        assert!(!partition.parttype_matches("0FC63DAF-8483-4772-8E79-3D69D8477DE4"));
591    }
592
593    #[test]
594    fn test_find_partition_of_type() -> Result<()> {
595        let fixture = indoc::indoc! { r#"
596        {
597            "partitiontable": {
598               "label": "gpt",
599               "id": "A67AA901-2C72-4818-B098-7F1CAC127279",
600               "device": "/dev/loop0",
601               "unit": "sectors",
602               "firstlba": 34,
603               "lastlba": 20971486,
604               "sectorsize": 512,
605               "partitions": [
606                  {
607                     "node": "/dev/loop0p1",
608                     "start": 2048,
609                     "size": 8192,
610                     "type": "C12A7328-F81F-11D2-BA4B-00A0C93EC93B",
611                     "uuid": "58A4C5F0-BD12-424C-B563-195AC65A25DD",
612                     "name": "EFI System"
613                  },{
614                     "node": "/dev/loop0p2",
615                     "start": 10240,
616                     "size": 20961247,
617                     "type": "0FC63DAF-8483-4772-8E79-3D69D8477DE4",
618                     "uuid": "F51ABB0D-DA16-4A21-83CB-37F4C805AAA0",
619                     "name": "root"
620                  }
621               ]
622            }
623         }
624        "# };
625        let table: SfDiskOutput = serde_json::from_str(fixture).unwrap();
626
627        // Find ESP partition using lowercase UUID (should match uppercase in fixture)
628        let esp = table
629            .partitiontable
630            .find_partition_of_type("c12a7328-f81f-11d2-ba4b-00a0c93ec93b");
631        assert!(esp.is_some());
632        assert_eq!(esp.unwrap().node, "/dev/loop0p1");
633
634        // Find root partition using uppercase UUID (should match case-insensitively)
635        let root = table
636            .partitiontable
637            .find_partition_of_type("0fc63daf-8483-4772-8e79-3d69d8477de4");
638        assert!(root.is_some());
639        assert_eq!(root.unwrap().node, "/dev/loop0p2");
640
641        // Try to find non-existent partition type
642        let nonexistent = table
643            .partitiontable
644            .find_partition_of_type("00000000-0000-0000-0000-000000000000");
645        assert!(nonexistent.is_none());
646
647        // Find esp partition on GPT
648        let esp = table.partitiontable.find_partition_of_esp()?.unwrap();
649        assert_eq!(esp.node, "/dev/loop0p1");
650
651        Ok(())
652    }
653    #[test]
654    fn test_find_partition_of_type_mbr() -> Result<()> {
655        let fixture = indoc::indoc! { r#"
656        {
657            "partitiontable": {
658                "label": "dos",
659                "id": "0xc1748067",
660                "device": "/dev/mmcblk0",
661                "unit": "sectors",
662                "sectorsize": 512,
663                "partitions": [
664                    {
665                        "node": "/dev/mmcblk0p1",
666                        "start": 2048,
667                        "size": 1026048,
668                        "type": "6",
669                        "bootable": true
670                    },{
671                        "node": "/dev/mmcblk0p2",
672                        "start": 1028096,
673                        "size": 2097152,
674                        "type": "83"
675                    },{
676                        "node": "/dev/mmcblk0p3",
677                        "start": 3125248,
678                        "size": 121610240,
679                        "type": "ef"
680                    }
681                ]
682            }
683        }
684        "# };
685        let table: SfDiskOutput = serde_json::from_str(fixture).unwrap();
686
687        // Find ESP partition using bootalbe is true
688        assert_eq!(table.partitiontable.label, PartitionType::Dos);
689        let esp = table
690            .partitiontable
691            .find_partition_of_bootable()
692            .expect("bootable partition not found");
693        assert_eq!(esp.node, "/dev/mmcblk0p1");
694
695        // Find esp partition on MBR
696        let esp1 = table.partitiontable.find_partition_of_esp()?.unwrap();
697        assert_eq!(esp1.node, "/dev/mmcblk0p1");
698        Ok(())
699    }
700}