Files
filereap/src/main.rs
2023-10-05 19:36:33 +02:00

502 lines
15 KiB
Rust

//! CLI to delete dated files in a directory according to a time period config.
//! Useful for backup directories that accumulate files over time.
mod config;
#[macro_use]
extern crate log;
use chrono::{DateTime, Duration, FixedOffset, Local};
use clap::{ArgAction, Parser};
use config::{ConfPeriod, Config, SimpleDuration};
use eyre::{eyre, Context};
use log::LevelFilter;
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::path::PathBuf;
type FileName = DateTime<FixedOffset>;
#[derive(Parser)]
#[command(version)]
struct Opt {
config: PathBuf,
/// Log more stuff
#[clap(long, short, action = ArgAction::Count)]
verbose: u8,
/// Do not output anything but errors.
#[clap(long, short)]
quiet: bool,
/// Do not delete anything
#[clap(long, short)]
dry_run: bool,
}
fn main() -> eyre::Result<()> {
let opt = Opt::parse();
color_eyre::install()?;
let log_level = match opt.verbose {
0 if opt.quiet => LevelFilter::Error,
0 => LevelFilter::Info,
1 => LevelFilter::Debug,
2.. => LevelFilter::Trace,
};
pretty_env_logger::formatted_builder()
.filter(None, log_level)
.init();
run(&opt)
}
fn run(opt: &Opt) -> eyre::Result<()> {
let config = fs::read_to_string(&opt.config)
.wrap_err_with(|| format!("Failed to read config file {:?}", opt.config))?;
let config: Config = toml::from_str(&config).wrap_err("Failed to parse config file")?;
debug!("periods:");
for period in &config.periods {
debug!(
" length={:?}, chunk_size={:?}",
period.period_length, period.chunk_size
);
}
info!("scanning directory {:?}", config.path);
let mut files = BinaryHeap::new();
let dir_err = || format!("Failed to read directory {:?}", config.path);
for entry in fs::read_dir(&config.path).wrap_err_with(dir_err)? {
let name = entry.wrap_err_with(dir_err)?.file_name();
let name = name.to_string_lossy();
if let Ok(time) = DateTime::parse_from_rfc3339(&name) {
trace!("found \"{name}\"");
files.push(time);
} else {
trace!("ignoring \"{name}\", couldn't parse filename as rfc3339");
}
}
let files = files.into_sorted_vec();
let now = Local::now();
let keep_files = check_files_to_keep(now, &config.periods, &files);
info!("final decision:");
for &file in &files {
let keep_file = keep_files.contains(&file);
if keep_file {
debug!(" {file} KEEP");
} else {
info!(" {file} DELETE");
if opt.dry_run {
debug!("dry run enabled, file not deleted");
} else {
delete_file(&config, file)?;
}
}
}
Ok(())
}
fn check_files_to_keep(
now: DateTime<Local>,
periods: &[ConfPeriod],
files: &[FileName],
) -> HashSet<FileName> {
let mut files = files.to_vec();
debug_assert_eq!(
files,
{
let mut sorted = files.clone();
sorted.sort();
sorted
},
"file list must be sorted"
);
let mut chunked_files = HashMap::new();
let mut cursor = now;
'period: for period in periods {
let first_chunk = ChunkTime::of(period, cursor);
let start_index = first_chunk.index();
let stop_index = start_index - period.chunk_count();
trace!("period {period:?}:");
trace!(" first chunk: {first_chunk:?}");
trace!(" index range: {start_index}..{stop_index}");
'chunk: loop {
let file = match files.pop() {
Some(file) => file,
None => break 'period,
};
let file_chunk = ChunkTime::of(period, file.into());
let index = file_chunk.index();
trace!("{file}:");
trace!(" comparing to period {period:?}");
trace!(" is in chunk {file_chunk:?}");
trace!(" with index {index}");
if index <= stop_index {
trace!(" not in this period, checking next");
files.push(file);
cursor = file.into();
break 'chunk;
}
trace!(" keeping for this period");
chunked_files.insert((period, file_chunk), file);
}
}
chunked_files.values().copied().collect()
}
fn delete_file(config: &Config, file: FileName) -> eyre::Result<()> {
let file_path = config.path.join(file.to_rfc3339());
if config.btrfs {
trace!("btrfs subvolume delete {file_path:?}");
use std::process::Command;
let output = Command::new("btrfs")
.args(["subvolume", "delete"])
.arg(&file_path)
.output()
.wrap_err("failed to run 'btrfs subvolume delete'")?;
if !output.status.success() {
let msg = String::from_utf8(output.stderr)
.unwrap_or_else(|_| "Failed to capture stderr".to_string());
return Err(
eyre!("btrfs subvolume delete exited with code {}", output.status)
.wrap_err(msg)
.wrap_err(format!("Failed to delete subvolume {file_path:?}")),
);
};
} else if file_path.is_dir() {
trace!("rm -r {file_path:?}");
fs::remove_dir_all(&file_path)
.wrap_err_with(|| format!("Failed to remove directory {file_path:?}"))?;
} else {
trace!("rm {file_path:?}");
fs::remove_file(&file_path)
.wrap_err_with(|| format!("Failed to remove file {file_path:?}"))?;
}
Ok(())
}
const EPOCH_STR: &str = "1900-01-01T00:00:00+00:00";
fn epoch() -> DateTime<Local> {
DateTime::parse_from_rfc3339(EPOCH_STR)
.expect("Failed to parse epoch")
.into()
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ChunkTime {
/// The value of the time of the chunk, e.g. how many seconds
pub value: i64,
/// The time unit of the chunk, e.g. seconds
pub unit: fn(i64) -> Duration,
/// A number of whole [unit]s since the epoch
///
/// This value corresponds to a time within the chunk
pub since_epoch: i64,
}
impl ChunkTime {
//pub fn next(self) -> ChunkTime {
// Self {
// since_epoch: self.since_epoch + self.value,
// ..self
// }
//}
pub fn index(&self) -> i64 {
self.since_epoch / self.value
}
pub fn start(&self) -> DateTime<Local> {
/// compute the largest multiple of `b`, that is smaller than `a`
fn last_mul_of(a: i64, b: i64) -> i64 {
a / b * b
}
epoch() + (self.unit)(last_mul_of(self.since_epoch, self.value))
}
pub fn of(period: &ConfPeriod, time: DateTime<Local>) -> Self {
let since_epoch = time - epoch();
use SimpleDuration::*;
match period.chunk_size {
Seconds(s) => ChunkTime {
unit: Duration::seconds,
value: s,
since_epoch: since_epoch.num_seconds(),
},
Minutes(m) => ChunkTime {
unit: Duration::minutes,
value: m,
since_epoch: since_epoch.num_minutes(),
},
Hours(h) => ChunkTime {
unit: Duration::hours,
value: h,
since_epoch: since_epoch.num_hours(),
},
Days(d) => ChunkTime {
unit: Duration::days,
value: d,
since_epoch: since_epoch.num_days(),
},
Weeks(w) => ChunkTime {
unit: Duration::weeks,
value: w,
since_epoch: since_epoch.num_weeks(),
},
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::config::{ConfPeriod, SimpleDuration};
use chrono::DateTime;
#[test]
fn chunk_of_period_hours() {
let period = ConfPeriod {
period_length: SimpleDuration::Weeks(1),
chunk_size: SimpleDuration::Hours(12),
};
let tests = [
(
"2020-01-01T12:00:00+00:00", // time
"2020-01-01T12:00:00+00:00", // expected chunk start
87659, // expected chunk index
),
(
"2020-01-02T12:00:00+00:00",
"2020-01-02T12:00:00+00:00",
87661,
),
(
"2020-01-03T12:00:00+00:00",
"2020-01-03T12:00:00+00:00",
87663,
),
(
"2020-01-04T12:00:00+00:00",
"2020-01-04T12:00:00+00:00",
87665,
),
];
for (time, expected_chunk_start, expected_chunk_index) in tests {
let time: DateTime<Local> = DateTime::parse_from_rfc3339(time).unwrap().into();
let expected_chunk_start = DateTime::parse_from_rfc3339(expected_chunk_start).unwrap();
let chunk = ChunkTime::of(&period, time);
assert_eq!(chunk.start(), expected_chunk_start);
assert_eq!(chunk.index(), expected_chunk_index);
}
}
#[test]
fn chunk_of_period_days() {
let period = ConfPeriod {
period_length: SimpleDuration::Days(15),
chunk_size: SimpleDuration::Days(3),
};
let tests = [
(
"2020-01-01T12:00:00+00:00", // time
"2019-12-30T00:00:00+00:00", // expected chunk start
14609, // expected chunk index
),
(
"2020-01-02T12:00:00+00:00",
"2020-01-02T00:00:00+00:00",
14610,
),
(
"2020-01-03T12:00:00+00:00",
"2020-01-02T00:00:00+00:00",
14610,
),
(
"2020-01-04T12:00:00+00:00",
"2020-01-02T00:00:00+00:00",
14610,
),
];
for (time, expected_chunk_start, expected_chunk_index) in tests {
let time: DateTime<Local> = DateTime::parse_from_rfc3339(time).unwrap().into();
let expected_chunk_start = DateTime::parse_from_rfc3339(expected_chunk_start).unwrap();
let chunk = ChunkTime::of(&period, time);
assert_eq!(chunk.start(), expected_chunk_start);
assert_eq!(chunk.index(), expected_chunk_index);
}
}
#[test]
fn delete_files() {
use SimpleDuration::*;
let periods = [
ConfPeriod {
period_length: Hours(6),
chunk_size: Seconds(1),
},
ConfPeriod {
period_length: Hours(6),
chunk_size: Hours(1),
},
ConfPeriod {
period_length: Days(8),
chunk_size: Days(2),
},
];
let input = [
"2020-01-01T01:00:00+00:00",
"2020-01-01T02:00:00+00:00",
"2020-01-01T03:00:00+00:00",
"2020-01-01T04:00:00+00:00",
"2020-01-01T05:00:00+00:00",
"2020-01-01T06:00:00+00:00",
"2020-01-01T07:00:00+00:00",
"2020-01-01T08:00:00+00:00",
"2020-01-01T09:00:00+00:00",
"2020-01-01T10:00:00+00:00",
"2020-01-01T10:00:32+00:00",
"2020-01-01T10:00:33+00:00",
"2020-01-01T10:00:34+00:00",
"2020-01-01T11:00:00+00:00",
"2020-01-01T12:00:00+00:00",
"2020-01-01T13:00:00+00:00",
"2020-01-01T14:00:00+00:00",
"2020-01-01T15:00:00+00:00",
"2020-01-01T16:00:00+00:00",
"2020-01-01T17:00:00+00:00",
"2020-01-01T18:00:00+00:00",
"2020-01-01T19:00:00+00:00",
"2020-01-01T20:00:00+00:00",
"2020-01-01T21:00:00+00:00",
"2020-01-01T22:00:00+00:00",
"2020-01-01T23:00:00+00:00",
"2020-01-02T00:00:00+00:00",
"2020-01-02T01:00:00+00:00",
"2020-01-02T02:00:00+00:00",
"2020-01-02T03:00:00+00:00",
"2020-01-02T04:00:00+00:00",
"2020-01-02T05:00:00+00:00",
"2020-01-02T06:00:00+00:00",
"2020-01-02T07:00:00+00:00",
"2020-01-02T08:00:00+00:00",
"2020-01-02T09:00:00+00:00",
"2020-01-02T10:00:00+00:00",
"2020-01-02T11:00:00+00:00",
"2020-01-02T12:00:00+00:00",
"2020-01-02T13:00:00+00:00",
"2020-01-02T14:00:00+00:00",
"2020-01-02T15:00:00+00:00",
"2020-01-02T16:00:00+00:00",
"2020-01-02T17:00:00+00:00",
"2020-01-02T18:00:00+00:00",
"2020-01-02T19:00:00+00:00",
"2020-01-02T20:00:00+00:00",
"2020-01-02T21:00:00+00:00",
"2020-01-02T22:00:00+00:00",
"2020-01-02T23:00:00+00:00",
"2020-01-03T00:00:00+00:00",
"2020-01-03T01:00:00+00:00",
"2020-01-03T02:00:00+00:00",
"2020-01-03T03:00:00+00:00",
"2020-01-03T04:00:00+00:00",
"2020-01-03T05:00:00+00:00",
"2020-01-03T06:00:00+00:00",
"2020-01-03T07:00:00+00:00",
"2020-01-03T08:00:00+00:00",
"2020-01-03T09:00:00+00:00",
"2020-01-03T10:00:00+00:00",
"2020-01-03T11:00:00+00:00",
"2020-01-03T12:00:00+00:00",
"2020-01-03T13:00:00+00:00",
"2020-01-03T14:00:00+00:00",
"2020-01-03T14:00:10+00:00",
"2020-01-03T14:00:20+00:00",
"2020-01-03T15:00:00+00:00",
"2020-01-03T16:00:00+00:00",
"2020-01-03T17:00:00+00:00",
"2020-01-03T18:00:00+00:00",
"2020-01-03T19:00:00+00:00",
"2020-01-03T20:00:00+00:00",
"2020-01-03T21:00:00+00:00",
"2020-01-03T22:00:30+00:00",
"2020-01-03T22:00:31+00:00",
"2020-01-03T22:00:32+00:00",
"2020-01-03T22:00:33+00:00",
"2020-01-03T23:00:00+00:00",
];
let input = input.map(|date| DateTime::parse_from_rfc3339(date).unwrap());
let expected_output = [
"2020-01-01T01:00:00+00:00",
"2020-01-02T00:00:00+00:00",
"2020-01-03T00:00:00+00:00",
"2020-01-03T13:00:00+00:00",
"2020-01-03T14:00:00+00:00",
"2020-01-03T15:00:00+00:00",
"2020-01-03T16:00:00+00:00",
"2020-01-03T17:00:00+00:00",
"2020-01-03T18:00:00+00:00",
"2020-01-03T19:00:00+00:00",
"2020-01-03T20:00:00+00:00",
"2020-01-03T21:00:00+00:00",
"2020-01-03T22:00:30+00:00",
"2020-01-03T22:00:31+00:00",
"2020-01-03T22:00:32+00:00",
"2020-01-03T22:00:33+00:00",
"2020-01-03T23:00:00+00:00",
];
let expected_output: HashSet<_> = expected_output
.into_iter()
.map(|date| DateTime::parse_from_rfc3339(date).unwrap())
.collect();
let start_time = DateTime::parse_from_rfc3339("2020-01-04T00:00:00+00:00").unwrap();
let output = check_files_to_keep(start_time.into(), &periods, &input).unwrap();
assert_eq!(output, expected_output);
}
}