From 2fa673b74737fcdc8184054b48307c574774d296 Mon Sep 17 00:00:00 2001 From: Joakim Hulthe Date: Fri, 15 Apr 2022 15:53:19 +0200 Subject: [PATCH] Initial commit --- .gitignore | 1 + Cargo.lock | 421 ++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 13 ++ example.toml | 14 ++ src/config.rs | 91 +++++++++++ src/main.rs | 211 +++++++++++++++++++++++++ 6 files changed, 751 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 example.toml create mode 100644 src/config.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5e781c8 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,421 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "clap" +version = "3.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "indexmap", + "lazy_static", + "os_str_bytes", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_derive" +version = "3.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3aab4734e083b809aaf5794e14e756d1c798d2c69c7f7de7a09a2f5214993c1" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "file_reap" +version = "0.1.0" +dependencies = [ + "chrono", + "clap", + "log", + "pretty_env_logger", + "serde", + "thiserror", + "toml", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + +[[package]] +name = "indexmap" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb691a747a7ab48abc15c5b42066eaafde10dc427e3b6ee2a1cf43db04c763bd" + +[[package]] +name = "log" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "os_str_bytes" +version = "6.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" +dependencies = [ + "memchr", +] + +[[package]] +name = "pretty_env_logger" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +dependencies = [ + "env_logger", + "log", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = 
"0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "serde" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" 
+dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "toml" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +dependencies = [ + "serde", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..b881f2f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "file_reap" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "3.1.8", features = ["derive"] } +chrono = "0.4.19" +thiserror = "1.0.30" +serde = { version = "1.0.136", features = ["derive"] } +toml = "0.5.9" +log = "0.4.16" +pretty_env_logger = "0.4.0" diff --git a/example.toml b/example.toml new file mode 100644 index 0000000..18e9d75 --- /dev/null +++ b/example.toml @@ -0,0 +1,14 @@ +path = "./test" +#btrfs = true + +[[periods]] +period_length = "1w" +chunk_size = "1h" + +[[periods]] +period_length = "4w" +chunk_size = "1d" + +[[periods]] +period_length = "40w" +chunk_size = "4w" diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..8b43891 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,91 @@ +use serde::de::Visitor; +use serde::{Deserialize, Deserializer}; +use std::fmt; +use std::path::PathBuf; +use std::time::Duration; + +#[derive(Deserialize)] +pub struct Config { + /// The folder from which to reap + pub path: PathBuf, + + /// Whether to treat the files as btrfs subvolumes + #[serde(default)] + pub btrfs: bool, + + pub periods: Vec, +} + +#[derive(Deserialize)] +pub struct ConfPeriod { + /// The total duration of this period + #[serde(deserialize_with = "parse_duration")] + pub period_length: Duration, + + /// The size of chunks in this period. Each chunk should hold 1 file. 
+    #[serde(deserialize_with = "parse_duration")]
+    pub chunk_size: Duration,
+}
+
+/// Deserializes a duration written as whitespace-separated `<number><unit>` parts, e.g. `"1w 2d"`.
+/// Units are case-insensitive: `s`, `m`, `h`, `d`, `w` and `y` (365 days). Malformed parts, unknown
+/// units, overflow and a total of zero are reported as deserialization errors.
+fn parse_duration<'de, D>(d: D) -> Result<Duration, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    use serde::de::Error as _;
+    const MINUTE: u64 = 60;
+    const HOUR: u64 = 60 * MINUTE;
+    const DAY: u64 = 24 * HOUR;
+
+    let s = d.deserialize_string(StringVisitor)?;
+    let mut duration = Duration::ZERO;
+
+    for part in s.split_whitespace() {
+        // The last character is the unit; everything before it must be the numeric value.
+        let suffix = part.chars().next_back().expect("split_whitespace yields non-empty parts");
+        let value: u32 = part[..part.len() - suffix.len_utf8()]
+            .parse()
+            .map_err(|e| D::Error::custom(format!("invalid duration value in {part:?}: {e}")))?;
+
+        let unit_secs = match suffix.to_ascii_lowercase() {
+            's' => 1,
+            'm' => MINUTE,
+            'h' => HOUR,
+            'd' => DAY,
+            'w' => 7 * DAY,
+            'y' => 365 * DAY,
+            _ => return Err(D::Error::custom(format!("unknown unit of duration in {part:?}"))),
+        };
+        // `u32::MAX` years in seconds still fit in a `u64`; only the running sum can overflow.
+        duration = duration
+            .checked_add(Duration::from_secs(unit_secs * u64::from(value)))
+            .ok_or_else(|| D::Error::custom("duration is too large"))?;
+    }
+
+    if duration == Duration::ZERO {
+        return Err(D::Error::custom("Invalid duration: Zero"));
+    }
+
+    Ok(duration)
+}
+
+/// Visitor that accepts a borrowed or an owned string and yields it as an owned `String`.
+struct StringVisitor;
+
+impl<'de> Visitor<'de> for StringVisitor {
+    type Value = String;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a string")
+    }
+
+    fn visit_string<E>(self, value: String) -> Result<String, E> {
+        Ok(value)
+    }
+
+    fn visit_str<E>(self, value: &str) -> Result<String, E> {
+        Ok(value.to_string())
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..43b67bd
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,242 @@
+//! Reaps timestamp-named files: each configured period keeps at most one file per chunk.
+
+mod config;
+
+use chrono::{DateTime, FixedOffset, Local};
+#[macro_use]
+extern crate log;
+
+use clap::Parser;
+use config::Config;
+use log::LevelFilter;
+use std::collections::{BinaryHeap, HashSet};
+use std::fs;
+use std::io;
+use std::path::PathBuf;
+use thiserror::Error;
+
+/// Files are identified by the RFC 3339 timestamp that makes up their name.
+type FileName = DateTime<FixedOffset>;
+
+// Command-line options (plain comment: clap would turn a doc comment here into help text).
+#[derive(Parser)]
+struct Opt {
+    // Path of the TOML configuration file.
+    config: PathBuf,
+
+    /// Log more stuff
+    #[clap(long, short, parse(from_occurrences))]
+    verbose: u8,
+
+    /// Do not output anything but errors.
+    #[clap(long, short)]
+    quiet: bool,
+
+    /// Do not delete anything
+    #[clap(long, short)]
+    dry_run: bool,
+}
+
+/// Everything that can make a run fail.
+#[derive(Debug, Error)]
+enum Error {
+    #[error("I/O error: {0}")]
+    IO(#[from] io::Error),
+
+    #[error("Failed to parse config: {0}")]
+    ParseConfig(#[from] toml::de::Error),
+
+    #[error("Invalid period configuration: chunk_size must not exceed period_length")]
+    InvalidPeriod,
+
+    #[error("Managed to overflow a DateTime. What did you do??")]
+    DateTimeOverflow,
+
+    #[error("Failed to delete btrfs subvolume: {0}")]
+    DeleteSubvolume(String),
+}
+
+/// Parses the command line, sets up logging and runs the reaper, exiting with status 1 on failure.
+fn main() {
+    let opt = Opt::parse();
+
+    let log_level = match opt.verbose {
+        0 if opt.quiet => LevelFilter::Error,
+        0 => LevelFilter::Info,
+        1 => LevelFilter::Debug,
+        2.. => LevelFilter::Trace,
+    };
+
+    pretty_env_logger::formatted_builder()
+        .filter(None, log_level)
+        .init();
+
+    if let Err(e) = run(&opt) {
+        // Log at error level so `--quiet` still reports it, and let callers see the failure.
+        error!("{e}");
+        std::process::exit(1);
+    }
+}
+
+/// Loads the config, scans `config.path` for entries named with an RFC 3339 timestamp and deletes
+/// those that [`check_files_to_keep`] did not select. With `--dry-run` nothing is deleted.
+fn run(opt: &Opt) -> Result<(), Error> {
+    let config = fs::read_to_string(&opt.config)?;
+    let config: Config = toml::from_str(&config)?;
+    debug!("periods:");
+    for period in &config.periods {
+        debug!(
+            " length={:?}, chunk_size={:?}",
+            period.period_length, period.chunk_size
+        );
+    }
+
+    info!("scanning directory {:?}", config.path);
+
+    let mut files = BinaryHeap::new();
+    for entry in fs::read_dir(&config.path)? {
+        let name = entry?.file_name();
+        let name = name.to_string_lossy();
+        // Entries whose name is not a timestamp are skipped and therefore never deleted.
+        if let Ok(time) = DateTime::parse_from_rfc3339(&name) {
+            trace!("found \"{name}\"");
+            files.push(time);
+        }
+    }
+    let files = files.into_sorted_vec();
+
+    let keep_files = check_files_to_keep(&config, &files)?;
+
+    info!("final decision:");
+    for &file in &files {
+        if keep_files.contains(&file) {
+            debug!(" {file} KEEP");
+            continue;
+        }
+
+        info!(" {file} DELETE");
+        if opt.dry_run {
+            debug!("dry run enabled, file not deleted");
+        } else {
+            delete_file(&config, file)?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Selects the files to keep: walking backwards from now, every chunk of every period keeps the
+/// newest file inside it. Files newer than the chunk being processed are kept as well.
+///
+/// `files` must be sorted oldest-first. Files older than the last period are not selected.
+///
+/// # Errors
+///
+/// [`Error::InvalidPeriod`] if a period is shorter than its chunk size, and
+/// [`Error::DateTimeOverflow`] if a configured duration or a computed date is out of range.
+fn check_files_to_keep(config: &Config, files: &[FileName]) -> Result<HashSet<FileName>, Error> {
+    // Popping from the end of the oldest-first list yields the newest remaining file.
+    let mut files = files.to_vec();
+
+    let mut keep_files = HashSet::new();
+
+    let mut cursor = Local::now();
+
+    for period in &config.periods {
+        if files.is_empty() {
+            trace!("no more files, skipping remaining periods");
+            break;
+        }
+
+        let period_length = chrono::Duration::from_std(period.period_length)
+            .map_err(|_| Error::DateTimeOverflow)?;
+        let chunk_size =
+            chrono::Duration::from_std(period.chunk_size).map_err(|_| Error::DateTimeOverflow)?;
+
+        if period_length < chunk_size {
+            return Err(Error::InvalidPeriod);
+        }
+
+        // NOTE: we are looking backwards in time, so all checks and additions need to be inverted
+        let period_end = cursor
+            .checked_sub_signed(period_length)
+            .ok_or(Error::DateTimeOverflow)?;
+
+        while cursor > period_end {
+            if files.is_empty() {
+                trace!("no more files, skipping remaining chunks");
+                break;
+            }
+
+            let start_of_chunk = cursor;
+            let end_of_chunk = cursor
+                .checked_sub_signed(chunk_size)
+                .ok_or(Error::DateTimeOverflow)?;
+            cursor = end_of_chunk;
+
+            let mut chunk_file_to_keep: Option<FileName> = None;
+
+            trace!("processing chunk {end_of_chunk} -> {start_of_chunk}");
+            while let Some(file) = files.pop() {
+                if file > start_of_chunk {
+                    trace!("{file} outside of chunk bounds. ignoring.");
+                    keep_files.insert(file);
+                } else if file > end_of_chunk {
+                    match chunk_file_to_keep {
+                        Some(newest) => trace!("{file} is in chunk. beaten by {newest}"),
+                        None => {
+                            trace!("{file} is in chunk. newest so far");
+                            chunk_file_to_keep = Some(file);
+                        }
+                    }
+                } else {
+                    trace!("reached end of chunk");
+                    files.push(file); // put the file back in the queue
+                    break;
+                }
+            }
+
+            if let Some(file) = chunk_file_to_keep {
+                trace!("keeping files {file}");
+                keep_files.insert(file);
+            }
+        }
+
+        cursor = period_end;
+    }
+
+    Ok(keep_files)
+}
+
+/// Deletes the entry named after `file` inside `config.path`.
+///
+/// With `btrfs` enabled this runs `btrfs subvolume delete`; otherwise directories are removed
+/// recursively and anything else is removed as a regular file.
+fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
+    // NOTE(review): the name is rebuilt from the parsed timestamp; this assumes the on-disk name is
+    // exactly what `to_rfc3339` produces (e.g. `+00:00`, not `Z`) - confirm how the files are named.
+    let file_path = config.path.join(file.to_rfc3339());
+
+    if config.btrfs {
+        trace!("btrfs subvolume delete {file_path:?}");
+        use std::process::Command;
+        let output = Command::new("btrfs")
+            .args(["subvolume", "delete"])
+            .arg(file_path)
+            .output()?;
+
+        if !output.status.success() {
+            // Lossy decoding keeps whatever btrfs printed even if it is not valid UTF-8.
+            let msg = String::from_utf8_lossy(&output.stderr).into_owned();
+            return Err(Error::DeleteSubvolume(msg));
+        }
+    } else if file_path.is_dir() {
+        trace!("rm -r {file_path:?}");
+        fs::remove_dir_all(file_path)?;
+    } else {
+        trace!("rm {file_path:?}");
+        fs::remove_file(file_path)?;
+    }
+
+    Ok(())
+}