Patch summary (layout reconstructed from a flattened extraction; blank context
lines and stripped generic parameters such as `DateTime<Local>` were inferred
from the hunk headers and the surrounding code):

  * example.toml  - adds a `[[periods]]` entry of 10m split into 1s chunks.
  * src/config.rs - replaces the std `Duration` parser with `SimpleDuration`
                    (one integer plus one of s/m/h/d/w) and adds
                    `ConfPeriod::chunk_count`.
  * src/main.rs   - `check_files_to_keep` now takes `now` and the period list
                    and keeps one file per `(period, ChunkTime)` key; adds
                    `epoch`, `ChunkTime` and unit tests.

Review notes:
  * `parse_simple_duration` no longer rejects a zero value, yet
    `ConfPeriod::chunk_count` and `ChunkTime::index` divide by the chunk size.
  * `parse_simple_duration` unwraps the last character of the trimmed string,
    so an empty value panics instead of producing a deserialization error.

diff --git a/example.toml b/example.toml
index 18e9d75..24ce146 100644
--- a/example.toml
+++ b/example.toml
@@ -1,6 +1,10 @@
 path = "./test"
 #btrfs = true
 
+[[periods]]
+period_length = "10m"
+chunk_size = "1s"
+
 [[periods]]
 period_length = "1w"
 chunk_size = "1h"
diff --git a/src/config.rs b/src/config.rs
index 8b43891..bc36419 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,8 +1,8 @@
+use chrono::Duration;
 use serde::de::Visitor;
 use serde::{Deserialize, Deserializer};
 use std::fmt;
 use std::path::PathBuf;
-use std::time::Duration;
 
 #[derive(Deserialize)]
 pub struct Config {
@@ -16,60 +16,68 @@ pub struct Config {
     pub periods: Vec<ConfPeriod>,
 }
 
-#[derive(Deserialize)]
-pub struct ConfPeriod {
-    /// The total duration of this period
-    #[serde(deserialize_with = "parse_duration")]
-    pub period_length: Duration,
-
-    /// The size of chunks in this period. Each chunk should hold 1 file.
-    #[serde(deserialize_with = "parse_duration")]
-    pub chunk_size: Duration,
+#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
+pub enum SimpleDuration {
+    Weeks(i64),
+    Days(i64),
+    Hours(i64),
+    Minutes(i64),
+    Seconds(i64),
 }
 
-fn parse_duration<'de, D>(d: D) -> Result<Duration, D::Error>
+impl From<SimpleDuration> for Duration {
+    fn from(simple: SimpleDuration) -> Duration {
+        match simple {
+            SimpleDuration::Weeks(weeks) => Duration::weeks(weeks),
+            SimpleDuration::Days(days) => Duration::days(days),
+            SimpleDuration::Hours(hours) => Duration::hours(hours),
+            SimpleDuration::Minutes(minutes) => Duration::minutes(minutes),
+            SimpleDuration::Seconds(seconds) => Duration::seconds(seconds),
+        }
+    }
+}
+
+#[derive(Debug, Deserialize, Hash, PartialEq, Eq)]
+pub struct ConfPeriod {
+    /// The total duration of this period
+    #[serde(deserialize_with = "parse_simple_duration")]
+    pub period_length: SimpleDuration,
+
+    /// The size of chunks in this period. Each chunk should hold 1 file.
+    #[serde(deserialize_with = "parse_simple_duration")]
+    pub chunk_size: SimpleDuration,
+}
+
+impl ConfPeriod {
+    pub fn chunk_count(&self) -> i64 {
+        let period_length: Duration = self.period_length.into();
+        let chunk_size: Duration = self.chunk_size.into();
+        period_length.num_milliseconds() / chunk_size.num_milliseconds()
+    }
+}
+
+fn parse_simple_duration<'de, D>(d: D) -> Result<SimpleDuration, D::Error>
 where
     D: Deserializer<'de>,
 {
     let s = d.deserialize_string(StringVisitor)?;
+    let s = s.trim();
 
-    let mut duration = Duration::ZERO;
+    let suffix = s.chars().rev().next().unwrap();
+    let value = &s[..s.len() - suffix.len_utf8()];
 
-    for part in s.split_whitespace() {
-        if part.len() < 2 {
-            continue;
-        }
+    let value: u64 = value.parse().expect("failed to parse duration value");
+    let value = value as i64;
 
-        let suffix = part.chars().rev().next().unwrap();
-        let value = &part[..part.len() - suffix.len_utf8()];
-
-        let value: u32 = value.parse().expect("failed to parse duration value");
-
-        let second: Duration = Duration::from_secs(1);
-        let minute: Duration = second * 60;
-        let hour: Duration = minute * 60;
-        let day: Duration = hour * 24;
-        let week: Duration = day * 7;
-        let year: Duration = day * 365;
-
-        let unit = match suffix.to_ascii_lowercase() {
-            's' => second,
-            'm' => minute,
-            'h' => hour,
-            'd' => day,
-            'w' => week,
-            'y' => year,
-            _ => panic!("unknown unit of duration"),
-        };
-
-        duration += unit * value;
-    }
-
-    if duration == Duration::ZERO {
-        panic!("Invalid duration: Zero");
-    }
-
-    Ok(duration)
+    use SimpleDuration::*;
+    Ok(match suffix.to_ascii_lowercase() {
+        's' => Seconds(value),
+        'm' => Minutes(value),
+        'h' => Hours(value),
+        'd' => Days(value),
+        'w' => Weeks(value),
+        _ => panic!("unknown unit of duration"),
+    })
 }
 
 struct StringVisitor;
diff --git a/src/main.rs b/src/main.rs
index 3c46d07..2e2893b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,13 @@
 mod config;
 
-use chrono::{DateTime, FixedOffset, Local};
 #[macro_use]
 extern crate log;
 
+use chrono::{DateTime, Duration, FixedOffset, Local};
 use clap::Parser;
-use config::Config;
+use config::{ConfPeriod, Config, SimpleDuration};
 use log::LevelFilter;
-use std::collections::{BinaryHeap, HashSet};
+use std::collections::{BinaryHeap, HashMap, HashSet};
 use std::fs;
 use std::io;
 use std::path::PathBuf;
@@ -40,9 +40,6 @@ enum Error {
     #[error("Failed to parse config: {0}")]
     ParseConfig(#[from] toml::de::Error),
 
-    #[error("Managed to overflow a DateTime. What did you do??")]
-    DateTimeOverflow,
-
     #[error("Failed to delete btrfs subvolume: {0}")]
     DeleteSubvolume(String),
 }
@@ -90,7 +87,8 @@ fn run(opt: &Opt) -> Result<(), Error> {
     }
 
     let files = files.into_sorted_vec();
-    let keep_files = check_files_to_keep(&config, &files)?;
+    let now = Local::now();
+    let keep_files = check_files_to_keep(now, &config.periods, &files)?;
 
     info!("final decision:");
     for &file in &files {
@@ -111,80 +109,64 @@ fn run(opt: &Opt) -> Result<(), Error> {
     Ok(())
 }
 
-fn check_files_to_keep(config: &Config, files: &[FileName]) -> Result<HashSet<FileName>, Error> {
+fn check_files_to_keep(
+    now: DateTime<Local>,
+    periods: &[ConfPeriod],
+    files: &[FileName],
+) -> Result<HashSet<FileName>, Error> {
     let mut files = files.to_vec();
 
-    let mut keep_files = HashSet::new();
+    debug_assert_eq!(
+        files,
+        {
+            let mut sorted = files.clone();
+            sorted.sort();
+            sorted
+        },
+        "file list must be sorted"
+    );
+
+    let mut chunked_files = HashMap::new();
 
-    let now = Local::now();
     let mut cursor = now;
 
-    'period: for period in &config.periods {
-        if files.is_empty() {
-            trace!("no more files, skipping remaining periods");
-            break;
-        }
+    'period: for period in periods {
+        let first_chunk = ChunkTime::of(period, cursor);
+        let start_index = first_chunk.index();
+        let stop_index = start_index - period.chunk_count();
 
-        let period_length = chrono::Duration::from_std(period.period_length)
-            .map_err(|_| Error::DateTimeOverflow)?;
-        let chunk_size =
-            chrono::Duration::from_std(period.chunk_size).map_err(|_| Error::DateTimeOverflow)?;
+        trace!("period {period:?}:");
+        trace!(" first chunk: {first_chunk:?}");
+        trace!(" index range: {start_index}..{stop_index}");
 
-        if period_length < chunk_size {
-            panic!("invalid period configuration");
-        }
+        'chunk: loop {
+            let file = match files.pop() {
+                Some(file) => file,
+                None => break 'period,
+            };
 
-        // NOTE: we are looking backwards in time, so all checks and additions need to be inverted
-        let period_end = cursor - period_length;
+            let file_chunk = ChunkTime::of(period, file.into());
 
-        while cursor > period_end {
-            if files.is_empty() {
-                trace!("no more files, skipping remaining chunks");
-                break;
+            let index = file_chunk.index();
+
+            trace!("{file}:");
+            trace!(" comparing to period {period:?}");
+            trace!(" is in chunk {file_chunk:?}");
+            trace!(" with index {index}");
+
+            if index <= stop_index {
+                trace!(" not in this period, checking next");
+                files.push(file);
+                cursor = file.into();
+                break 'chunk;
             }
 
-            let start_of_chunk = cursor;
-            let end_of_chunk = cursor - chunk_size;
-            cursor = end_of_chunk;
-
-            let mut found_file_for_chunk = false;
-
-            trace!("processing chunk {end_of_chunk} -> {start_of_chunk}");
-            loop {
-                let file = match files.pop() {
-                    Some(file) => file,
-                    None => break,
-                };
-
-                if file > start_of_chunk {
-                    trace!("{file} outside of chunk bounds. ignoring.");
-                    keep_files.insert(file);
-                } else if file > end_of_chunk {
-                    if !found_file_for_chunk {
-                        keep_files.insert(file);
-                        found_file_for_chunk = true;
-                        trace!("{file} is in chunk. keeping.");
-                    } else {
-                        trace!("{file} is in chunk. beaten by another file");
-                    }
-                } else {
-                    files.push(file); // put the file back in the queue
-
-                    if file < period_end {
-                        trace!("reached end of period");
-                        continue 'period;
-                    } else {
-                        trace!("reached end of chunk");
-                        break;
-                    }
-                }
-            }
+            trace!(" keeping for this period");
+            chunked_files.insert((period, file_chunk), file);
         }
-
-        cursor = period_end;
     }
 
-    Ok(keep_files)
+    Ok(chunked_files.values().copied().collect())
 }
 
 fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
@@ -203,15 +185,311 @@ fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
                 .unwrap_or_else(|_| "Failed to capture stderr".to_string());
             return Err(Error::DeleteSubvolume(msg));
         };
+    } else if file_path.is_dir() {
+        trace!("rm -r {file_path:?}");
+        fs::remove_dir_all(file_path)?;
     } else {
-        if file_path.is_dir() {
-            trace!("rm -r {file_path:?}");
-            fs::remove_dir_all(file_path)?;
-        } else {
-            trace!("rm {file_path:?}");
-            fs::remove_file(file_path)?;
-        }
+        trace!("rm {file_path:?}");
+        fs::remove_file(file_path)?;
     }
 
     Ok(())
 }
+
+const EPOCH_STR: &str = "1900-01-01T00:00:00+00:00";
+fn epoch() -> DateTime<Local> {
+    DateTime::parse_from_rfc3339(EPOCH_STR)
+        .expect("Failed to parse epoch")
+        .into()
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct ChunkTime {
+    /// The value of the time of the chunk, e.g. how many seconds
+    pub value: i64,
+
+    /// The time unit of the chunk, e.g. seconds
+    pub unit: fn(i64) -> Duration,
+
+    /// A number of whole [unit]s since the epoch
+    ///
+    /// This value corresponds to a time within the chunk
+    pub since_epoch: i64,
+}
+
+impl ChunkTime {
+    //pub fn next(self) -> ChunkTime {
+    //    Self {
+    //        since_epoch: self.since_epoch + self.value,
+    //        ..self
+    //    }
+    //}
+
+    pub fn index(&self) -> i64 {
+        self.since_epoch / self.value
+    }
+
+    pub fn start(&self) -> DateTime<Local> {
+        /// compute the largest multiple of `b`, that is smaller than `a`
+        fn last_mul_of(a: i64, b: i64) -> i64 {
+            a / b * b
+        }
+
+        epoch() + (self.unit)(last_mul_of(self.since_epoch, self.value))
+    }
+
+    pub fn of(period: &ConfPeriod, time: DateTime<Local>) -> Self {
+        let since_epoch = time - epoch();
+
+        use SimpleDuration::*;
+        match period.chunk_size {
+            Seconds(s) => ChunkTime {
+                unit: Duration::seconds,
+                value: s,
+                since_epoch: since_epoch.num_seconds(),
+            },
+            Minutes(m) => ChunkTime {
+                unit: Duration::minutes,
+                value: m,
+                since_epoch: since_epoch.num_minutes(),
+            },
+            Hours(h) => ChunkTime {
+                unit: Duration::hours,
+                value: h,
+                since_epoch: since_epoch.num_hours(),
+            },
+            Days(d) => ChunkTime {
+                unit: Duration::days,
+                value: d,
+                since_epoch: since_epoch.num_days(),
+            },
+            Weeks(w) => ChunkTime {
+                unit: Duration::weeks,
+                value: w,
+                since_epoch: since_epoch.num_weeks(),
+            },
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::config::{ConfPeriod, SimpleDuration};
+    use chrono::DateTime;
+
+    #[test]
+    fn chunk_of_period_hours() {
+        let period = ConfPeriod {
+            period_length: SimpleDuration::Weeks(1),
+            chunk_size: SimpleDuration::Hours(12),
+        };
+
+        let tests = [
+            (
+                "2020-01-01T12:00:00+00:00", // time
+                "2020-01-01T12:00:00+00:00", // expected chunk start
+                87659,                       // expected chunk index
+            ),
+            (
+                "2020-01-02T12:00:00+00:00",
+                "2020-01-02T12:00:00+00:00",
+                87661,
+            ),
+            (
+                "2020-01-03T12:00:00+00:00",
+                "2020-01-03T12:00:00+00:00",
+                87663,
+            ),
+            (
+                "2020-01-04T12:00:00+00:00",
+                "2020-01-04T12:00:00+00:00",
+                87665,
+            ),
+        ];
+
+        for (time, expected_chunk_start, expected_chunk_index) in tests {
+            let time: DateTime<Local> = DateTime::parse_from_rfc3339(time).unwrap().into();
+            let expected_chunk_start = DateTime::parse_from_rfc3339(expected_chunk_start).unwrap();
+
+            let chunk = ChunkTime::of(&period, time);
+
+            assert_eq!(chunk.start(), expected_chunk_start);
+            assert_eq!(chunk.index(), expected_chunk_index);
+        }
+    }
+
+    #[test]
+    fn chunk_of_period_days() {
+        let period = ConfPeriod {
+            period_length: SimpleDuration::Days(15),
+            chunk_size: SimpleDuration::Days(3),
+        };
+
+        let tests = [
+            (
+                "2020-01-01T12:00:00+00:00", // time
+                "2019-12-30T00:00:00+00:00", // expected chunk start
+                14609,                       // expected chunk index
+            ),
+            (
+                "2020-01-02T12:00:00+00:00",
+                "2020-01-02T00:00:00+00:00",
+                14610,
+            ),
+            (
+                "2020-01-03T12:00:00+00:00",
+                "2020-01-02T00:00:00+00:00",
+                14610,
+            ),
+            (
+                "2020-01-04T12:00:00+00:00",
+                "2020-01-02T00:00:00+00:00",
+                14610,
+            ),
+        ];
+
+        for (time, expected_chunk_start, expected_chunk_index) in tests {
+            let time: DateTime<Local> = DateTime::parse_from_rfc3339(time).unwrap().into();
+            let expected_chunk_start = DateTime::parse_from_rfc3339(expected_chunk_start).unwrap();
+
+            let chunk = ChunkTime::of(&period, time);
+
+            assert_eq!(chunk.start(), expected_chunk_start);
+            assert_eq!(chunk.index(), expected_chunk_index);
+        }
+    }
+
+    #[test]
+    fn delete_files() {
+        use SimpleDuration::*;
+
+        let periods = [
+            ConfPeriod {
+                period_length: Hours(6),
+                chunk_size: Seconds(1),
+            },
+            ConfPeriod {
+                period_length: Hours(6),
+                chunk_size: Hours(1),
+            },
+            ConfPeriod {
+                period_length: Days(8),
+                chunk_size: Days(2),
+            },
+        ];
+
+        let input = [
+            "2020-01-01T01:00:00+00:00",
+            "2020-01-01T02:00:00+00:00",
+            "2020-01-01T03:00:00+00:00",
+            "2020-01-01T04:00:00+00:00",
+            "2020-01-01T05:00:00+00:00",
+            "2020-01-01T06:00:00+00:00",
+            "2020-01-01T07:00:00+00:00",
+            "2020-01-01T08:00:00+00:00",
+            "2020-01-01T09:00:00+00:00",
+            "2020-01-01T10:00:00+00:00",
+            "2020-01-01T10:00:32+00:00",
+            "2020-01-01T10:00:33+00:00",
+            "2020-01-01T10:00:34+00:00",
+            "2020-01-01T11:00:00+00:00",
+            "2020-01-01T12:00:00+00:00",
+            "2020-01-01T13:00:00+00:00",
+            "2020-01-01T14:00:00+00:00",
+            "2020-01-01T15:00:00+00:00",
+            "2020-01-01T16:00:00+00:00",
+            "2020-01-01T17:00:00+00:00",
+            "2020-01-01T18:00:00+00:00",
+            "2020-01-01T19:00:00+00:00",
+            "2020-01-01T20:00:00+00:00",
+            "2020-01-01T21:00:00+00:00",
+            "2020-01-01T22:00:00+00:00",
+            "2020-01-01T23:00:00+00:00",
+            "2020-01-02T00:00:00+00:00",
+            "2020-01-02T01:00:00+00:00",
+            "2020-01-02T02:00:00+00:00",
+            "2020-01-02T03:00:00+00:00",
+            "2020-01-02T04:00:00+00:00",
+            "2020-01-02T05:00:00+00:00",
+            "2020-01-02T06:00:00+00:00",
+            "2020-01-02T07:00:00+00:00",
+            "2020-01-02T08:00:00+00:00",
+            "2020-01-02T09:00:00+00:00",
+            "2020-01-02T10:00:00+00:00",
+            "2020-01-02T11:00:00+00:00",
+            "2020-01-02T12:00:00+00:00",
+            "2020-01-02T13:00:00+00:00",
+            "2020-01-02T14:00:00+00:00",
+            "2020-01-02T15:00:00+00:00",
+            "2020-01-02T16:00:00+00:00",
+            "2020-01-02T17:00:00+00:00",
+            "2020-01-02T18:00:00+00:00",
+            "2020-01-02T19:00:00+00:00",
+            "2020-01-02T20:00:00+00:00",
+            "2020-01-02T21:00:00+00:00",
+            "2020-01-02T22:00:00+00:00",
+            "2020-01-02T23:00:00+00:00",
+            "2020-01-03T00:00:00+00:00",
+            "2020-01-03T01:00:00+00:00",
+            "2020-01-03T02:00:00+00:00",
+            "2020-01-03T03:00:00+00:00",
+            "2020-01-03T04:00:00+00:00",
+            "2020-01-03T05:00:00+00:00",
+            "2020-01-03T06:00:00+00:00",
+            "2020-01-03T07:00:00+00:00",
+            "2020-01-03T08:00:00+00:00",
+            "2020-01-03T09:00:00+00:00",
+            "2020-01-03T10:00:00+00:00",
+            "2020-01-03T11:00:00+00:00",
+            "2020-01-03T12:00:00+00:00",
+            "2020-01-03T13:00:00+00:00",
+            "2020-01-03T14:00:00+00:00",
+            "2020-01-03T14:00:10+00:00",
+            "2020-01-03T14:00:20+00:00",
+            "2020-01-03T15:00:00+00:00",
+            "2020-01-03T16:00:00+00:00",
+            "2020-01-03T17:00:00+00:00",
+            "2020-01-03T18:00:00+00:00",
+            "2020-01-03T19:00:00+00:00",
+            "2020-01-03T20:00:00+00:00",
+            "2020-01-03T21:00:00+00:00",
+            "2020-01-03T22:00:30+00:00",
+            "2020-01-03T22:00:31+00:00",
+            "2020-01-03T22:00:32+00:00",
+            "2020-01-03T22:00:33+00:00",
+            "2020-01-03T23:00:00+00:00",
+        ];
+        let input = input.map(|date| DateTime::parse_from_rfc3339(date).unwrap());
+
+        let expected_output = [
+            "2020-01-01T01:00:00+00:00",
+            "2020-01-02T00:00:00+00:00",
+            "2020-01-03T00:00:00+00:00",
+            "2020-01-03T13:00:00+00:00",
+            "2020-01-03T14:00:00+00:00",
+            "2020-01-03T15:00:00+00:00",
+            "2020-01-03T16:00:00+00:00",
+            "2020-01-03T17:00:00+00:00",
+            "2020-01-03T18:00:00+00:00",
+            "2020-01-03T19:00:00+00:00",
+            "2020-01-03T20:00:00+00:00",
+            "2020-01-03T21:00:00+00:00",
+            "2020-01-03T22:00:30+00:00",
+            "2020-01-03T22:00:31+00:00",
+            "2020-01-03T22:00:32+00:00",
+            "2020-01-03T22:00:33+00:00",
+            "2020-01-03T23:00:00+00:00",
+        ];
+        let expected_output: HashSet<_> = expected_output
+            .into_iter()
+            .map(|date| DateTime::parse_from_rfc3339(date).unwrap())
+            .collect();
+
+        let start_time = DateTime::parse_from_rfc3339("2020-01-04T00:00:00+00:00").unwrap();
+
+        let output = check_files_to_keep(start_time.into(), &periods, &input).unwrap();
+
+        assert_eq!(output, expected_output);
+    }
+}