Fix everything

This commit is contained in:
2022-05-26 13:42:57 +02:00
parent 22a0cbc07b
commit 770cf5ea0e
3 changed files with 410 additions and 120 deletions

View File

@ -1,13 +1,13 @@
mod config;
use chrono::{DateTime, FixedOffset, Local};
#[macro_use]
extern crate log;
use chrono::{DateTime, Duration, FixedOffset, Local};
use clap::Parser;
use config::Config;
use config::{ConfPeriod, Config, SimpleDuration};
use log::LevelFilter;
use std::collections::{BinaryHeap, HashSet};
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::io;
use std::path::PathBuf;
@ -40,9 +40,6 @@ enum Error {
#[error("Failed to parse config: {0}")]
ParseConfig(#[from] toml::de::Error),
#[error("Managed to overflow a DateTime. What did you do??")]
DateTimeOverflow,
#[error("Failed to delete btrfs subvolume: {0}")]
DeleteSubvolume(String),
}
@ -90,7 +87,8 @@ fn run(opt: &Opt) -> Result<(), Error> {
}
let files = files.into_sorted_vec();
let keep_files = check_files_to_keep(&config, &files)?;
let now = Local::now();
let keep_files = check_files_to_keep(now, &config.periods, &files)?;
info!("final decision:");
for &file in &files {
@ -111,80 +109,64 @@ fn run(opt: &Opt) -> Result<(), Error> {
Ok(())
}
fn check_files_to_keep(config: &Config, files: &[FileName]) -> Result<HashSet<FileName>, Error> {
fn check_files_to_keep(
now: DateTime<Local>,
periods: &[ConfPeriod],
files: &[FileName],
) -> Result<HashSet<FileName>, Error> {
let mut files = files.to_vec();
let mut keep_files = HashSet::new();
debug_assert_eq!(
files,
{
let mut sorted = files.clone();
sorted.sort();
sorted
},
"file list must be sorted"
);
let mut chunked_files = HashMap::new();
let now = Local::now();
let mut cursor = now;
'period: for period in &config.periods {
if files.is_empty() {
trace!("no more files, skipping remaining periods");
break;
}
'period: for period in periods {
let first_chunk = ChunkTime::of(period, cursor);
let start_index = first_chunk.index();
let stop_index = start_index - period.chunk_count();
let period_length = chrono::Duration::from_std(period.period_length)
.map_err(|_| Error::DateTimeOverflow)?;
let chunk_size =
chrono::Duration::from_std(period.chunk_size).map_err(|_| Error::DateTimeOverflow)?;
trace!("period {period:?}:");
trace!(" first chunk: {first_chunk:?}");
trace!(" index range: {start_index}..{stop_index}");
if period_length < chunk_size {
panic!("invalid period configuration");
}
'chunk: loop {
let file = match files.pop() {
Some(file) => file,
None => break 'period,
};
// NOTE: we are looking backwards in time, so all checks and additions need to be inverted
let period_end = cursor - period_length;
let file_chunk = ChunkTime::of(period, file.into());
while cursor > period_end {
if files.is_empty() {
trace!("no more files, skipping remaining chunks");
break;
let index = file_chunk.index();
trace!("{file}:");
trace!(" comparing to period {period:?}");
trace!(" is in chunk {file_chunk:?}");
trace!(" with index {index}");
if index <= stop_index {
trace!(" not in this period, checking next");
files.push(file);
cursor = file.into();
break 'chunk;
}
let start_of_chunk = cursor;
let end_of_chunk = cursor - chunk_size;
cursor = end_of_chunk;
let mut found_file_for_chunk = false;
trace!("processing chunk {end_of_chunk} -> {start_of_chunk}");
loop {
let file = match files.pop() {
Some(file) => file,
None => break,
};
if file > start_of_chunk {
trace!("{file} outside of chunk bounds. ignoring.");
keep_files.insert(file);
} else if file > end_of_chunk {
if !found_file_for_chunk {
keep_files.insert(file);
found_file_for_chunk = true;
trace!("{file} is in chunk. keeping.");
} else {
trace!("{file} is in chunk. beaten by another file");
}
} else {
files.push(file); // put the file back in the queue
if file < period_end {
trace!("reached end of period");
continue 'period;
} else {
trace!("reached end of chunk");
break;
}
}
}
trace!(" keeping for this period");
chunked_files.insert((period, file_chunk), file);
}
cursor = period_end;
}
Ok(keep_files)
Ok(chunked_files.values().copied().collect())
}
fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
@ -203,15 +185,311 @@ fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
.unwrap_or_else(|_| "Failed to capture stderr".to_string());
return Err(Error::DeleteSubvolume(msg));
};
} else if file_path.is_dir() {
trace!("rm -r {file_path:?}");
fs::remove_dir_all(file_path)?;
} else {
if file_path.is_dir() {
trace!("rm -r {file_path:?}");
fs::remove_dir_all(file_path)?;
} else {
trace!("rm {file_path:?}");
fs::remove_file(file_path)?;
}
trace!("rm {file_path:?}");
fs::remove_file(file_path)?;
}
Ok(())
}
/// Reference point for all chunk-index calculations (1900-01-01 UTC).
const EPOCH_STR: &str = "1900-01-01T00:00:00+00:00";

/// Returns the fixed reference epoch converted into local time.
///
/// # Panics
/// Never in practice: `EPOCH_STR` is a valid RFC 3339 literal.
fn epoch() -> DateTime<Local> {
    let parsed = DateTime::parse_from_rfc3339(EPOCH_STR).expect("Failed to parse epoch");
    parsed.into()
}
/// Identifies which fixed-size time chunk a timestamp falls into,
/// relative to the [`epoch`] and a period's configured chunk size.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ChunkTime {
    /// The length of one chunk, measured in `unit`s
    /// (e.g. `12` when the chunk size is 12 hours).
    pub value: i64,
    /// Constructor for the chunk's time unit
    /// (e.g. `Duration::hours` for hour-sized chunks).
    pub unit: fn(i64) -> Duration,
    /// A number of whole [`ChunkTime::unit`]s since the epoch.
    ///
    /// This value corresponds to a time *within* the chunk,
    /// not necessarily its start.
    pub since_epoch: i64,
}
impl ChunkTime {
    /// Index of this chunk counted from the epoch: how many whole chunks
    /// (of `value` units each) lie at or before this point in time.
    ///
    /// Uses euclidean division so that pre-epoch timestamps (negative
    /// `since_epoch`) floor toward negative infinity instead of rounding
    /// toward zero — otherwise times just before and just after the epoch
    /// would collide on index 0. Non-negative inputs are unaffected.
    pub fn index(&self) -> i64 {
        self.since_epoch.div_euclid(self.value)
    }

    /// The start of this chunk: the greatest chunk boundary that is not
    /// after the represented time.
    pub fn start(&self) -> DateTime<Local> {
        // Largest multiple of `value` that is <= `since_epoch`
        // (floor semantics, correct for negative values too).
        let floored = self.since_epoch.div_euclid(self.value) * self.value;
        epoch() + (self.unit)(floored)
    }

    /// Computes the chunk that `time` falls into, using the chunk size
    /// configured on `period` to pick both the unit and the chunk length.
    pub fn of(period: &ConfPeriod, time: DateTime<Local>) -> Self {
        let since_epoch = time - epoch();
        use SimpleDuration::*;
        match period.chunk_size {
            Seconds(s) => ChunkTime {
                unit: Duration::seconds,
                value: s,
                since_epoch: since_epoch.num_seconds(),
            },
            Minutes(m) => ChunkTime {
                unit: Duration::minutes,
                value: m,
                since_epoch: since_epoch.num_minutes(),
            },
            Hours(h) => ChunkTime {
                unit: Duration::hours,
                value: h,
                since_epoch: since_epoch.num_hours(),
            },
            Days(d) => ChunkTime {
                unit: Duration::days,
                value: d,
                since_epoch: since_epoch.num_days(),
            },
            Weeks(w) => ChunkTime {
                unit: Duration::weeks,
                value: w,
                since_epoch: since_epoch.num_weeks(),
            },
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::config::{ConfPeriod, SimpleDuration};
    use chrono::DateTime;

    /// 12-hour chunks: every test time lies exactly on a 12-hour boundary
    /// relative to the 1900-01-01 epoch, so each time is its own chunk start.
    #[test]
    fn chunk_of_period_hours() {
        let period = ConfPeriod {
            period_length: SimpleDuration::Weeks(1),
            chunk_size: SimpleDuration::Hours(12),
        };
        // (input time, expected chunk start, expected chunk index)
        let tests = [
            (
                "2020-01-01T12:00:00+00:00", // time
                "2020-01-01T12:00:00+00:00", // expected chunk start
                87659,                       // expected chunk index
            ),
            (
                "2020-01-02T12:00:00+00:00",
                "2020-01-02T12:00:00+00:00",
                87661,
            ),
            (
                "2020-01-03T12:00:00+00:00",
                "2020-01-03T12:00:00+00:00",
                87663,
            ),
            (
                "2020-01-04T12:00:00+00:00",
                "2020-01-04T12:00:00+00:00",
                87665,
            ),
        ];
        for (time, expected_chunk_start, expected_chunk_index) in tests {
            let time: DateTime<Local> = DateTime::parse_from_rfc3339(time).unwrap().into();
            let expected_chunk_start = DateTime::parse_from_rfc3339(expected_chunk_start).unwrap();
            let chunk = ChunkTime::of(&period, time);
            assert_eq!(chunk.start(), expected_chunk_start);
            assert_eq!(chunk.index(), expected_chunk_index);
        }
    }

    /// 3-day chunks counted from the 1900-01-01 epoch do not align with
    /// calendar days of 2020, so the chunk start usually differs from the
    /// input time (it floors to the previous 3-day boundary).
    #[test]
    fn chunk_of_period_days() {
        let period = ConfPeriod {
            period_length: SimpleDuration::Days(15),
            chunk_size: SimpleDuration::Days(3),
        };
        // (input time, expected chunk start, expected chunk index)
        let tests = [
            (
                "2020-01-01T12:00:00+00:00", // time
                "2019-12-30T00:00:00+00:00", // expected chunk start
                14609,                       // expected chunk index
            ),
            (
                "2020-01-02T12:00:00+00:00",
                "2020-01-02T00:00:00+00:00",
                14610,
            ),
            (
                "2020-01-03T12:00:00+00:00",
                "2020-01-02T00:00:00+00:00",
                14610,
            ),
            (
                "2020-01-04T12:00:00+00:00",
                "2020-01-02T00:00:00+00:00",
                14610,
            ),
        ];
        for (time, expected_chunk_start, expected_chunk_index) in tests {
            let time: DateTime<Local> = DateTime::parse_from_rfc3339(time).unwrap().into();
            let expected_chunk_start = DateTime::parse_from_rfc3339(expected_chunk_start).unwrap();
            let chunk = ChunkTime::of(&period, time);
            assert_eq!(chunk.start(), expected_chunk_start);
            assert_eq!(chunk.index(), expected_chunk_index);
        }
    }

    /// End-to-end retention check against a fixed snapshot history.
    ///
    /// Periods are applied newest-to-oldest starting at `start_time`:
    /// keep every snapshot (1-second chunks) for 6 h, then one per hour
    /// for the next 6 h, then one per 2-day chunk for 8 days.
    #[test]
    fn delete_files() {
        use SimpleDuration::*;
        let periods = [
            ConfPeriod {
                period_length: Hours(6),
                chunk_size: Seconds(1),
            },
            ConfPeriod {
                period_length: Hours(6),
                chunk_size: Hours(1),
            },
            ConfPeriod {
                period_length: Days(8),
                chunk_size: Days(2),
            },
        ];
        // Hourly snapshots over three days, plus a few extra sub-minute
        // snapshots to exercise the second-granularity period.
        let input = [
            "2020-01-01T01:00:00+00:00",
            "2020-01-01T02:00:00+00:00",
            "2020-01-01T03:00:00+00:00",
            "2020-01-01T04:00:00+00:00",
            "2020-01-01T05:00:00+00:00",
            "2020-01-01T06:00:00+00:00",
            "2020-01-01T07:00:00+00:00",
            "2020-01-01T08:00:00+00:00",
            "2020-01-01T09:00:00+00:00",
            "2020-01-01T10:00:00+00:00",
            "2020-01-01T10:00:32+00:00",
            "2020-01-01T10:00:33+00:00",
            "2020-01-01T10:00:34+00:00",
            "2020-01-01T11:00:00+00:00",
            "2020-01-01T12:00:00+00:00",
            "2020-01-01T13:00:00+00:00",
            "2020-01-01T14:00:00+00:00",
            "2020-01-01T15:00:00+00:00",
            "2020-01-01T16:00:00+00:00",
            "2020-01-01T17:00:00+00:00",
            "2020-01-01T18:00:00+00:00",
            "2020-01-01T19:00:00+00:00",
            "2020-01-01T20:00:00+00:00",
            "2020-01-01T21:00:00+00:00",
            "2020-01-01T22:00:00+00:00",
            "2020-01-01T23:00:00+00:00",
            "2020-01-02T00:00:00+00:00",
            "2020-01-02T01:00:00+00:00",
            "2020-01-02T02:00:00+00:00",
            "2020-01-02T03:00:00+00:00",
            "2020-01-02T04:00:00+00:00",
            "2020-01-02T05:00:00+00:00",
            "2020-01-02T06:00:00+00:00",
            "2020-01-02T07:00:00+00:00",
            "2020-01-02T08:00:00+00:00",
            "2020-01-02T09:00:00+00:00",
            "2020-01-02T10:00:00+00:00",
            "2020-01-02T11:00:00+00:00",
            "2020-01-02T12:00:00+00:00",
            "2020-01-02T13:00:00+00:00",
            "2020-01-02T14:00:00+00:00",
            "2020-01-02T15:00:00+00:00",
            "2020-01-02T16:00:00+00:00",
            "2020-01-02T17:00:00+00:00",
            "2020-01-02T18:00:00+00:00",
            "2020-01-02T19:00:00+00:00",
            "2020-01-02T20:00:00+00:00",
            "2020-01-02T21:00:00+00:00",
            "2020-01-02T22:00:00+00:00",
            "2020-01-02T23:00:00+00:00",
            "2020-01-03T00:00:00+00:00",
            "2020-01-03T01:00:00+00:00",
            "2020-01-03T02:00:00+00:00",
            "2020-01-03T03:00:00+00:00",
            "2020-01-03T04:00:00+00:00",
            "2020-01-03T05:00:00+00:00",
            "2020-01-03T06:00:00+00:00",
            "2020-01-03T07:00:00+00:00",
            "2020-01-03T08:00:00+00:00",
            "2020-01-03T09:00:00+00:00",
            "2020-01-03T10:00:00+00:00",
            "2020-01-03T11:00:00+00:00",
            "2020-01-03T12:00:00+00:00",
            "2020-01-03T13:00:00+00:00",
            "2020-01-03T14:00:00+00:00",
            "2020-01-03T14:00:10+00:00",
            "2020-01-03T14:00:20+00:00",
            "2020-01-03T15:00:00+00:00",
            "2020-01-03T16:00:00+00:00",
            "2020-01-03T17:00:00+00:00",
            "2020-01-03T18:00:00+00:00",
            "2020-01-03T19:00:00+00:00",
            "2020-01-03T20:00:00+00:00",
            "2020-01-03T21:00:00+00:00",
            "2020-01-03T22:00:30+00:00",
            "2020-01-03T22:00:31+00:00",
            "2020-01-03T22:00:32+00:00",
            "2020-01-03T22:00:33+00:00",
            "2020-01-03T23:00:00+00:00",
        ];
        let input = input.map(|date| DateTime::parse_from_rfc3339(date).unwrap());
        // Everything in the newest 6 h survives (including the 22:00:3x
        // burst); the next 6 h keep one snapshot per hour; older history
        // collapses to one snapshot per 2-day chunk.
        let expected_output = [
            "2020-01-01T01:00:00+00:00",
            "2020-01-02T00:00:00+00:00",
            "2020-01-03T00:00:00+00:00",
            "2020-01-03T13:00:00+00:00",
            "2020-01-03T14:00:00+00:00",
            "2020-01-03T15:00:00+00:00",
            "2020-01-03T16:00:00+00:00",
            "2020-01-03T17:00:00+00:00",
            "2020-01-03T18:00:00+00:00",
            "2020-01-03T19:00:00+00:00",
            "2020-01-03T20:00:00+00:00",
            "2020-01-03T21:00:00+00:00",
            "2020-01-03T22:00:30+00:00",
            "2020-01-03T22:00:31+00:00",
            "2020-01-03T22:00:32+00:00",
            "2020-01-03T22:00:33+00:00",
            "2020-01-03T23:00:00+00:00",
        ];
        let expected_output: HashSet<_> = expected_output
            .into_iter()
            .map(|date| DateTime::parse_from_rfc3339(date).unwrap())
            .collect();
        // Evaluate the policy as of midnight after the last snapshot.
        let start_time = DateTime::parse_from_rfc3339("2020-01-04T00:00:00+00:00").unwrap();
        let output = check_files_to_keep(start_time.into(), &periods, &input).unwrap();
        assert_eq!(output, expected_output);
    }
}