Compare commits

...

2 Commits

Author SHA1 Message Date
770cf5ea0e Fix everything 2022-05-26 13:42:57 +02:00
22a0cbc07b Make handling of very small chunks slightly faster 2022-04-15 16:18:09 +02:00
3 changed files with 410 additions and 114 deletions

View File

@ -1,6 +1,10 @@
path = "./test" path = "./test"
#btrfs = true #btrfs = true
[[periods]]
period_length = "10m"
chunk_size = "1s"
[[periods]] [[periods]]
period_length = "1w" period_length = "1w"
chunk_size = "1h" chunk_size = "1h"

View File

@ -1,8 +1,8 @@
use chrono::Duration;
use serde::de::Visitor; use serde::de::Visitor;
use serde::{Deserialize, Deserializer}; use serde::{Deserialize, Deserializer};
use std::fmt; use std::fmt;
use std::path::PathBuf; use std::path::PathBuf;
use std::time::Duration;
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct Config { pub struct Config {
@ -16,60 +16,68 @@ pub struct Config {
pub periods: Vec<ConfPeriod>, pub periods: Vec<ConfPeriod>,
} }
#[derive(Deserialize)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct ConfPeriod { pub enum SimpleDuration {
/// The total duration of this period Weeks(i64),
#[serde(deserialize_with = "parse_duration")] Days(i64),
pub period_length: Duration, Hours(i64),
Minutes(i64),
/// The size of chunks in this period. Each chunk should hold 1 file. Seconds(i64),
#[serde(deserialize_with = "parse_duration")]
pub chunk_size: Duration,
} }
fn parse_duration<'de, D>(d: D) -> Result<Duration, D::Error> impl From<SimpleDuration> for Duration {
fn from(simple: SimpleDuration) -> Duration {
match simple {
SimpleDuration::Weeks(weeks) => Duration::weeks(weeks),
SimpleDuration::Days(days) => Duration::days(days),
SimpleDuration::Hours(hours) => Duration::hours(hours),
SimpleDuration::Minutes(minutes) => Duration::minutes(minutes),
SimpleDuration::Seconds(seconds) => Duration::seconds(seconds),
}
}
}
/// One retention period, as listed under `periods` in the configuration.
///
/// Both fields are deserialized through `parse_simple_duration`, which reads a
/// number followed by a single-letter unit suffix.
#[derive(Debug, Deserialize, Hash, PartialEq, Eq)]
pub struct ConfPeriod {
/// The total duration of this period
#[serde(deserialize_with = "parse_simple_duration")]
pub period_length: SimpleDuration,
/// The size of chunks in this period. Each chunk should hold 1 file.
///
/// NOTE(review): a zero chunk size does not appear to be rejected while
/// parsing, and `chunk_count` divides by this value — confirm.
#[serde(deserialize_with = "parse_simple_duration")]
pub chunk_size: SimpleDuration,
}
impl ConfPeriod {
pub fn chunk_count(&self) -> i64 {
let period_length: Duration = self.period_length.into();
let chunk_size: Duration = self.chunk_size.into();
period_length.num_milliseconds() / chunk_size.num_milliseconds()
}
}
fn parse_simple_duration<'de, D>(d: D) -> Result<SimpleDuration, D::Error>
where where
D: Deserializer<'de>, D: Deserializer<'de>,
{ {
let s = d.deserialize_string(StringVisitor)?; let s = d.deserialize_string(StringVisitor)?;
let s = s.trim();
let mut duration = Duration::ZERO; let suffix = s.chars().rev().next().unwrap();
let value = &s[..s.len() - suffix.len_utf8()];
for part in s.split_whitespace() { let value: u64 = value.parse().expect("failed to parse duration value");
if part.len() < 2 { let value = value as i64;
continue;
}
let suffix = part.chars().rev().next().unwrap(); use SimpleDuration::*;
let value = &part[..part.len() - suffix.len_utf8()]; Ok(match suffix.to_ascii_lowercase() {
's' => Seconds(value),
let value: u32 = value.parse().expect("failed to parse duration value"); 'm' => Minutes(value),
'h' => Hours(value),
let second: Duration = Duration::from_secs(1); 'd' => Days(value),
let minute: Duration = second * 60; 'w' => Weeks(value),
let hour: Duration = minute * 60; _ => panic!("unknown unit of duration"),
let day: Duration = hour * 24; })
let week: Duration = day * 7;
let year: Duration = day * 365;
let unit = match suffix.to_ascii_lowercase() {
's' => second,
'm' => minute,
'h' => hour,
'd' => day,
'w' => week,
'y' => year,
_ => panic!("unknown unit of duration"),
};
duration += unit * value;
}
if duration == Duration::ZERO {
panic!("Invalid duration: Zero");
}
Ok(duration)
} }
struct StringVisitor; struct StringVisitor;

View File

@ -1,13 +1,13 @@
mod config; mod config;
use chrono::{DateTime, FixedOffset, Local};
#[macro_use] #[macro_use]
extern crate log; extern crate log;
use chrono::{DateTime, Duration, FixedOffset, Local};
use clap::Parser; use clap::Parser;
use config::Config; use config::{ConfPeriod, Config, SimpleDuration};
use log::LevelFilter; use log::LevelFilter;
use std::collections::{BinaryHeap, HashSet}; use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs; use std::fs;
use std::io; use std::io;
use std::path::PathBuf; use std::path::PathBuf;
@ -40,9 +40,6 @@ enum Error {
#[error("Failed to parse config: {0}")] #[error("Failed to parse config: {0}")]
ParseConfig(#[from] toml::de::Error), ParseConfig(#[from] toml::de::Error),
#[error("Managed to overflow a DateTime. What did you do??")]
DateTimeOverflow,
#[error("Failed to delete btrfs subvolume: {0}")] #[error("Failed to delete btrfs subvolume: {0}")]
DeleteSubvolume(String), DeleteSubvolume(String),
} }
@ -90,7 +87,8 @@ fn run(opt: &Opt) -> Result<(), Error> {
} }
let files = files.into_sorted_vec(); let files = files.into_sorted_vec();
let keep_files = check_files_to_keep(&config, &files)?; let now = Local::now();
let keep_files = check_files_to_keep(now, &config.periods, &files)?;
info!("final decision:"); info!("final decision:");
for &file in &files { for &file in &files {
@ -111,74 +109,64 @@ fn run(opt: &Opt) -> Result<(), Error> {
Ok(()) Ok(())
} }
fn check_files_to_keep(config: &Config, files: &[FileName]) -> Result<HashSet<FileName>, Error> { fn check_files_to_keep(
now: DateTime<Local>,
periods: &[ConfPeriod],
files: &[FileName],
) -> Result<HashSet<FileName>, Error> {
let mut files = files.to_vec(); let mut files = files.to_vec();
let mut keep_files = HashSet::new(); debug_assert_eq!(
files,
{
let mut sorted = files.clone();
sorted.sort();
sorted
},
"file list must be sorted"
);
let mut chunked_files = HashMap::new();
let now = Local::now();
let mut cursor = now; let mut cursor = now;
for period in &config.periods { 'period: for period in periods {
if files.is_empty() { let first_chunk = ChunkTime::of(period, cursor);
trace!("no more files, skipping remaining periods"); let start_index = first_chunk.index();
break; let stop_index = start_index - period.chunk_count();
}
let period_length = chrono::Duration::from_std(period.period_length) trace!("period {period:?}:");
.map_err(|_| Error::DateTimeOverflow)?; trace!(" first chunk: {first_chunk:?}");
let chunk_size = trace!(" index range: {start_index}..{stop_index}");
chrono::Duration::from_std(period.chunk_size).map_err(|_| Error::DateTimeOverflow)?;
if period_length < chunk_size { 'chunk: loop {
panic!("invalid period configuration"); let file = match files.pop() {
} Some(file) => file,
None => break 'period,
};
// NOTE: we are looking backwards in time, so all checks and additions need to be inverted let file_chunk = ChunkTime::of(period, file.into());
let period_end = cursor - period_length;
while cursor > period_end { let index = file_chunk.index();
if files.is_empty() {
trace!("no more files, skipping remaining chunks"); trace!("{file}:");
break; trace!(" comparing to period {period:?}");
trace!(" is in chunk {file_chunk:?}");
trace!(" with index {index}");
if index <= stop_index {
trace!(" not in this period, checking next");
files.push(file);
cursor = file.into();
break 'chunk;
} }
let start_of_chunk = cursor; trace!(" keeping for this period");
let end_of_chunk = cursor - chunk_size; chunked_files.insert((period, file_chunk), file);
cursor = end_of_chunk;
let mut chunk_file_to_keep = None;
trace!("processing chunk {end_of_chunk} -> {start_of_chunk}");
loop {
let file = match files.pop() {
Some(file) => file,
None => break,
};
if file > start_of_chunk {
trace!("{file} outside of chunk bounds. ignoring.");
keep_files.insert(file);
} else if file > end_of_chunk {
trace!("{file} is in chunk. beaten by {chunk_file_to_keep:?}");
chunk_file_to_keep.get_or_insert(file);
} else {
trace!("reached end of chunk");
files.push(file); // put the file back in the queue
break;
}
}
if let Some(file) = chunk_file_to_keep {
trace!("keeping files {file}");
keep_files.insert(file);
}
} }
cursor = period_end;
} }
Ok(keep_files) Ok(chunked_files.values().copied().collect())
} }
fn delete_file(config: &Config, file: FileName) -> Result<(), Error> { fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
@ -197,15 +185,311 @@ fn delete_file(config: &Config, file: FileName) -> Result<(), Error> {
.unwrap_or_else(|_| "Failed to capture stderr".to_string()); .unwrap_or_else(|_| "Failed to capture stderr".to_string());
return Err(Error::DeleteSubvolume(msg)); return Err(Error::DeleteSubvolume(msg));
}; };
} else if file_path.is_dir() {
trace!("rm -r {file_path:?}");
fs::remove_dir_all(file_path)?;
} else { } else {
if file_path.is_dir() { trace!("rm {file_path:?}");
trace!("rm -r {file_path:?}"); fs::remove_file(file_path)?;
fs::remove_dir_all(file_path)?;
} else {
trace!("rm {file_path:?}");
fs::remove_file(file_path)?;
}
} }
Ok(()) Ok(())
} }
const EPOCH_STR: &str = "1900-01-01T00:00:00+00:00";
fn epoch() -> DateTime<Local> {
DateTime::parse_from_rfc3339(EPOCH_STR)
.expect("Failed to parse epoch")
.into()
}
/// A point in time expressed as a count of whole time units since `epoch()`,
/// together with the chunk length (in the same unit) used to group such points.
///
/// Values are built by `ChunkTime::of`; `index()` and `start()` derive the
/// chunk number and the chunk's first instant from the fields below.
///
/// NOTE(review): the derived `PartialEq`/`Eq`/`Hash` compare `since_epoch`
/// itself, not `index()`. Two values that fall into the same chunk but on
/// different units (e.g. two different days with `value == 2`) are therefore
/// unequal and hash differently. `check_files_to_keep` appears to use this type
/// as part of a `HashMap` key, which would then keep one file per unit rather
/// than one per chunk — confirm that this is intended.
///
/// NOTE(review): the derives also compare and hash the `unit` function pointer;
/// function pointer addresses are not guaranteed to be unique or stable, so
/// this comparison is best not relied upon.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ChunkTime {
/// The length of one chunk, counted in `unit`s, e.g. how many seconds
pub value: i64,
/// Constructor for a duration of the given number of units, e.g. `Duration::seconds`
pub unit: fn(i64) -> Duration,
/// A number of whole `unit`s since the epoch
///
/// This value corresponds to a time within the chunk
pub since_epoch: i64,
}
impl ChunkTime {
    /// Returns the ordinal of the chunk that this time falls into.
    ///
    /// The ordinal is `since_epoch / value` using integer division, which
    /// rounds toward zero.
    ///
    /// # Panics
    ///
    /// Panics if `value` is zero.
    pub fn index(&self) -> i64 {
        let Self {
            value, since_epoch, ..
        } = *self;
        since_epoch / value
    }

    /// Returns the instant at which this chunk begins.
    ///
    /// `since_epoch` is rounded to a multiple of `value` (for a non-negative
    /// `since_epoch` and a positive `value`, the largest multiple that does not
    /// exceed it), turned into a duration with `unit`, and added to [`epoch`].
    ///
    /// # Panics
    ///
    /// Panics if `value` is zero.
    pub fn start(&self) -> DateTime<Local> {
        let origin = epoch();
        let units_at_start = self.index() * self.value;
        origin + (self.unit)(units_at_start)
    }

    /// Builds the [`ChunkTime`] describing `time` for the chunk size of `period`.
    ///
    /// The unit of `period.chunk_size` selects both the duration constructor
    /// stored in `unit` and the granularity in which the time elapsed since
    /// [`epoch`] is counted; the chunk size's number becomes `value`.
    pub fn of(period: &ConfPeriod, time: DateTime<Local>) -> Self {
        use SimpleDuration::*;

        type Unit = fn(i64) -> Duration;

        let elapsed = time - epoch();
        let (unit, value, since_epoch) = match period.chunk_size {
            Seconds(s) => (Duration::seconds as Unit, s, elapsed.num_seconds()),
            Minutes(m) => (Duration::minutes as Unit, m, elapsed.num_minutes()),
            Hours(h) => (Duration::hours as Unit, h, elapsed.num_hours()),
            Days(d) => (Duration::days as Unit, d, elapsed.num_days()),
            Weeks(w) => (Duration::weeks as Unit, w, elapsed.num_weeks()),
        };

        ChunkTime {
            value,
            unit,
            since_epoch,
        }
    }
}
#[cfg(test)]
mod test {
use super::*;
use crate::config::{ConfPeriod, SimpleDuration};
use chrono::DateTime;
/// Checks `ChunkTime::start` and `ChunkTime::index` for 12-hour chunks, using
/// four consecutive middays that each sit exactly on a chunk boundary.
#[test]
fn chunk_of_period_hours() {
    // (time, expected chunk start, expected chunk index)
    const CASES: [(&str, &str, i64); 4] = [
        ("2020-01-01T12:00:00+00:00", "2020-01-01T12:00:00+00:00", 87659),
        ("2020-01-02T12:00:00+00:00", "2020-01-02T12:00:00+00:00", 87661),
        ("2020-01-03T12:00:00+00:00", "2020-01-03T12:00:00+00:00", 87663),
        ("2020-01-04T12:00:00+00:00", "2020-01-04T12:00:00+00:00", 87665),
    ];

    let period = ConfPeriod {
        period_length: SimpleDuration::Weeks(1),
        chunk_size: SimpleDuration::Hours(12),
    };

    for (raw_time, raw_start, want_index) in CASES {
        let time = DateTime::parse_from_rfc3339(raw_time)
            .unwrap()
            .with_timezone(&Local);
        let want_start = DateTime::parse_from_rfc3339(raw_start).unwrap();

        let got = ChunkTime::of(&period, time);

        assert_eq!(got.start(), want_start);
        assert_eq!(got.index(), want_index);
    }
}
/// Checks `ChunkTime::start` and `ChunkTime::index` for 3-day chunks: the first
/// case lands in one chunk, the remaining three share the following chunk.
#[test]
fn chunk_of_period_days() {
    // (time, expected chunk start, expected chunk index)
    const CASES: [(&str, &str, i64); 4] = [
        ("2020-01-01T12:00:00+00:00", "2019-12-30T00:00:00+00:00", 14609),
        ("2020-01-02T12:00:00+00:00", "2020-01-02T00:00:00+00:00", 14610),
        ("2020-01-03T12:00:00+00:00", "2020-01-02T00:00:00+00:00", 14610),
        ("2020-01-04T12:00:00+00:00", "2020-01-02T00:00:00+00:00", 14610),
    ];

    let period = ConfPeriod {
        period_length: SimpleDuration::Days(15),
        chunk_size: SimpleDuration::Days(3),
    };

    for (raw_time, raw_start, want_index) in CASES {
        let time = DateTime::parse_from_rfc3339(raw_time)
            .unwrap()
            .with_timezone(&Local);
        let want_start = DateTime::parse_from_rfc3339(raw_start).unwrap();

        let got = ChunkTime::of(&period, time);

        assert_eq!(got.start(), want_start);
        assert_eq!(got.index(), want_index);
    }
}
/// End-to-end check of `check_files_to_keep` over three consecutive periods.
///
/// Evaluated at 2020-01-04T00:00:00, the periods are, newest to oldest:
/// 6 hours in 1-second chunks, 6 hours in 1-hour chunks, and 8 days in
/// 2-day chunks.
#[test]
fn delete_files() {
use SimpleDuration::*;
// Periods are listed newest first; each one continues where the previous ended.
let periods = [
ConfPeriod {
period_length: Hours(6),
chunk_size: Seconds(1),
},
ConfPeriod {
period_length: Hours(6),
chunk_size: Hours(1),
},
ConfPeriod {
period_length: Days(8),
chunk_size: Days(2),
},
];
// Timestamps of the existing files, oldest first (`check_files_to_keep`
// asserts in debug builds that its input is sorted). Mostly hourly, with a
// few clusters only seconds apart.
let input = [
"2020-01-01T01:00:00+00:00",
"2020-01-01T02:00:00+00:00",
"2020-01-01T03:00:00+00:00",
"2020-01-01T04:00:00+00:00",
"2020-01-01T05:00:00+00:00",
"2020-01-01T06:00:00+00:00",
"2020-01-01T07:00:00+00:00",
"2020-01-01T08:00:00+00:00",
"2020-01-01T09:00:00+00:00",
"2020-01-01T10:00:00+00:00",
"2020-01-01T10:00:32+00:00",
"2020-01-01T10:00:33+00:00",
"2020-01-01T10:00:34+00:00",
"2020-01-01T11:00:00+00:00",
"2020-01-01T12:00:00+00:00",
"2020-01-01T13:00:00+00:00",
"2020-01-01T14:00:00+00:00",
"2020-01-01T15:00:00+00:00",
"2020-01-01T16:00:00+00:00",
"2020-01-01T17:00:00+00:00",
"2020-01-01T18:00:00+00:00",
"2020-01-01T19:00:00+00:00",
"2020-01-01T20:00:00+00:00",
"2020-01-01T21:00:00+00:00",
"2020-01-01T22:00:00+00:00",
"2020-01-01T23:00:00+00:00",
"2020-01-02T00:00:00+00:00",
"2020-01-02T01:00:00+00:00",
"2020-01-02T02:00:00+00:00",
"2020-01-02T03:00:00+00:00",
"2020-01-02T04:00:00+00:00",
"2020-01-02T05:00:00+00:00",
"2020-01-02T06:00:00+00:00",
"2020-01-02T07:00:00+00:00",
"2020-01-02T08:00:00+00:00",
"2020-01-02T09:00:00+00:00",
"2020-01-02T10:00:00+00:00",
"2020-01-02T11:00:00+00:00",
"2020-01-02T12:00:00+00:00",
"2020-01-02T13:00:00+00:00",
"2020-01-02T14:00:00+00:00",
"2020-01-02T15:00:00+00:00",
"2020-01-02T16:00:00+00:00",
"2020-01-02T17:00:00+00:00",
"2020-01-02T18:00:00+00:00",
"2020-01-02T19:00:00+00:00",
"2020-01-02T20:00:00+00:00",
"2020-01-02T21:00:00+00:00",
"2020-01-02T22:00:00+00:00",
"2020-01-02T23:00:00+00:00",
"2020-01-03T00:00:00+00:00",
"2020-01-03T01:00:00+00:00",
"2020-01-03T02:00:00+00:00",
"2020-01-03T03:00:00+00:00",
"2020-01-03T04:00:00+00:00",
"2020-01-03T05:00:00+00:00",
"2020-01-03T06:00:00+00:00",
"2020-01-03T07:00:00+00:00",
"2020-01-03T08:00:00+00:00",
"2020-01-03T09:00:00+00:00",
"2020-01-03T10:00:00+00:00",
"2020-01-03T11:00:00+00:00",
"2020-01-03T12:00:00+00:00",
"2020-01-03T13:00:00+00:00",
"2020-01-03T14:00:00+00:00",
"2020-01-03T14:00:10+00:00",
"2020-01-03T14:00:20+00:00",
"2020-01-03T15:00:00+00:00",
"2020-01-03T16:00:00+00:00",
"2020-01-03T17:00:00+00:00",
"2020-01-03T18:00:00+00:00",
"2020-01-03T19:00:00+00:00",
"2020-01-03T20:00:00+00:00",
"2020-01-03T21:00:00+00:00",
"2020-01-03T22:00:30+00:00",
"2020-01-03T22:00:31+00:00",
"2020-01-03T22:00:32+00:00",
"2020-01-03T22:00:33+00:00",
"2020-01-03T23:00:00+00:00",
];
let input = input.map(|date| DateTime::parse_from_rfc3339(date).unwrap());
// The files that must survive.
//
// NOTE(review): 2020-01-02T00:00:00 and 2020-01-03T00:00:00 are both expected
// to be kept although, with 2-day chunks, both dates appear to yield the same
// `ChunkTime::index()`. This matches deduplication keyed on the raw
// `since_epoch` day count rather than on the chunk index — confirm that one
// file per day (not per 2-day chunk) is the intended outcome.
let expected_output = [
"2020-01-01T01:00:00+00:00",
"2020-01-02T00:00:00+00:00",
"2020-01-03T00:00:00+00:00",
"2020-01-03T13:00:00+00:00",
"2020-01-03T14:00:00+00:00",
"2020-01-03T15:00:00+00:00",
"2020-01-03T16:00:00+00:00",
"2020-01-03T17:00:00+00:00",
"2020-01-03T18:00:00+00:00",
"2020-01-03T19:00:00+00:00",
"2020-01-03T20:00:00+00:00",
"2020-01-03T21:00:00+00:00",
"2020-01-03T22:00:30+00:00",
"2020-01-03T22:00:31+00:00",
"2020-01-03T22:00:32+00:00",
"2020-01-03T22:00:33+00:00",
"2020-01-03T23:00:00+00:00",
];
let expected_output: HashSet<_> = expected_output
.into_iter()
.map(|date| DateTime::parse_from_rfc3339(date).unwrap())
.collect();
// A fixed "now" keeps the test independent of the wall clock.
let start_time = DateTime::parse_from_rfc3339("2020-01-04T00:00:00+00:00").unwrap();
let output = check_files_to_keep(start_time.into(), &periods, &input).unwrap();
assert_eq!(output, expected_output);
}
}