Store duplicated data just once
This commit is contained in:
parent
c0aa4ed22d
commit
62a0e0981a
3 changed files with 49 additions and 3 deletions
|
@ -1,4 +1,4 @@
|
|||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{fmt, fs, io};
|
||||
|
||||
use crate::error::BakareError;
|
||||
|
@ -10,6 +10,7 @@ use sha2::Sha512;
|
|||
use std::fmt::Formatter;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Represents a place where a backup is stored and can be restored from.
|
||||
/// right now only on-disk directory storage is supported
|
||||
|
@ -21,6 +22,8 @@ pub struct Repository<'a> {
|
|||
index: Index,
|
||||
}
|
||||
|
||||
const DATA_DIR_NAME: &str = "data";
|
||||
|
||||
#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize, Hash)]
|
||||
pub struct ItemId(Box<[u8]>);
|
||||
|
||||
|
@ -95,7 +98,8 @@ impl<'a> Repository<'a> {
|
|||
return Err(BakareError::PathToStoreNotAbsolute);
|
||||
}
|
||||
let id = Repository::calculate_id(source_path)?;
|
||||
let destination_path = self.path.join(id.to_string());
|
||||
let destination_path = self.data_dir();
|
||||
let destination_path = destination_path.join(id.to_string());
|
||||
let destination_path = Path::new(&destination_path);
|
||||
|
||||
if source_path.is_file() {
|
||||
|
@ -126,6 +130,20 @@ impl<'a> Repository<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn data_weight(&self) -> Result<u64, BakareError> {
|
||||
let total_size = WalkDir::new(self.data_dir())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len());
|
||||
Ok(total_size)
|
||||
}
|
||||
|
||||
fn data_dir(&self) -> PathBuf {
|
||||
self.path().join(DATA_DIR_NAME)
|
||||
}
|
||||
|
||||
fn calculate_id(source_path: &Path) -> Result<ItemId, BakareError> {
|
||||
let source_file = File::open(source_path)?;
|
||||
let mut reader = BufReader::new(source_file);
|
||||
|
|
|
@ -97,6 +97,13 @@ pub fn backup_file_with_contents(
|
|||
}
|
||||
}
|
||||
|
||||
pub fn data_weight(repository_path: &Path) -> Result<u64, BakareError> {
|
||||
{
|
||||
let repository = Repository::open(repository_path)?;
|
||||
Ok(repository.data_weight()?)
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_directory_trees_have_same_contents(left: &Path, right: &Path) -> Result<(), BakareError> {
|
||||
let left_files = get_sorted_files_recursively(left)?;
|
||||
let right_files = get_sorted_files_recursively(right)?;
|
||||
|
|
|
@ -80,6 +80,28 @@ fn newer_version_should_be_greater_than_earlier_version() -> Result<(), BakareEr
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
// Backing up two files with identical contents must not grow the data
// directory twice: the second backup should reuse the stored bytes, while
// both files remain individually restorable.
fn store_duplicated_files_just_once() -> Result<(), BakareError> {
    let source = TempSource::new()?;
    let repository_path = &tempdir()?.into_path();
    Repository::init(repository_path)?;
    // a freshly initialised repository holds no data yet
    assert_eq!(data_weight(&repository_path)?, 0);

    let contents = "some contents";

    // first backup must actually store something
    backup_file_with_contents(&source, &repository_path, "1", contents)?;
    let weight_after_first = data_weight(&repository_path)?;
    assert!(weight_after_first > 0);

    // second backup of identical contents must not add any data
    backup_file_with_contents(&source, &repository_path, "2", contents)?;
    let weight_after_second = data_weight(&repository_path)?;
    assert_eq!(weight_after_first, weight_after_second);

    // deduplication must not break restoring either file
    assert_restored_has_contents(repository_path, &source.file_path("1"), contents)?;
    assert_restored_has_contents(repository_path, &source.file_path("2"), contents)?;
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn restore_latest_version_by_default() -> Result<(), BakareError> {
|
||||
let source = TempSource::new()?;
|
||||
|
@ -110,6 +132,5 @@ fn forbid_backup_of_paths_within_repository() -> Result<(), BakareError> {
|
|||
}
|
||||
|
||||
// TODO: test concurrent writes
|
||||
// TODO: deduplicate data
|
||||
// TODO: test that index is stored separately from data
|
||||
// TODO: index corruption
|
||||
|
|
Loading…
Reference in a new issue