diff --git a/src/repository.rs b/src/repository.rs
index 011b06f..3402ab3 100644
--- a/src/repository.rs
+++ b/src/repository.rs
@@ -1,4 +1,4 @@
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::{fmt, fs, io};
 
 use crate::error::BakareError;
@@ -10,6 +10,7 @@ use sha2::Sha512;
 use std::fmt::Formatter;
 use std::fs::File;
 use std::io::BufReader;
+use walkdir::WalkDir;
 
 /// represents a place where backup is stored an can be restored from.
 /// right now only on-disk directory storage is supported
@@ -21,6 +22,8 @@ pub struct Repository<'a> {
     index: Index,
 }
 
+const DATA_DIR_NAME: &str = "data";
+
 #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize, Hash)]
 pub struct ItemId(Box<[u8]>);
 
@@ -95,7 +98,8 @@ impl<'a> Repository<'a> {
             return Err(BakareError::PathToStoreNotAbsolute);
         }
         let id = Repository::calculate_id(source_path)?;
-        let destination_path = self.path.join(id.to_string());
+        let destination_path = self.data_dir();
+        let destination_path = destination_path.join(id.to_string());
         let destination_path = Path::new(&destination_path);
 
         if source_path.is_file() {
@@ -126,6 +130,20 @@ impl<'a> Repository<'a> {
         }
     }
 
+    pub fn data_weight(&self) -> Result<u64, BakareError> {
+        let total_size = WalkDir::new(self.data_dir())
+            .into_iter()
+            .filter_map(|entry| entry.ok())
+            .filter_map(|entry| entry.metadata().ok())
+            .filter(|metadata| metadata.is_file())
+            .fold(0, |acc, m| acc + m.len());
+        Ok(total_size)
+    }
+
+    fn data_dir(&self) -> PathBuf {
+        self.path().join(DATA_DIR_NAME)
+    }
+
     fn calculate_id(source_path: &Path) -> Result<ItemId, BakareError> {
         let source_file = File::open(source_path)?;
         let mut reader = BufReader::new(source_file);
diff --git a/src/test/assertions.rs b/src/test/assertions.rs
index 91a85df..d3733e7 100644
--- a/src/test/assertions.rs
+++ b/src/test/assertions.rs
@@ -97,6 +97,13 @@ pub fn backup_file_with_contents(
     }
 }
 
+pub fn data_weight(repository_path: &Path) -> Result<u64, BakareError> {
+    {
+        let repository = Repository::open(repository_path)?;
+        Ok(repository.data_weight()?)
+    }
+}
+
 fn assert_directory_trees_have_same_contents(left: &Path, right: &Path) -> Result<(), BakareError> {
     let left_files = get_sorted_files_recursively(left)?;
     let right_files = get_sorted_files_recursively(right)?;
diff --git a/tests/system_tests.rs b/tests/system_tests.rs
index 8fd9818..c145eea 100644
--- a/tests/system_tests.rs
+++ b/tests/system_tests.rs
@@ -80,6 +80,28 @@ fn newer_version_should_be_greater_than_earlier_version() -> Result<(), BakareEr
     Ok(())
 }
 
+#[test]
+fn store_duplicated_files_just_once() -> Result<(), BakareError> {
+    let source = TempSource::new()?;
+    let repository_path = &tempdir()?.into_path();
+    Repository::init(repository_path)?;
+    assert_eq!(data_weight(&repository_path)?, 0);
+
+    let contents = "some contents";
+
+    backup_file_with_contents(&source, &repository_path, "1", contents)?;
+    let first_weight = data_weight(&repository_path)?;
+    assert!(first_weight > 0);
+
+    backup_file_with_contents(&source, &repository_path, "2", contents)?;
+    let second_weight = data_weight(&repository_path)?;
+    assert_eq!(first_weight, second_weight);
+
+    assert_restored_has_contents(repository_path, &source.file_path("1"), contents)?;
+    assert_restored_has_contents(repository_path, &source.file_path("2"), contents)?;
+    Ok(())
+}
+
 #[test]
 fn restore_latest_version_by_default() -> Result<(), BakareError> {
     let source = TempSource::new()?;
@@ -110,6 +132,5 @@ fn forbid_backup_of_paths_within_repository() -> Result<(), BakareError> {
 }
 
 // TODO: test concurrent writes
-// TODO: deduplicate data
 // TODO: test that index is stored separately from data
 // TODO: index corruption