WIP on adding versions support

This commit is contained in:
Cyryl Płotnicki 2019-09-23 12:18:18 +01:00
parent 98a6f094be
commit 3ac27c1021
6 changed files with 216 additions and 136 deletions

View file

@ -4,27 +4,65 @@ use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::error::BakareError;
use crate::repository::ItemVersion;
use crate::error::BakareError::RepositoryPathNotAbsolute;
use crate::repository::{ItemId, Version};
use crate::repository_item::RepositoryItem;
use std::collections::hash_map::Iter;
use std::collections::HashMap;
#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize)]
pub struct IndexItem {
relative_path: String,
original_source_path: String,
version: ItemVersion,
id: ItemId,
version: Version,
}
#[derive(Serialize, Deserialize)]
pub struct Index {
items: Vec<IndexItem>,
newest_items_by_source_path: HashMap<String, IndexItem>,
items_by_file_id: HashMap<ItemId, IndexItem>,
index_path: String,
repository_path: String,
}
/// Iterator handed out by the index for walking its items.
///
/// It wraps a borrowing `hash_map::Iter` over a map keyed by `String` whose
/// values are `IndexItem`s, so it cannot outlive the map it was created from.
pub struct IndexItemIterator<'a> {
// Borrowed iterator over the `(key, item)` pairs of the underlying map.
iterator: Iter<'a, String, IndexItem>,
}
/// Yields owned copies of the items held by the wrapped map iterator.
impl<'a> Iterator for IndexItemIterator<'a> {
    type Item = IndexItem;

    /// Advances the wrapped iterator and returns a clone of the next entry's
    /// value; the map key is discarded. Returns `None` once exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let (_, item) = self.iterator.next()?;
        Some(item.clone())
    }
}
impl IndexItem {
fn from(original_source_path: String, relative_path: String, id: ItemId, version: Version) -> IndexItem {
IndexItem {
relative_path,
original_source_path,
id,
version,
}
}
fn next_version(&self, id: ItemId) -> IndexItem {
IndexItem {
relative_path: self.relative_path.clone(),
original_source_path: self.original_source_path.clone(),
version: self.version.next(),
id,
}
}
}
impl Index {
pub fn new(repository_path: &Path) -> Self {
Index {
items: vec![],
newest_items_by_source_path: Default::default(),
items_by_file_id: Default::default(),
index_path: repository_path.join("index").to_string_lossy().to_string(),
repository_path: repository_path.to_string_lossy().to_string(),
}
@ -48,32 +86,67 @@ impl Index {
}
pub fn len(&self) -> usize {
self.items.len()
self.items_by_file_id.len()
}
pub fn is_empty(&self) -> bool {
self.items.is_empty()
self.items_by_file_id.is_empty()
}
pub fn iter(&self) -> IndexIterator {
IndexIterator {
index: self,
current_item_number: 0,
}
pub fn remember(&mut self, original_source_path: &Path, absolute_path: &Path, relative_path: &Path, id: ItemId) {
let item = if let Some(old) = self
.newest_items_by_source_path
.get(&original_source_path.to_string_lossy().to_string())
{
old.next_version(id)
} else {
IndexItem::from(
original_source_path.to_string_lossy().to_string(),
relative_path.to_string_lossy().to_string(),
id,
Version::default(),
)
};
self.items_by_file_id.insert(item.id.clone(), item.clone());
self.newest_items_by_source_path
.insert(original_source_path.to_string_lossy().to_string(), item.clone());
}
pub fn remember(&mut self, item: RepositoryItem) {
self.items.push(item.into());
}
fn repository_item(&self, i: &IndexItem) -> RepositoryItem {
pub fn repository_item(&self, i: &IndexItem) -> RepositoryItem {
let index_item = i.clone();
let relative_path = Path::new(&index_item.relative_path);
let repository_path = Path::new(&self.repository_path);
let original_source_path = Path::new(&index_item.original_source_path);
let absolute_path = repository_path.join(relative_path);
let absolute_path = absolute_path.as_path();
RepositoryItem::from(original_source_path, absolute_path, relative_path, index_item.version)
RepositoryItem::from(
original_source_path,
absolute_path,
relative_path,
index_item.id,
index_item.version,
)
}
/// Looks up the newest index entry recorded for a source path.
///
/// Returns `Err(BakareError::RepositoryPathNotAbsolute)` when `path` is not
/// absolute. Otherwise returns a clone of the entry stored under the lossy
/// string form of `path`, or `None` when there is no such entry.
pub fn newest_item_by_source_path(&self, path: &Path) -> Result<Option<IndexItem>, BakareError> {
    if path.is_absolute() {
        let key = path.to_string_lossy().to_string();
        let newest = self.newest_items_by_source_path.get(&key);
        Ok(newest.cloned())
    } else {
        Err(BakareError::RepositoryPathNotAbsolute)
    }
}
/// Looks up the index entry registered under `id`.
///
/// Always returns `Ok`; the inner `Option` is `None` when no entry with that
/// id exists. The entry is returned as an owned clone.
pub fn item_by_id(&self, id: &ItemId) -> Result<Option<IndexItem>, BakareError> {
    Ok(self.items_by_file_id.get(id).cloned())
}
/// Returns an iterator over the entries of the newest-items-by-source-path
/// map, i.e. one entry per known source path.
pub fn newest_items(&self) -> IndexItemIterator {
    let iterator = self.newest_items_by_source_path.iter();
    IndexItemIterator { iterator }
}
}
@ -82,27 +155,8 @@ impl From<RepositoryItem> for IndexItem {
IndexItem {
relative_path: i.relative_path().to_string_lossy().to_string(),
original_source_path: i.original_source_path().to_string_lossy().to_string(),
id: i.id().clone(),
version: i.version().clone(),
}
}
}
pub struct IndexIterator<'a> {
index: &'a Index,
current_item_number: usize,
}
impl<'a> Iterator for IndexIterator<'a> {
type Item = RepositoryItem;
fn next(&mut self) -> Option<Self::Item> {
if self.index.is_empty() || self.current_item_number > self.index.len() - 1 {
None
} else {
let current_item_number = self.current_item_number;
self.current_item_number += 1;
let index_item = &self.index.items[current_item_number];
Some(self.index.repository_item(index_item))
}
}
}

View file

@ -2,7 +2,7 @@ use std::path::Path;
use std::{fmt, fs, io};
use crate::error::BakareError;
use crate::index::{Index, IndexIterator};
use crate::index::{Index, IndexItem, IndexItemIterator};
use crate::repository_item::RepositoryItem;
use serde::{Deserialize, Serialize};
use sha2::Digest;
@ -10,6 +10,7 @@ use sha2::Sha512;
use std::fmt::Formatter;
use std::fs::File;
use std::io::BufReader;
use std::ops::Add;
/// represents a place where backup is stored and can be restored from.
/// right now only on-disk directory storage is supported
@ -21,39 +22,55 @@ pub struct Repository<'a> {
index: Index,
}
#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize)]
pub struct ItemVersion(Box<[u8]>);
/// Identifier of a single stored item, held as an owned byte slice.
///
/// `Repository::calculate_id` builds it from the result of a `Sha512` hasher,
/// and its `Display` form is the hex encoding of the bytes. It derives `Hash`
/// and `Eq`, so it can be used as a map key.
#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize, Hash)]
pub struct ItemId(Box<[u8]>);
impl AsRef<[u8]> for ItemVersion {
/// Version number of a stored item, wrapping a `u128` counter.
///
/// `Version::default()` is `Version(1)` and `next()` returns the counter plus
/// one. The derived `Ord` compares the wrapped numbers.
#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq, Serialize, Deserialize, Hash)]
pub struct Version(u128);
/// Iterator that turns index entries into `RepositoryItem`s.
///
/// It pairs an `IndexItemIterator` with a borrow of the `Index`, which is
/// used to convert each yielded entry via `Index::repository_item`.
pub struct RepositoryItemIterator<'a> {
// Source of the index entries to convert.
iterator: IndexItemIterator<'a>,
// Index used to convert each entry into a `RepositoryItem`.
index: &'a Index,
}
/// Yields one `RepositoryItem` per entry produced by the wrapped index iterator.
impl<'a> Iterator for RepositoryItemIterator<'a> {
    type Item = RepositoryItem;

    /// Pulls the next index entry and converts it with
    /// `Index::repository_item`; returns `None` once the entries run out.
    fn next(&mut self) -> Option<Self::Item> {
        let index_item = self.iterator.next()?;
        Some(self.index.repository_item(&index_item))
    }
}
impl Version {
pub fn next(&self) -> Self {
Version(self.0 + 1)
}
}
impl Default for Version {
fn default() -> Self {
Version(1)
}
}
impl AsRef<[u8]> for ItemId {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
impl From<&[u8]> for ItemVersion {
impl From<&[u8]> for ItemId {
fn from(a: &[u8]) -> Self {
ItemVersion(Box::from(a))
ItemId(Box::from(a))
}
}
impl fmt::Display for ItemVersion {
impl fmt::Display for ItemId {
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "{}", hex::encode(self))
}
}
pub struct RepositoryIterator<'a> {
index_iterator: IndexIterator<'a>,
}
impl<'a> Iterator for RepositoryIterator<'a> {
type Item = RepositoryItem;
fn next(&mut self) -> Option<Self::Item> {
self.index_iterator.next()
}
}
impl<'a> Repository<'a> {
pub fn init(path: &Path) -> Result<(), BakareError> {
let index = Index::new(path);
@ -70,12 +87,6 @@ impl<'a> Repository<'a> {
Ok(Repository { path, index })
}
pub fn iter(&self) -> RepositoryIterator {
RepositoryIterator {
index_iterator: self.index.iter(),
}
}
/// Returns the path stored in this repository's `path` field.
pub fn path(&self) -> &Path {
&self.path
}
@ -84,26 +95,40 @@ impl<'a> Repository<'a> {
if !source_path.is_absolute() {
return Err(BakareError::PathToStoreNotAbsolute);
}
let version = Repository::calculate_version(source_path)?;
let destination_path = self.path.join(version.to_string());
let id = Repository::calculate_id(source_path)?;
let destination_path = self.path.join(id.to_string());
let destination_path = Path::new(&destination_path);
if source_path.is_file() {
fs::create_dir_all(destination_path.parent().unwrap())?;
fs::copy(source_path, destination_path)?;
self.index.remember(RepositoryItem::from(
source_path,
destination_path,
destination_path.strip_prefix(self.path)?,
version,
));
self.index
.remember(source_path, destination_path, destination_path.strip_prefix(self.path)?, id);
self.index.save()?;
}
Ok(())
}
fn calculate_version(source_path: &Path) -> Result<ItemVersion, BakareError> {
/// Returns the newest stored item for the given source path, if any.
///
/// Delegates the lookup to `Index::newest_item_by_source_path`, propagating
/// its error, and converts a found entry with `Index::repository_item`.
///
/// NOTE(review): the double underscore in the name looks like a typo;
/// renaming it would require updating every caller.
pub fn newest_item_by_source__path(&self, path: &Path) -> Result<Option<RepositoryItem>, BakareError> {
    let newest = self.index.newest_item_by_source_path(path)?;
    Ok(newest.map(|index_item| self.index.repository_item(&index_item)))
}
/// Returns the stored item registered under `id`, if any.
///
/// Delegates the lookup to `Index::item_by_id`, propagating its error, and
/// converts a found entry with `Index::repository_item`.
pub fn item_by_id(&self, id: &ItemId) -> Result<Option<RepositoryItem>, BakareError> {
    let found = self.index.item_by_id(id)?;
    Ok(found.map(|index_item| self.index.repository_item(&index_item)))
}
/// Returns an iterator over the items produced by `Index::newest_items`,
/// each converted to a `RepositoryItem` using this repository's index.
pub fn newest_items(&self) -> RepositoryItemIterator {
    let index = &self.index;
    RepositoryItemIterator {
        index,
        iterator: index.newest_items(),
    }
}
fn calculate_id(source_path: &Path) -> Result<ItemId, BakareError> {
let source_file = File::open(source_path)?;
let mut reader = BufReader::new(source_file);
let mut hasher = Sha512::new();
@ -112,25 +137,4 @@ impl<'a> Repository<'a> {
Ok(hasher.result().as_slice().into())
}
pub fn item_by_source_path_and_version(
&self,
path: &Path,
version: &ItemVersion,
) -> Result<Option<RepositoryItem>, BakareError> {
if !path.is_absolute() {
return Err(BakareError::RepositoryPathNotAbsolute);
}
Ok(self
.iter()
.find(|i| i.original_source_path() == path && i.version() == version))
}
pub fn item_by_source_path(&self, path: &Path) -> Result<Option<RepositoryItem>, BakareError> {
if !path.is_absolute() {
return Err(BakareError::RepositoryPathNotAbsolute);
}
Ok(self.iter().find(|i| i.original_source_path() == path))
}
}

View file

@ -1,5 +1,5 @@
use crate::error::BakareError;
use crate::repository::ItemVersion;
use crate::repository::{ItemId, Version};
use std::fmt::{Display, Formatter};
use std::path::Path;
use std::{fmt, fs};
@ -9,15 +9,17 @@ pub struct RepositoryItem {
relative_path: Box<Path>,
absolute_path: Box<Path>,
original_source_path: Box<Path>,
version: ItemVersion,
id: ItemId,
version: Version,
}
impl RepositoryItem {
pub fn from(original_source_path: &Path, absolute_path: &Path, relative_path: &Path, version: ItemVersion) -> Self {
pub fn from(original_source_path: &Path, absolute_path: &Path, relative_path: &Path, id: ItemId, version: Version) -> Self {
RepositoryItem {
relative_path: Box::from(relative_path),
absolute_path: Box::from(absolute_path),
original_source_path: Box::from(original_source_path),
id,
version,
}
}
@ -52,9 +54,13 @@ impl RepositoryItem {
&self.original_source_path
}
pub fn version(&self) -> &ItemVersion {
pub fn version(&self) -> &Version {
&self.version
}
/// Returns a reference to this item's `ItemId`.
pub fn id(&self) -> &ItemId {
&self.id
}
}
impl Display for RepositoryItem {
@ -63,7 +69,7 @@ impl Display for RepositoryItem {
f,
"'{}' : {}",
self.original_source_path().to_string_lossy(),
hex::encode(self.version())
hex::encode(self.id())
)
}
}

View file

@ -18,7 +18,7 @@ impl<'a> Engine<'a> {
}
pub fn restore_all(&self) -> Result<(), BakareError> {
for item in self.repository.iter() {
for item in self.repository.newest_items() {
self.restore(&item)?;
}
Ok(())

View file

@ -1,4 +1,3 @@
use std::fs;
use std::fs::File;
use std::io::Read;
use std::path::Path;
@ -7,18 +6,10 @@ use tempfile::tempdir;
use walkdir::WalkDir;
use crate::error::BakareError;
use crate::repository::{ItemVersion, Repository};
use crate::repository::{ItemId, Repository};
use crate::source::TempSource;
use crate::{backup, restore};
pub fn assert_target_file_contents(restored_path: &Path, expected_contents: &str) -> Result<(), BakareError> {
let mut actual_contents = String::new();
assert!(restored_path.exists(), "Expected '{}' to be there", restored_path.display());
File::open(restored_path)?.read_to_string(&mut actual_contents)?;
assert_eq!(expected_contents, actual_contents);
Ok(())
}
pub fn assert_same_after_restore(source_path: &Path) -> Result<(), BakareError> {
let repository_path = tempdir()?.into_path();
let restore_target = tempdir()?.into_path();
@ -42,41 +33,44 @@ pub fn assert_same_after_restore(source_path: &Path) -> Result<(), BakareError>
Ok(())
}
pub fn assert_restored_from_version_has_contents(
pub fn assert_restored_has_contents(
repository_path: &Path,
source_file_full_path: &Path,
old_contents: &str,
old_version: &ItemVersion,
contents: &str,
) -> Result<(), BakareError> {
let restore_repository = Repository::open(repository_path)?;
let restore_target = tempdir()?;
let restore_engine = restore::Engine::new(&restore_repository, &restore_target.path())?;
let old_item = restore_repository.item_by_source_path_and_version(&source_file_full_path, &old_version)?;
let item = restore_repository.newest_item_by_source__path(&source_file_full_path)?;
restore_engine.restore(&item.unwrap())?;
let restored_file_path = restore_target.path().join(source_file_full_path.strip_prefix("/")?);
assert_target_file_contents(&restored_file_path, contents)
}
pub fn assert_restored_from_version_has_contents(
repository_path: &Path,
source_file_full_path: &Path,
old_contents: &str,
old_id: &ItemId,
) -> Result<(), BakareError> {
let restore_repository = Repository::open(repository_path)?;
let restore_target = tempdir()?;
let restore_engine = restore::Engine::new(&restore_repository, &restore_target.path())?;
let old_item = restore_repository.item_by_id(&old_id)?;
restore_engine.restore(&old_item.unwrap())?;
let restored_file_path = restore_target.path().join(source_file_full_path.strip_prefix("/")?);
assert_target_file_contents(&restored_file_path, old_contents)
}
pub fn item_version(repository_path: &Path, source_file_full_path: &Path) -> Result<ItemVersion, BakareError> {
let old_version = {
pub fn item_id(repository_path: &Path, source_file_full_path: &Path) -> Result<ItemId, BakareError> {
let id = {
let reading_repository = Repository::open(repository_path)?;
let item = reading_repository.item_by_source_path(&source_file_full_path)?;
let item = reading_repository.newest_item_by_source__path(&source_file_full_path)?;
assert!(item.is_some());
let item = item.unwrap();
item.version().clone()
item.id().clone()
};
Ok(old_version)
}
pub fn read_restored_file_contents(
source: TempSource,
restore_target: &Path,
source_file_relative_path: &str,
) -> Result<String, BakareError> {
let source_file_full_path = source.file_path(source_file_relative_path);
let restored_file_path = restore_target.join(source_file_full_path.strip_prefix("/")?);
let contents = fs::read_to_string(restored_file_path)?;
Ok(contents)
Ok(id)
}
pub fn restore_all_from_reloaded_repository(repository_path: &Path, restore_target: &Path) -> Result<(), BakareError> {
@ -138,3 +132,11 @@ fn get_sorted_files_recursively(path: &Path) -> Result<Vec<Box<Path>>, BakareErr
Ok(result)
}
/// Test helper: asserts that `restored_path` exists and that the file's whole
/// contents equal `expected_contents`.
///
/// Panics when either assertion fails; I/O errors from opening or reading the
/// file are returned to the caller.
fn assert_target_file_contents(restored_path: &Path, expected_contents: &str) -> Result<(), BakareError> {
    let mut actual_contents = String::new();
    assert!(restored_path.exists(), "Expected '{}' to be there", restored_path.display());

    let mut restored_file = File::open(restored_path)?;
    restored_file.read_to_string(&mut actual_contents)?;

    assert_eq!(expected_contents, actual_contents);
    Ok(())
}

View file

@ -20,9 +20,9 @@ fn restore_multiple_files() -> Result<(), BakareError> {
#[test]
fn restore_files_after_reopening_repository() -> Result<(), BakareError> {
let source = TempSource::new()?;
let repository_path = tempdir()?.into_path();
let repository_path = &tempdir()?.into_path();
let restore_target = tempdir()?.into_path();
Repository::init(repository_path.as_path())?;
Repository::init(repository_path)?;
let source_file_relative_path = "some file path";
let original_contents = "some old contents";
@ -31,10 +31,8 @@ fn restore_files_after_reopening_repository() -> Result<(), BakareError> {
restore_all_from_reloaded_repository(&repository_path, &restore_target)?;
let contents = read_restored_file_contents(source, &restore_target, source_file_relative_path)?;
assert_eq!(contents, original_contents);
Ok(())
let source_file_full_path = &source.file_path(source_file_relative_path);
assert_restored_has_contents(repository_path, source_file_full_path, "newest contents")
}
#[test]
@ -49,7 +47,7 @@ fn restore_older_version_of_file() -> Result<(), BakareError> {
backup_file_with_contents(&source, &repository_path, source_file_relative_path, old_contents)?;
let old_version = item_version(&repository_path, &source_file_full_path)?;
let old_version = item_id(&repository_path, &source_file_full_path)?;
let new_contents = "totally new contents";
backup_file_with_contents(&source, &repository_path, source_file_relative_path, new_contents)?;
@ -57,6 +55,21 @@ fn restore_older_version_of_file() -> Result<(), BakareError> {
assert_restored_from_version_has_contents(&repository_path, &source_file_full_path, old_contents, &old_version)
}
/// Backing up the same source file three times and then restoring it without
/// naming a version must yield the contents of the last backup.
#[test]
fn restore_latest_version_by_default() -> Result<(), BakareError> {
    let source = TempSource::new()?;
    let repository_path = &tempdir()?.into_path();
    Repository::init(repository_path)?;

    let source_file_relative_path = "some path";
    // `repository_path` is already a reference, so it is passed as-is rather
    // than borrowed a second time.
    backup_file_with_contents(&source, repository_path, source_file_relative_path, "old contents")?;
    backup_file_with_contents(&source, repository_path, source_file_relative_path, "newer contents")?;
    backup_file_with_contents(&source, repository_path, source_file_relative_path, "newest contents")?;

    let source_file_full_path = &source.file_path(source_file_relative_path);
    assert_restored_has_contents(repository_path, source_file_full_path, "newest contents")
}
#[test]
fn forbid_backup_of_paths_within_repository() -> Result<(), BakareError> {
let repository_path = &tempdir()?.into_path();
@ -71,7 +84,8 @@ fn forbid_backup_of_paths_within_repository() -> Result<(), BakareError> {
Ok(())
}
// TODO: restore latest version by default
// TODO: deduplicate data
// TODO: test that index is stored separately from data
// TODO: index corruption
// TODO: newer version should be greater than older version
// TODO: split version into file id and version