diff --git a/Cargo.lock b/Cargo.lock index be82bb7..a14b5ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,7 @@ dependencies = [ "proptest", "rand 0.8.3", "rayon", + "reed-solomon-erasure", "rust-crypto", "serde", "serde_cbor", @@ -186,9 +187,9 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10bcb9d7dcbf7002aaffbb53eac22906b64cdcc127971dcc387d8eb7c95d5560" +checksum = "e8f45d9ad417bcef4817d614a501ab55cdd96a6fdb24f49aab89a54acfd66b19" dependencies = [ "quote", "syn", @@ -346,9 +347,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.84" +version = "0.2.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cca32fa0182e8c0989459524dc356b8f2b5c10f1b9eb521b7d182c03cf8c5ff" +checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3" [[package]] name = "log" @@ -640,6 +641,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "reed-solomon-erasure" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a415a013dd7c5d4221382329a5a3482566da675737494935cbbbcdec04662f9d" +dependencies = [ + "cc", + "libc", + "smallvec", +] + [[package]] name = "regex-syntax" version = "0.6.22" @@ -739,9 +751,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.61" +version = "1.0.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" +checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486" dependencies = [ "itoa", "ryu", @@ -750,9 +762,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7aab86fe2149bad8c507606bdb3f4ef5e7b2380eb92350f56122cca72a42a8" +checksum = "fa827a14b29ab7f44778d14a88d3cb76e949c45083f7dbfa507d0cb699dc12de" dependencies = [ "block-buffer", "cfg-if 1.0.0", @@ -761,6 +773,12 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + [[package]] name = "syn" version = "1.0.60" diff --git a/Cargo.toml b/Cargo.toml index 959e72d..5a6f17e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,12 +19,13 @@ log = "0.4" nix = "0.19" rand = "0.8" rayon = "1.5" +reed-solomon-erasure = { version = "4.0", features = ["simd-accel"] } rust-crypto = "0.2" serde = { version = "1.0", features = ["derive"] } serde_cbor = "0.11" serde_json = "1.0" sha2 = "0.9" -tempfile = "3.1" +tempfile = "3.2" thiserror = "1.0" uuid = { version = "0.8", features = ["v4"] } vfs = "0.4" diff --git a/src/index/io.rs b/src/index/io.rs index 1b98459..2527b79 100644 --- a/src/index/io.rs +++ b/src/index/io.rs @@ -12,6 +12,8 @@ use lock::Lock; use nix::unistd::getpid; use std::{cmp::max, io::Write}; +const DATA_SHARD_SIZE: u16 = 4096; + impl Index { pub fn load(repository_path: &VfsPath) -> Result { if !repository_path.exists() { @@ -66,21 +68,31 @@ impl Index { .context(format!("create index directory at {}", index_file_path.as_str()))?), }?; - let contents; + let serialised = serde_json::to_string(&self)?; + + let bytes = serialised.as_bytes(); + let encoded = Index::encode(bytes)?; + { let mut file = index_file_path.create_file()?; - contents = serde_json::to_string(&self)?; - file.write_all(contents.as_bytes()).context("writing index to disk")?; + file.write_all(encoded).context("writing index to disk")?; file.flush()?; } - let readback = index_file_path.read_to_string()?; - if readback != contents { + + let mut file = index_file_path.open_file()?; + let mut readback = vec![]; + file.read_to_end(&mut readback)?; + if readback != encoded { Err(anyhow!("index readback incorrect")) } else { Ok(()) } } + fn encode(bytes: &[u8]) -> Result<&[u8]> { + Ok(bytes) + } + fn load_from_file(index_file_path: &VfsPath) -> Result { let index_text = index_file_path .read_to_string() @@ -117,6 +129,7 @@ mod must { use crate::index::Index; use anyhow::Result; + use rand::Rng; use vfs::{MemoryFS, VfsPath}; #[test] @@ -146,4 +159,39 @@ mod must { Ok(()) } + + #[test] + fn survive_file_corruption() -> Result<()> { + let repository_path: VfsPath = MemoryFS::new().into(); + let mut original = Index::new()?; + + original.save(&repository_path)?; + corrupt(&repository_path)?; + let loaded = Index::load(&repository_path)?; + + assert_eq!(original, loaded); + + Ok(()) + } + + fn corrupt(repository_path: &VfsPath) -> Result<()> { + let index_file_path = &Index::index_file_path_for_repository_path(&repository_path)?; + + let size = dbg!(index_file_path.metadata()?.len as usize); + let corrupt_byte_index = rand::thread_rng().gen_range::(0..size); + + let corrupted = { + let mut file = index_file_path.open_file()?; + let mut buffer = vec![]; + file.read_to_end(&mut buffer)?; + buffer[corrupt_byte_index] = rand::thread_rng().gen::(); + buffer + }; + { + let mut file = index_file_path.create_file()?; + file.write_all(&corrupted)?; + } + + Ok(()) + } }