Prototype hashing

main
Cyryl Płotnicki 2023-01-21 10:45:40 +00:00
parent 4560bd6518
commit 7b4734bfe4
8 changed files with 11150 additions and 12 deletions

2
.envrc Normal file
View File

@ -0,0 +1,2 @@
use flake

13
.gitignore vendored
View File

@ -1,16 +1,5 @@
# ---> Rust
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
.direnv/
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

1618
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

19
Cargo.toml Normal file
View File

@ -0,0 +1,19 @@
[package]
name = "neosync"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1"
camino = "1"
chrono = { version = "0.4", features = ["serde"] }
hex ="0.4"
indicatif = "0.17"
jwalk = "0.8"
rayon = "1"
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha1 = "0.10"
structopt = "0.3"
tokio = { version = "1", features = ["full"] }

140
flake.lock Normal file
View File

@ -0,0 +1,140 @@
{
"nodes": {
"advisory-db": {
"flake": false,
"locked": {
"lastModified": 1674256015,
"narHash": "sha256-fII7UccIXtdURqQHRnNFzeI51c+uloaaFcJKqrvIZVI=",
"owner": "rustsec",
"repo": "advisory-db",
"rev": "ac877f8184b76dc295d1105dfaecfc00420d9334",
"type": "github"
},
"original": {
"owner": "rustsec",
"repo": "advisory-db",
"type": "github"
}
},
"crane": {
"inputs": {
"flake-compat": "flake-compat",
"flake-utils": "flake-utils",
"nixpkgs": [
"nixpkgs"
],
"rust-overlay": "rust-overlay"
},
"locked": {
"lastModified": 1673405853,
"narHash": "sha256-6Nq9DuOo+gE2I8z5UZaKuumykz2xxZ9JGYmUthOuwSA=",
"owner": "ipetkov",
"repo": "crane",
"rev": "b13963c8c18026aa694acd98d14f66d24666f70b",
"type": "github"
},
"original": {
"owner": "ipetkov",
"repo": "crane",
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1668681692,
"narHash": "sha256-Ht91NGdewz8IQLtWZ9LCeNXMSXHUss+9COoqu6JLmXU=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "009399224d5e398d03b22badca40a37ac85412a1",
"type": "github"
},
"original": {
"owner": "edolstra",
"repo": "flake-compat",
"type": "github"
}
},
"flake-utils": {
"locked": {
"lastModified": 1667395993,
"narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"flake-utils_2": {
"locked": {
"lastModified": 1667395993,
"narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1674236650,
"narHash": "sha256-B4GKL1YdJnII6DQNNJ4wDW1ySJVx2suB1h/v4Ql8J0Q=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "cfb43ad7b941d9c3606fb35d91228da7ebddbfc5",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"advisory-db": "advisory-db",
"crane": "crane",
"flake-utils": "flake-utils_2",
"nixpkgs": "nixpkgs"
}
},
"rust-overlay": {
"inputs": {
"flake-utils": [
"crane",
"flake-utils"
],
"nixpkgs": [
"crane",
"nixpkgs"
]
},
"locked": {
"lastModified": 1672712534,
"narHash": "sha256-8S0DdMPcbITnlOu0uA81mTo3hgX84wK8S9wS34HEFY4=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "69fb7bf0a8c40e6c4c197fa1816773774c8ac59f",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

106
flake.nix Normal file
View File

@ -0,0 +1,106 @@
# Nix flake for this crate, built with crane.
#
# Per system it provides:
#   * checks         - build, clippy, docs, rustfmt, cargo-audit, cargo-nextest
#                      (plus tarpaulin coverage on x86_64-linux)
#   * packages/apps  - the crate itself
#   * devShells      - a shell with the Rust toolchain and the inputs of all checks
{
  description = "Build a cargo project";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";

    crane = {
      url = "github:ipetkov/crane";
      inputs.nixpkgs.follows = "nixpkgs";
    };

    flake-utils.url = "github:numtide/flake-utils";

    advisory-db = {
      url = "github:rustsec/advisory-db";
      flake = false;
    };
  };

  outputs = { self, nixpkgs, crane, flake-utils, advisory-db, ... }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs { inherit system; };
        inherit (pkgs) lib;

        craneLib = crane.lib.${system};

        # NOTE(review): cleanCargoSource filters the source tree, while
        # src/main.rs embeds ../tests/data/list.json via include_str! -
        # confirm the fixture survives the filter when building with Nix.
        src = craneLib.cleanCargoSource ./.;

        # NOTE(review): the dev shell below lists openssl and pkg-config, but
        # they are not part of these build inputs - confirm the sandboxed
        # build does not need them as well.
        buildInputs = [
          # Add additional build inputs here
        ] ++ lib.optionals pkgs.stdenv.isDarwin [
          # Additional darwin specific inputs can be set here
          pkgs.libiconv
        ];

        # Build *just* the cargo dependencies, so we can reuse
        # all of that work (e.g. via cachix) when running in CI
        cargoArtifacts = craneLib.buildDepsOnly { inherit src buildInputs; };

        # Build the actual crate itself, reusing the dependency
        # artifacts from above.
        my-crate =
          craneLib.buildPackage { inherit cargoArtifacts src buildInputs; };
      in {
        checks = {
          # Build the crate as part of `nix flake check` for convenience
          inherit my-crate;

          # Run clippy (and deny all warnings) on the crate source,
          # again, reusing the dependency artifacts from above.
          #
          # Note that this is done as a separate derivation so that
          # we can block the CI if there are issues here, but not
          # prevent downstream consumers from building our crate by itself.
          my-crate-clippy = craneLib.cargoClippy {
            inherit cargoArtifacts src buildInputs;
            cargoClippyExtraArgs = "--all-targets -- --deny warnings";
          };

          my-crate-doc =
            craneLib.cargoDoc { inherit cargoArtifacts src buildInputs; };

          # Check formatting
          my-crate-fmt = craneLib.cargoFmt { inherit src; };

          # Audit dependencies
          my-crate-audit = craneLib.cargoAudit { inherit src advisory-db; };

          # Run tests with cargo-nextest
          # Consider setting `doCheck = false` on `my-crate` if you do not want
          # the tests to run twice
          my-crate-nextest = craneLib.cargoNextest {
            inherit cargoArtifacts src buildInputs;
            partitions = 1;
            partitionType = "count";
          };
        } // lib.optionalAttrs (system == "x86_64-linux") {
          # NB: cargo-tarpaulin only supports x86_64 systems
          # Check code coverage (note: this will not upload coverage anywhere)
          my-crate-coverage =
            craneLib.cargoTarpaulin { inherit cargoArtifacts src; };
        };

        packages.default = my-crate;

        apps.default = flake-utils.lib.mkApp { drv = my-crate; };

        devShells.default = pkgs.mkShell {
          # `self.checks` is keyed by system (eachDefaultSystem adds that
          # level), so select the current system before collecting the check
          # derivations; otherwise inputsFrom receives attribute sets instead
          # of derivations and contributes nothing to the shell.
          inputsFrom = builtins.attrValues self.checks.${system};

          # Extra inputs can be added here
          nativeBuildInputs = with pkgs; [
            cargo
            cargo-edit
            cargo-nextest
            cargo-watch
            clippy
            openssl
            pkg-config
            rustc
            rustfmt
          ];
        };
      });
}

118
src/main.rs Normal file
View File

@ -0,0 +1,118 @@
use anyhow::{Result, anyhow, Context};
use camino::Utf8PathBuf;
use chrono::prelude::*;
use indicatif::ProgressBar;
use jwalk::WalkDir;
use rayon::prelude::*;
use sha1::{Sha1, Digest};
use std::{path::PathBuf, str::FromStr, collections::{HashMap, HashSet}, io, fs};
use structopt::StructOpt;
/// One entry of the server-side file listing, as built by `parse`.
struct FileEntry {
/// Value of the entry's `path` field.
path: Utf8PathBuf,
/// Value of the entry's `is_directory` field.
is_directory: bool,
/// The entry's `updated_at` field, parsed as an RFC 2822 date and converted to local time.
updated_at: DateTime<Local>,
/// The entry's `size` field; `None` when it is absent or not an unsigned integer.
size: Option<u64>,
/// The entry's `sha1_hash` field; always `None` for directories.
sha1_hash: Option<String>,
}
// Command-line options, parsed in `main` through `Options::from_args()`.
// Plain `//` comments are used here on purpose: structopt turns `///` doc
// comments into `--help` text, which would change the program's output.
#[derive(StructOpt)]
struct Options {
// Directory whose files are hashed; `main` passes it to `hash`.
#[structopt(long, parse(from_str))]
directory_to_upload: Utf8PathBuf,
// API key; in this file it is only referenced by the commented-out request in `main`.
#[structopt(long)]
api_key: String,
}
/// Program entry point.
///
/// Parses the command line, parses the bundled sample server listing and hashes
/// every file below the directory given on the command line. Neither result is
/// consumed yet.
#[tokio::main]
async fn main() -> Result<()> {
    let cli = Options::from_args();

    // The live listing request is not wired up yet. The disabled sketch issued a
    // GET to https://neocities.org/api/list through `ureq`, sending the API key as
    // an `Authorization: Bearer <key>` header and decoding the body as JSON.
    // Until then the checked-in sample response stands in for the server.
    let listing: serde_json::Value =
        serde_json::from_str(include_str!("../tests/data/list.json"))?;

    let _server_entries = parse(&listing)?;
    let _local_entries = hash(&cli.directory_to_upload);

    Ok(())
}
/// Computes the SHA-1 digest of every file found below `directory`.
///
/// The file list comes from `all_files`; the files are hashed in parallel while a
/// progress bar tracks how many have been processed.
///
/// Returns a map from each file's path (as a string) to the hex encoding of its
/// SHA-1 digest. A file that cannot be opened or read is left out of the map and
/// reported on standard error, so a partial result is never mistaken for a
/// complete one.
fn hash(directory: &Utf8PathBuf) -> HashMap<String, String> {
    let list_of_files = all_files(directory);
    let progressbar = ProgressBar::new(list_of_files.len() as u64);
    let result = list_of_files
        .par_iter()
        .filter_map(|path| {
            // Stream the file into the hasher instead of loading it whole.
            let digest = fs::File::open(path).and_then(|mut file| {
                let mut hasher = Sha1::new();
                io::copy(&mut file, &mut hasher)?;
                Ok(hex::encode(hasher.finalize()))
            });
            // Tick for every processed file, readable or not, so the bar can
            // actually reach its total.
            progressbar.inc(1);
            match digest {
                Ok(hex_text) => Some((path.to_string(), hex_text)),
                Err(error) => {
                    // Hide the bar while printing so the message is not overdrawn.
                    progressbar.suspend(|| eprintln!("skipping {path}: {error}"));
                    None
                }
            }
        })
        .collect();
    progressbar.finish_and_clear();
    result
}
/// Collects the paths of all non-directory entries found while walking `directory`.
///
/// Entries the walker reports as errors are skipped, and so are paths that are
/// not valid UTF-8.
fn all_files(directory: &Utf8PathBuf) -> HashSet<Utf8PathBuf> {
    let mut files = HashSet::new();
    for entry in WalkDir::new(directory).into_iter().filter_map(Result::ok) {
        if entry.file_type().is_dir() {
            continue;
        }
        // `from_path_buf` fails for non-UTF-8 paths; those are ignored.
        if let Ok(utf8_path) = Utf8PathBuf::from_path_buf(entry.path().to_path_buf()) {
            files.insert(utf8_path);
        }
    }
    files
}
fn parse(server_list: &serde_json::Value) -> Result<Vec<FileEntry>, anyhow::Error> {
let file_entries = server_list.as_object().ok_or_else(|| anyhow!("response is not a json object"))?;
let file_entries = file_entries["files"].as_array().ok_or_else(|| anyhow!("no 'files' array in response"))?;
let file_entries = file_entries.iter().map(|entry| {
let path = entry["path"].as_str().ok_or_else(|| anyhow!("cannot parse path"))?;
let is_directory = entry["is_directory"].as_bool().ok_or_else(|| anyhow!("is_directory for {path} does not exist"))?;
let updated_at = entry["updated_at"].as_str().ok_or_else(|| anyhow!(format!("cannot deserialize 'updated_at' field for {path}")))?;
let sha1_hash = match is_directory {
true => Ok(None),
false => entry["sha1_hash"].as_str().map_or(Err(anyhow!(format!("cannot deserialize 'sha1_hash' for {path}"))), |f| Ok(Some(f.to_string())))
}?;
Ok(FileEntry{
path: Utf8PathBuf::from_str(path)?,
is_directory,
updated_at: DateTime::parse_from_rfc2822(updated_at).context(format!("cannot parse date ('{updated_at}') for {path}"))?.into(),
size: entry["size"].as_u64(),
sha1_hash,
})
}).collect::<Result<Vec<_>>>()?;
Ok(file_entries)
}
/// Parses the checked-in sample listing and spot-checks the result.
#[test]
fn parse_example() -> Result<()> {
    let fixture = include_str!("../tests/data/list.json");
    let server_list: serde_json::Value = serde_json::from_str(fixture)?;
    let parsed = parse(&server_list)?;

    // The sample listing holds 1468 entries and starts with the "2012" entry.
    assert_eq!(1468, parsed.len());
    let head = parsed.first().unwrap();
    assert_eq!("2012", head.path);
    Ok(())
}

9146
tests/data/list.json Normal file

File diff suppressed because it is too large Load Diff