Skip to content

Commit b40d12a

Browse files
authored
fix(storage-proofs): always use temporary files for MTs (#833)
* remove MTs directory, use only temp files * import new FromIndexedParallelIterator * bump merkletree dep
1 parent 2d6aa39 commit b40d12a

File tree

4 files changed

+5
-152
lines changed

4 files changed

+5
-152
lines changed

README.md

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ To check that it's working you can inspect the replication log to find `using pa
161161

162162
### Memory
163163

164-
We try to generate the MTs in parallel to speed up the process but that takes 2 sector sizes per each layer (e.g., a 1 GiB sector may require, in the worst case scenario, up to 20 GiB of memory to hold the MTs alone). To reduce that (at the cost of speed) we have the (experimental) `disk-trees` feature to offload the MTs to disk when we don't use them. For example, to run the `zigzag` example with this feature you'd need to indicate so to `cargo` *and* then indicate to the example (or any other code doing the replication) where they should be stored using the `FIL_PROOFS_REPLICATED_TREES_DIR` environmental variable (if set to a relative path, it will be relative to the current working directory of the process in which the replication happens, see [`create_dir`](https://doc.rust-lang.org/std/fs/fn.create_dir.html#platform-specific-behavior)),
164+
We try to generate the MTs in parallel to speed up the process but that takes 2 sector sizes per each layer (e.g., a 1 GiB sector may require, in the worst case scenario, up to 20 GiB of memory to hold the MTs alone). To reduce that (at the cost of speed) we have the (experimental) `disk-trees` feature to offload the MTs to disk when we don't use them. For example, to run the `zigzag` example with this feature you'd need to indicate so to `cargo`,
165165

166166
```
167167
# From inside the `storage-proofs` directory, where this feature
@@ -172,15 +172,10 @@ cargo build \
172172
--example zigzag \
173173
--features \
174174
disk-trees &&
175-
FIL_PROOFS_REPLICATED_TREES_DIR="<tree-dir>" \
176175
../target/release/examples/zigzag \
177176
--size 1048576
178177
```
179178

180-
To check if the feature is indeed working you can inspect the replication log to find the text `creating leaves tree mmap-file` indicating the path where each MT file is saved.
181-
182-
Note that at the moment we do *not* clean up `<tree-dir>` so you'll need to **remove old MT files** yourself (otherwise the disk will start to fill up pretty fast). To avoid collisions in the directory we append a random hexadecimal value to each file, to check which files belong to the latest replication run you can (besides checking modification times) inspect the replication log to find out the actual file names generated.
183-
184179
To optimize even more for memory there's another option (used in addition to the `disk-trees` feature) to generate all MTs in sequential order, to make sure we can offload them to disk before we start building the next one,
185180

186181
```

storage-proofs/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ bench = false
1515
bitvec = "0.5"
1616
rand = "0.4"
1717
libc = "0.2"
18-
merkletree = "=0.8"
18+
merkletree = "=0.9"
1919
failure = "0.1"
2020
byteorder = "1"
2121
config = "0.9.3"

storage-proofs/src/drgraph.rs

Lines changed: 1 addition & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,7 @@ use crate::hasher::{Domain, Hasher};
1010
use crate::merkle::MerkleTree;
1111
use crate::parameter_cache::ParameterSetMetadata;
1212
use crate::util::{data_at_node, NODE_SIZE};
13-
14-
#[cfg(feature = "disk-trees")]
15-
use crate::merkle::DiskStore;
16-
#[cfg(feature = "disk-trees")]
17-
use merkletree::merkle::next_pow2;
18-
#[cfg(feature = "disk-trees")]
19-
use std::path::Path;
20-
#[cfg(feature = "disk-trees")]
21-
use std::path::PathBuf;
13+
use merkletree::merkle::FromIndexedParallelIterator;
2214

2315
/// The default hasher currently in use.
2416
pub type DefaultTreeHasher = PedersenHasher;
@@ -70,84 +62,6 @@ pub trait Graph<H: Hasher>: ::std::fmt::Debug + Clone + PartialEq + Eq {
7062
}
7163
}
7264

73-
/// Builds a merkle tree based on the given data and stores it in `path`
74-
/// (if set).
75-
#[cfg(feature = "disk-trees")]
76-
fn merkle_tree_path<'a>(
77-
&self,
78-
data: &'a [u8],
79-
path: Option<&Path>,
80-
) -> Result<MerkleTree<H::Domain, H::Function>> {
81-
self.merkle_tree_aux_path(data, PARALLEL_MERKLE, path)
82-
}
83-
84-
#[cfg(feature = "disk-trees")]
85-
fn merkle_tree_aux_path<'a>(
86-
&self,
87-
data: &'a [u8],
88-
parallel: bool,
89-
path: Option<&Path>,
90-
) -> Result<MerkleTree<H::Domain, H::Function>> {
91-
if data.len() != (NODE_SIZE * self.size()) as usize {
92-
return Err(Error::InvalidMerkleTreeArgs(
93-
data.len(),
94-
NODE_SIZE,
95-
self.size(),
96-
));
97-
}
98-
99-
let f = |i| {
100-
let d = data_at_node(&data, i).expect("data_at_node math failed");
101-
// TODO/FIXME: This can panic. FOR NOW, let's leave this since we're experimenting with
102-
// optimization paths. However, we need to ensure that bad input will not lead to a panic
103-
// that isn't caught by the FPS API.
104-
// Unfortunately, it's not clear how to perform this error-handling in the parallel
105-
// iterator case.
106-
H::Domain::try_from_bytes(d).unwrap()
107-
};
108-
109-
if let Some(path) = path {
110-
let path_prefix = path.to_str().expect("couldn't convert path to string");
111-
let leaves_path = &PathBuf::from([path_prefix, "leaves"].join("-"));
112-
let top_half_path = &PathBuf::from([path_prefix, "top-half"].join("-"));
113-
// FIXME: There is probably a more direct way of doing this without
114-
// reconverting to string.
115-
116-
info!(
117-
"creating leaves tree mmap-file (path-prefix: {:?})",
118-
leaves_path.to_str()
119-
);
120-
info!(
121-
"creating top half tree mmap-file (path-prefix: {:?})",
122-
top_half_path.to_str()
123-
);
124-
125-
let leaves_disk_mmap = DiskStore::new_with_path(next_pow2(self.size()), leaves_path);
126-
let top_half_disk_mmap =
127-
DiskStore::new_with_path(next_pow2(self.size()), top_half_path);
128-
129-
// FIXME: `new_with_path` is using the `from_iter` implementation,
130-
// instead the `parallel` flag should be passed also as argument
131-
// and decide *there* which code to use (merging this into the
132-
// `if` logic below).
133-
134-
Ok(MerkleTree::from_data_with_store(
135-
(0..self.size()).map(f),
136-
leaves_disk_mmap,
137-
top_half_disk_mmap,
138-
))
139-
// If path is `None` use the existing code that will eventually
140-
// call the default `DiskStore::new` creating a temporary
141-
// file.
142-
} else if parallel {
143-
Ok(MerkleTree::from_par_iter(
144-
(0..self.size()).into_par_iter().map(f),
145-
))
146-
} else {
147-
Ok(MerkleTree::new((0..self.size()).map(f)))
148-
}
149-
}
150-
15165
/// Returns the merkle tree depth.
15266
fn merkle_tree_depth(&self) -> u64 {
15367
graph_height(self.size()) as u64

storage-proofs/src/layered_drgporep.rs

Lines changed: 2 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,6 @@ use crate::proof::ProofScheme;
2121
use crate::settings;
2222
use crate::vde;
2323

24-
#[cfg(feature = "disk-trees")]
25-
use rand;
26-
#[cfg(feature = "disk-trees")]
27-
use std::fs;
28-
#[cfg(feature = "disk-trees")]
29-
use std::io;
30-
#[cfg(feature = "disk-trees")]
31-
use std::path::PathBuf;
32-
3324
type Tree<H> = MerkleTree<<H as Hasher>::Domain, <H as Hasher>::Function>;
3425

3526
fn anonymous_mmap(len: usize) -> MmapMut {
@@ -389,7 +380,7 @@ pub trait Layers {
389380
let mut sorted_trees: Vec<_> = Vec::new();
390381

391382
(0..=layers).fold(graph.clone(), |current_graph, layer| {
392-
let tree_d = Self::generate_data_tree(&current_graph, &data, layer);
383+
let tree_d = current_graph.merkle_tree(&data).unwrap();
393384

394385
info!("returning tree (layer: {})", layer);
395386

@@ -466,7 +457,7 @@ pub trait Layers {
466457
.recv()
467458
.expect("Failed to receive value through channel");
468459

469-
let tree_d = Self::generate_data_tree(&graph, &data_copy, layer);
460+
let tree_d = graph.merkle_tree(&data_copy).unwrap();
470461

471462
info!("returning tree (layer: {})", layer);
472463
return_channel
@@ -518,53 +509,6 @@ pub trait Layers {
518509

519510
Ok((taus, auxs))
520511
}
521-
522-
fn generate_data_tree(
523-
graph: &Self::Graph,
524-
data: &[u8],
525-
_layer: usize,
526-
) -> MerkleTree<<Self::Hasher as Hasher>::Domain, <Self::Hasher as Hasher>::Function> {
527-
#[cfg(not(feature = "disk-trees"))]
528-
return graph.merkle_tree(&data).unwrap();
529-
530-
#[cfg(feature = "disk-trees")]
531-
{
532-
let tree_dir = &settings::SETTINGS.lock().unwrap().replicated_trees_dir;
533-
// We should always be able to get this configuration
534-
// variable (at least as an empty string).
535-
536-
if tree_dir.is_empty() {
537-
// Signal `merkle_tree_path` to create a temporary file.
538-
return graph.merkle_tree_path(&data, None).unwrap();
539-
} else {
540-
// Try to create `tree_dir`, ignore the error if `AlreadyExists`.
541-
if let Some(create_error) = fs::create_dir(&tree_dir).err() {
542-
if create_error.kind() != io::ErrorKind::AlreadyExists {
543-
panic!(create_error);
544-
}
545-
}
546-
547-
let tree_d = graph
548-
.merkle_tree_path(
549-
&data,
550-
Some(&PathBuf::from(tree_dir).join(format!(
551-
"tree-{}-{}",
552-
_layer,
553-
// FIXME: This argument is used only with `disk-trees`.
554-
rand::random::<u32>()
555-
))),
556-
)
557-
.unwrap();
558-
// FIXME: The user of `REPLICATED_TREES_DIR` should figure out
559-
// how to manage this directory, for now we create every file with
560-
// a different random number; the problem being that tests now do
561-
// replications many times in the same run so they may end up
562-
// reusing the same files with invalid (old) data and failing.
563-
564-
return tree_d;
565-
}
566-
}
567-
}
568512
}
569513

570514
impl<'a, L: Layers> ProofScheme<'a> for L {

0 commit comments

Comments
 (0)