Skip to content

feat(delayed-persist): dump unpersisted nodestore #1055

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 26 additions & 28 deletions firewood/src/merkle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ use crate::stream::PathIterator;
use crate::v2::api;
use firewood_storage::{
BranchNode, Child, FileIoError, HashType, Hashable, HashedNodeReader, ImmutableProposal,
IntoHashType, LeafNode, LinearAddress, MutableProposal, NibblesIterator, Node, NodeStore, Path,
ReadableStorage, SharedNode, TrieReader, ValueDigest,
IntoHashType, LeafNode, MaybePersistedNode, MutableProposal, NibblesIterator, Node, NodeStore,
Path, ReadableStorage, SharedNode, TrieReader, ValueDigest,
};
#[cfg(test)]
use futures::{StreamExt, TryStreamExt};
Expand Down Expand Up @@ -176,10 +176,6 @@ impl<T: TrieReader> Merkle<T> {
&self.nodestore
}

fn read_node(&self, addr: LinearAddress) -> Result<SharedNode, FileIoError> {
self.nodestore.read_node(addr)
}

/// Returns a proof that the given key has a certain value,
/// or that the key isn't in the trie.
pub fn prove(&self, key: &[u8]) -> Result<Proof<ProofNode>, ProofError> {
Expand Down Expand Up @@ -364,14 +360,15 @@ impl<T: TrieReader> Merkle<T> {
}

impl<T: HashedNodeReader> Merkle<T> {
/// Dump a node, recursively, to a dot file
pub(crate) fn dump_node(
&self,
addr: LinearAddress,
node: &MaybePersistedNode,
hash: Option<&HashType>,
seen: &mut HashSet<LinearAddress>,
seen: &mut HashSet<String>,
writer: &mut dyn Write,
) -> Result<(), FileIoError> {
write!(writer, " {addr}[label=\"addr:{addr:?}")
write!(writer, " {node}")
.map_err(Error::other)
.map_err(|e| FileIoError::new(e, None, 0, None))?;
if let Some(hash) = hash {
Expand All @@ -380,33 +377,28 @@ impl<T: HashedNodeReader> Merkle<T> {
.map_err(|e| FileIoError::new(e, None, 0, None))?;
}

match &*self.read_node(addr)? {
match &*node.as_shared_node(&self.nodestore)? {
Node::Branch(b) => {
write_attributes!(writer, b, &b.value.clone().unwrap_or(Box::from([])));
writeln!(writer, "\"]")
.map_err(|e| FileIoError::from_generic_no_file(e, "write branch"))?;
for (childidx, child) in b.children.iter().enumerate() {
let (child_addr, child_hash) = match child {
let (child, child_hash) = match child {
None => continue,
Some(node) => {
let (Some(addr), hash) = (node.persisted_address(), node.hash()) else {
continue;
};
(addr, hash)
}
Some(node) => (node.as_maybe_persisted_node(), node.hash()),
};

let inserted = seen.insert(child_addr);
let inserted = seen.insert(format!("{child}"));
if inserted {
writeln!(writer, " {addr} -> {child_addr}[label=\"{childidx}\"]")
writeln!(writer, " {node} -> {child}[label=\"{childidx}\"]")
.map_err(|e| FileIoError::from_generic_no_file(e, "write branch"))?;
self.dump_node(child_addr, child_hash, seen, writer)?;
self.dump_node(&child, child_hash, seen, writer)?;
} else {
// We have already seen this child, which shouldn't happen.
// Indicate this with a red edge.
writeln!(
writer,
" {addr} -> {child_addr}[label=\"{childidx} (dup)\" color=red]"
" {node} -> {child}[label=\"{childidx} (dup)\" color=red]"
)
.map_err(|e| FileIoError::from_generic_no_file(e, "write branch"))?;
}
Expand All @@ -421,19 +413,25 @@ impl<T: HashedNodeReader> Merkle<T> {
Ok(())
}

/// Dump the trie to a dot file.
///
/// This function is primarily used in testing, but also has an API implementation
///
/// Dot files can be rendered using `dot -Tpng -o output.png input.dot`
/// or online at <https://dreampuf.github.io/GraphvizOnline>
pub(crate) fn dump(&self) -> Result<String, Error> {
let root = self.nodestore.root_as_maybe_persisted_node();

let mut result = String::new();
writeln!(result, "digraph Merkle {{\n rankdir=LR;").map_err(Error::other)?;
if let (Some(root_addr), Some(root_hash)) =
(self.nodestore.root_address(), self.nodestore.root_hash())
{
writeln!(result, " root -> {root_addr}")
if let (Some(root), Some(root_hash)) = (root, self.nodestore.root_hash()) {
writeln!(result, " root -> {root}")
.map_err(Error::other)
.map_err(|e| FileIoError::new(e, None, 0, None))
.map_err(Error::other)?;
let mut seen = HashSet::new();
self.dump_node(
root_addr,
&root,
Some(&root_hash.into_hash_type()),
&mut seen,
&mut result,
Expand Down Expand Up @@ -462,10 +460,10 @@ impl<S: ReadableStorage> TryFrom<Merkle<NodeStore<MutableProposal, S>>>

impl<S: ReadableStorage> Merkle<NodeStore<MutableProposal, S>> {
/// Convert a merkle backed by an `MutableProposal` into an `ImmutableProposal`
/// TODO: We probably don't need this function
///
/// This function is only used in benchmarks and tests
#[must_use]
pub fn hash(self) -> Merkle<NodeStore<Arc<ImmutableProposal>, S>> {
// I think this is only used in testing
self.try_into().expect("failed to convert")
}

Expand Down
15 changes: 14 additions & 1 deletion storage/src/node/branch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
use smallvec::SmallVec;

use crate::node::ExtendableBytes;
use crate::{LeafNode, LinearAddress, MaybePersistedNode, Node, Path};
use crate::{LeafNode, LinearAddress, MaybePersistedNode, Node, Path, SharedNode};
use std::fmt::{Debug, Formatter};

/// The type of a hash. For ethereum compatible hashes, this might be a RLP encoded
Expand Down Expand Up @@ -112,6 +112,19 @@ impl Child {
Child::Node(_) => None,
}
}

/// Return a `MaybePersistedNode` from a child
///
/// This is used in the dump utility, but otherwise should be avoided,
/// as it may create an unnecessary `MaybePersistedNode`
#[must_use]
pub fn as_maybe_persisted_node(&self) -> MaybePersistedNode {
match self {
Child::Node(node) => MaybePersistedNode::from(SharedNode::from(node.clone())),
Child::AddressWithHash(addr, _) => MaybePersistedNode::from(*addr),
Child::MaybePersisted(maybe_persisted, _) => maybe_persisted.clone(),
}
}
}

#[cfg(feature = "ethhash")]
Expand Down
20 changes: 19 additions & 1 deletion storage/src/node/persist.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2023, Ava Labs, Inc. All rights reserved.
// See the file LICENSE.md for licensing terms.

use std::sync::Arc;
use std::{fmt::Display, sync::Arc};

use arc_swap::ArcSwap;

Expand Down Expand Up @@ -123,6 +123,24 @@ impl MaybePersistedNode {
}
}

/// Display the `MaybePersistedNode` as a string.
///
/// This is used in the dump utility.
///
/// We render these:
/// For disk addresses, just the address
/// For shared nodes, the address of the [`SharedNode`] object, prefixed with a 'M'
///
/// If instead you want the node itself, use [`MaybePersistedNode::as_shared_node`] first.
impl Display for MaybePersistedNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.0.load().as_ref() {
MaybePersisted::Unpersisted(node) => write!(f, "M{:p}", (*node).as_ptr()),
MaybePersisted::Persisted(addr) => write!(f, "{addr}"),
}
}
}

/// The internal state of a `MaybePersistedNode`.
///
/// This enum represents the two possible states of a `MaybePersisted`:
Expand Down
23 changes: 23 additions & 0 deletions storage/src/nodestore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,12 +330,23 @@ where
fn root_node(&self) -> Option<SharedNode> {
self.deref().root_node()
}
fn root_as_maybe_persisted_node(&self) -> Option<MaybePersistedNode> {
self.deref().root_as_maybe_persisted_node()
}
}

/// Reads the root of a merkle trie.
///
/// The root may be None if the trie is empty.
pub trait RootReader {
/// Returns the root of the trie.
/// Callers that just need the node at the root should use this function.
fn root_node(&self) -> Option<SharedNode>;

/// Returns the root of the trie as a `MaybePersistedNode`.
/// Callers that might want to modify the root or know how it is stored
/// should use this function.
fn root_as_maybe_persisted_node(&self) -> Option<MaybePersistedNode>;
Copy link
Collaborator Author

@rkuris rkuris Jul 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a future diff, this will just be called root but I want it verbose during the rest of the development of this feature as it helped identify areas still to changed.

}

/// A committed revision of a merkle trie.
Expand Down Expand Up @@ -623,20 +634,32 @@ impl<S: ReadableStorage> RootReader for NodeStore<MutableProposal, S> {
fn root_node(&self) -> Option<SharedNode> {
self.kind.root.as_ref().map(|node| node.clone().into())
}
fn root_as_maybe_persisted_node(&self) -> Option<MaybePersistedNode> {
self.kind
.root
.as_ref()
.map(|node| SharedNode::new(node.clone()).into())
}
}

impl<S: ReadableStorage> RootReader for NodeStore<Committed, S> {
fn root_node(&self) -> Option<SharedNode> {
// TODO: If the read_node fails, we just say there is no root; this is incorrect
self.kind.root.as_ref()?.as_shared_node(self).ok()
}
fn root_as_maybe_persisted_node(&self) -> Option<MaybePersistedNode> {
self.kind.root.clone()
}
}

impl<S: ReadableStorage> RootReader for NodeStore<Arc<ImmutableProposal>, S> {
fn root_node(&self) -> Option<SharedNode> {
// Use the MaybePersistedNode's as_shared_node method to get the root
self.kind.root.as_ref()?.as_shared_node(self).ok()
}
fn root_as_maybe_persisted_node(&self) -> Option<MaybePersistedNode> {
self.kind.root.clone()
}
}

impl<T, S> HashedNodeReader for NodeStore<T, S>
Expand Down
Loading