Skip to content

Commit

Permalink
fix(state): node hashes vs merkle values
Browse files Browse the repository at this point in the history
- Pruners only care about node hashes
- Rename variables, functions and methods only dealing with node hashes
- Do not write or read inlined nodes with a non-hash Merkle value
- Clarify error wrappings and comments
  • Loading branch information
qdm12 committed Nov 23, 2022
1 parent 43ed21c commit 750bfcc
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 119 deletions.
10 changes: 5 additions & 5 deletions dot/state/offline_pruner.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func (p *OfflinePruner) SetBloomFilter() (err error) {
}

latestBlockNum := header.Number
merkleValues := make(map[string]struct{})
nodeHashes := make(map[common.Hash]struct{})

logger.Infof("Latest block number is %d", latestBlockNum)

Expand All @@ -121,7 +121,7 @@ func (p *OfflinePruner) SetBloomFilter() (err error) {
return err
}

trie.PopulateNodeHashes(tr.RootNode(), merkleValues)
trie.PopulateNodeHashes(tr.RootNode(), nodeHashes)

// get parent header of current block
header, err = p.blockState.GetHeader(header.ParentHash)
Expand All @@ -131,14 +131,14 @@ func (p *OfflinePruner) SetBloomFilter() (err error) {
blockNum = header.Number
}

for key := range merkleValues {
err = p.bloom.put([]byte(key))
for key := range nodeHashes {
err = p.bloom.put(key.ToBytes())
if err != nil {
return err
}
}

logger.Infof("Total keys added in bloom filter: %d", len(merkleValues))
logger.Infof("Total keys added in bloom filter: %d", len(nodeHashes))
return nil
}

Expand Down
66 changes: 33 additions & 33 deletions dot/state/pruner/pruner.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,22 @@ type Config struct {

// Pruner is implemented by FullNode and ArchiveNode.
type Pruner interface {
StoreJournalRecord(deletedMerkleValues, insertedMerkleValues map[string]struct{},
StoreJournalRecord(deletedNodeHashes, insertedNodeHashes map[common.Hash]struct{},
blockHash common.Hash, blockNum int64) error
}

// ArchiveNode is a no-op since we don't prune nodes in archive mode.
type ArchiveNode struct{}

// StoreJournalRecord for archive node doesn't do anything.
func (*ArchiveNode) StoreJournalRecord(_, _ map[string]struct{},
func (*ArchiveNode) StoreJournalRecord(_, _ map[common.Hash]struct{},
_ common.Hash, _ int64) error {
return nil
}

type deathRecord struct {
blockHash common.Hash
deletedMerkleValueToBlockNumber map[string]int64
blockHash common.Hash
deletedNodeHashToBlockNumber map[common.Hash]int64
}

// deathRow groups the death records stored at one index of the pruner's deathList,
// where the index is the block number offset from the pending block number.
type deathRow []*deathRecord
Expand All @@ -77,8 +77,8 @@ type FullNode struct {
deathList []deathRow
storageDB chaindb.Database
journalDB chaindb.Database
// deathIndex is the mapping from deleted node Merkle value to block number.
deathIndex map[string]int64
// deathIndex is the mapping from deleted node hash to block number.
deathIndex map[common.Hash]int64
// pendingNumber is the block number to be pruned.
// Initial value is set to 1 and is incremented after every block pruning.
pendingNumber int64
Expand All @@ -89,31 +89,31 @@ type FullNode struct {
type journalRecord struct {
// blockHash of the block corresponding to journal record
blockHash common.Hash
// Merkle values of nodes inserted in the state trie of the block
insertedMerkleValues map[string]struct{}
// Merkle values of nodes deleted from the state trie of the block
deletedMerkleValues map[string]struct{}
// Node hashes of nodes inserted in the state trie of the block
insertedNodeHashes map[common.Hash]struct{}
// Node hashes of nodes deleted from the state trie of the block
deletedNodeHashes map[common.Hash]struct{}
}

// journalKey identifies a journal record by the number and hash of its block.
type journalKey struct {
// blockNum is the number of the block the journal record was stored for.
blockNum int64
// blockHash is the hash of the block the journal record was stored for.
blockHash common.Hash
}

func newJournalRecord(hash common.Hash, insertedMerkleValues,
deletedMerkleValues map[string]struct{}) *journalRecord {
func newJournalRecord(hash common.Hash, insertedNodeHashes,
deletedNodeHashes map[common.Hash]struct{}) *journalRecord {
return &journalRecord{
blockHash: hash,
insertedMerkleValues: insertedMerkleValues,
deletedMerkleValues: deletedMerkleValues,
blockHash: hash,
insertedNodeHashes: insertedNodeHashes,
deletedNodeHashes: deletedNodeHashes,
}
}

// NewFullNode creates a Pruner for full node.
func NewFullNode(db, storageDB chaindb.Database, retainBlocks uint32, l log.LeveledLogger) (Pruner, error) {
p := &FullNode{
deathList: make([]deathRow, 0),
deathIndex: make(map[string]int64),
deathIndex: make(map[common.Hash]int64),
storageDB: storageDB,
journalDB: chaindb.NewTable(db, journalPrefix),
retainBlocks: retainBlocks,
Expand Down Expand Up @@ -141,9 +141,9 @@ func NewFullNode(db, storageDB chaindb.Database, retainBlocks uint32, l log.Leve
}

// StoreJournalRecord stores the journal record in the DB and adds a deathRow to the deathList.
func (p *FullNode) StoreJournalRecord(deletedMerkleValues, insertedMerkleValues map[string]struct{},
func (p *FullNode) StoreJournalRecord(deletedNodeHashes, insertedNodeHashes map[common.Hash]struct{},
blockHash common.Hash, blockNum int64) error {
jr := newJournalRecord(blockHash, insertedMerkleValues, deletedMerkleValues)
jr := newJournalRecord(blockHash, insertedNodeHashes, deletedNodeHashes)

key := &journalKey{blockNum, blockHash}
err := p.storeJournal(key, jr)
Expand All @@ -169,13 +169,13 @@ func (p *FullNode) addDeathRow(jr *journalRecord, blockNum int64) {
return
}

p.processInsertedKeys(jr.insertedMerkleValues, jr.blockHash)
p.processInsertedKeys(jr.insertedNodeHashes, jr.blockHash)

// add deleted node Merkle values from journal to death index
deletedMerkleValueToBlockNumber := make(map[string]int64, len(jr.deletedMerkleValues))
for k := range jr.deletedMerkleValues {
// add deleted node hashes from journal to death index
deletedNodeHashToBlockNumber := make(map[common.Hash]int64, len(jr.deletedNodeHashes))
for k := range jr.deletedNodeHashes {
p.deathIndex[k] = blockNum
deletedMerkleValueToBlockNumber[k] = blockNum
deletedNodeHashToBlockNumber[k] = blockNum
}

blockIndex := blockNum - p.pendingNumber
Expand All @@ -184,25 +184,25 @@ func (p *FullNode) addDeathRow(jr *journalRecord, blockNum int64) {
}

record := &deathRecord{
blockHash: jr.blockHash,
deletedMerkleValueToBlockNumber: deletedMerkleValueToBlockNumber,
blockHash: jr.blockHash,
deletedNodeHashToBlockNumber: deletedNodeHashToBlockNumber,
}

// add deathRow to deathList
p.deathList[blockIndex] = append(p.deathList[blockIndex], record)
}

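// processInsertedKeys removes re-inserted keys from the death index and from
// the death records of the block matching blockHash.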
func (p *FullNode) processInsertedKeys(insertedMerkleValues map[string]struct{}, blockHash common.Hash) {
for k := range insertedMerkleValues {
func (p *FullNode) processInsertedKeys(insertedNodeHashes map[common.Hash]struct{}, blockHash common.Hash) {
for k := range insertedNodeHashes {
num, ok := p.deathIndex[k]
if !ok {
continue
}
records := p.deathList[num-p.pendingNumber]
for _, v := range records {
if v.blockHash == blockHash {
delete(v.deletedMerkleValueToBlockNumber, k)
delete(v.deletedNodeHashToBlockNumber, k)
}
}
delete(p.deathIndex, k)
Expand Down Expand Up @@ -230,14 +230,14 @@ func (p *FullNode) start() {

sdbBatch := p.storageDB.NewBatch()
for _, record := range row {
err := p.deleteKeys(sdbBatch, record.deletedMerkleValueToBlockNumber)
err := p.deleteKeys(sdbBatch, record.deletedNodeHashToBlockNumber)
if err != nil {
p.logger.Warnf("failed to prune keys for block number %d: %s", blockNum, err)
sdbBatch.Reset()
return
}

for k := range record.deletedMerkleValueToBlockNumber {
for k := range record.deletedNodeHashToBlockNumber {
delete(p.deathIndex, k)
}
}
Expand Down Expand Up @@ -374,9 +374,9 @@ func (p *FullNode) getLastPrunedIndex() (int64, error) {
return blockNum, nil
}

func (*FullNode) deleteKeys(b chaindb.Batch, deletedMerkleValueToBlockNumber map[string]int64) error {
for merkleValue := range deletedMerkleValueToBlockNumber {
err := b.Del([]byte(merkleValue))
func (*FullNode) deleteKeys(b chaindb.Batch, deletedNodeHashToBlockNumber map[common.Hash]int64) error {
for nodeHash := range deletedNodeHashToBlockNumber {
err := b.Del(nodeHash.ToBytes())
if err != nil {
return err
}
Expand Down
6 changes: 3 additions & 3 deletions dot/state/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ func (s *StorageState) StoreTrie(ts *rtstorage.TrieState, header *types.Header)
}

if header != nil {
insertedMerkleValues, deletedMerkleValues, err := ts.GetChangedNodeHashes()
insertedNodeHashes, deletedNodeHashes, err := ts.GetChangedNodeHashes()
if err != nil {
return fmt.Errorf("failed to get state trie inserted keys: block %s %w", header.Hash(), err)
return fmt.Errorf("getting trie changed node hashes for block hash %s: %w", header.Hash(), err)
}

err = s.pruner.StoreJournalRecord(deletedMerkleValues, insertedMerkleValues, header.Hash(), int64(header.Number))
err = s.pruner.StoreJournalRecord(deletedNodeHashes, insertedNodeHashes, header.Hash(), int64(header.Number))
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion lib/runtime/storage/trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ func (s *TrieState) LoadCodeHash() (common.Hash, error) {

// GetChangedNodeHashes returns the two sets of hashes for all nodes
// inserted and deleted in the state trie since the last block produced (trie snapshot).
func (s *TrieState) GetChangedNodeHashes() (inserted, deleted map[string]struct{}, err error) {
func (s *TrieState) GetChangedNodeHashes() (inserted, deleted map[common.Hash]struct{}, err error) {
s.lock.RLock()
defer s.lock.RUnlock()
return s.t.GetChangedNodeHashes()
Expand Down
70 changes: 41 additions & 29 deletions lib/trie/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,33 +59,34 @@ func (t *Trie) loadNode(db Database, n *Node) error {

merkleValue := child.MerkleValue

if len(merkleValue) == 0 {
if len(merkleValue) < 32 {
// node has already been loaded inline
// just set encoding + hash digest
// just set its encoding
_, err := child.CalculateMerkleValue()
if err != nil {
return fmt.Errorf("merkle value: %w", err)
}
continue
}

encodedNode, err := db.Get(merkleValue)
nodeHash := merkleValue
encodedNode, err := db.Get(nodeHash)
if err != nil {
return fmt.Errorf("cannot find child node key 0x%x in database: %w", merkleValue, err)
return fmt.Errorf("cannot find child node key 0x%x in database: %w", nodeHash, err)
}

reader := bytes.NewReader(encodedNode)
decodedNode, err := node.Decode(reader)
if err != nil {
return fmt.Errorf("decoding node with Merkle value 0x%x: %w", merkleValue, err)
return fmt.Errorf("decoding node with hash 0x%x: %w", nodeHash, err)
}

decodedNode.MerkleValue = merkleValue
decodedNode.MerkleValue = nodeHash
branch.Children[i] = decodedNode

err = t.loadNode(db, decodedNode)
if err != nil {
return fmt.Errorf("loading child at index %d with Merkle value 0x%x: %w", i, merkleValue, err)
return fmt.Errorf("loading child at index %d with node hash 0x%x: %w", i, nodeHash, err)
}

if decodedNode.Kind() == node.Branch {
Expand Down Expand Up @@ -123,7 +124,7 @@ func (t *Trie) loadNode(db Database, n *Node) error {
// all its descendant nodes as keys to the nodeHashes map.
// It is assumed the node and its descendant nodes have their Merkle value already
// computed.
func PopulateNodeHashes(n *Node, nodeHashes map[string]struct{}) {
func PopulateNodeHashes(n *Node, nodeHashes map[common.Hash]struct{}) {
if n == nil {
return
}
Expand All @@ -139,7 +140,8 @@ func PopulateNodeHashes(n *Node, nodeHashes map[string]struct{}) {
return
}

nodeHashes[string(n.MerkleValue)] = struct{}{}
nodeHash := common.NewHash(n.MerkleValue)
nodeHashes[nodeHash] = struct{}{}

if n.Kind() == node.Leaf {
return
Expand Down Expand Up @@ -251,15 +253,15 @@ func getFromDBAtNode(db chaindb.Database, n *Node, key []byte) (
encodedChild, err := db.Get(childMerkleValue)
if err != nil {
return nil, fmt.Errorf(
"finding child node with Merkle value 0x%x in database: %w",
"finding child node with hash 0x%x in database: %w",
childMerkleValue, err)
}

reader := bytes.NewReader(encodedChild)
decodedChild, err := node.Decode(reader)
if err != nil {
return nil, fmt.Errorf(
"decoding child node with Merkle value 0x%x: %w",
"decoding child node with hash 0x%x: %w",
childMerkleValue, err)
}

Expand Down Expand Up @@ -296,11 +298,19 @@ func (t *Trie) writeDirtyNode(db chaindb.Batch, n *Node) (err error) {
n.MerkleValue, err)
}

err = db.Put(merkleValue, encoding)
if len(merkleValue) < 32 {
// Inlined node, there is no need to write it to database.
n.SetClean()
return nil
}

nodeHash := merkleValue

err = db.Put(nodeHash, encoding)
if err != nil {
return fmt.Errorf(
"putting encoding of node with Merkle value 0x%x in database: %w",
merkleValue, err)
"putting encoding of node with node hash 0x%x in database: %w",
nodeHash, err)
}

if n.Kind() != node.Branch {
Expand Down Expand Up @@ -333,25 +343,20 @@ func (t *Trie) writeDirtyNode(db chaindb.Batch, n *Node) (err error) {

// GetChangedNodeHashes returns the two sets of hashes for all nodes
// inserted and deleted in the state trie since the last snapshot.
// Returned maps are safe for mutation.
func (t *Trie) GetChangedNodeHashes() (inserted, deleted map[string]struct{}, err error) {
inserted = make(map[string]struct{})
// The returned inserted map is safe for mutation, but the returned deleted map is not.
func (t *Trie) GetChangedNodeHashes() (inserted, deleted map[common.Hash]struct{}, err error) {
inserted = make(map[common.Hash]struct{})
err = t.getInsertedNodeHashesAtNode(t.root, inserted)
if err != nil {
return nil, nil, fmt.Errorf("getting inserted node hashes: %w", err)
}

deletedNodeHashes := t.deltas.Deleted()
// TODO return deletedNodeHashes directly after changing MerkleValue -> NodeHash
deleted = make(map[string]struct{}, len(deletedNodeHashes))
for nodeHash := range deletedNodeHashes {
deleted[string(nodeHash[:])] = struct{}{}
}
deleted = t.deltas.Deleted()

return inserted, deleted, nil
}

func (t *Trie) getInsertedNodeHashesAtNode(n *Node, merkleValues map[string]struct{}) (err error) {
func (t *Trie) getInsertedNodeHashesAtNode(n *Node, nodeHashes map[common.Hash]struct{}) (err error) {
if n == nil || !n.Dirty {
return nil
}
Expand All @@ -363,12 +368,19 @@ func (t *Trie) getInsertedNodeHashesAtNode(n *Node, merkleValues map[string]stru
merkleValue, err = n.CalculateMerkleValue()
}
if err != nil {
return fmt.Errorf(
"encoding and hashing node with Merkle value 0x%x: %w",
n.MerkleValue, err)
return fmt.Errorf("calculating Merkle value: %w", err)
}

if len(merkleValue) < 32 {
// this is an inlined node and is encoded as part of its parent node.
// Therefore it is not written to disk and the online pruner does not
// need to track it. If the node encodes to less than 32B, it cannot have
// non-inlined children so it's safe to stop here and not recurse further.
return nil
}

merkleValues[string(merkleValue)] = struct{}{}
nodeHash := common.NewHash(merkleValue)
nodeHashes[nodeHash] = struct{}{}

if n.Kind() != node.Branch {
return nil
Expand All @@ -379,7 +391,7 @@ func (t *Trie) getInsertedNodeHashesAtNode(n *Node, merkleValues map[string]stru
continue
}

err := t.getInsertedNodeHashesAtNode(child, merkleValues)
err := t.getInsertedNodeHashesAtNode(child, nodeHashes)
if err != nil {
// Note: do not wrap error since this is called recursively.
return err
Expand Down
Loading

0 comments on commit 750bfcc

Please sign in to comment.