diff --git a/cmd/geth/dbcmd.go b/cmd/geth/dbcmd.go
index c57add06563..24cd3b5d5ac 100644
--- a/cmd/geth/dbcmd.go
+++ b/cmd/geth/dbcmd.go
@@ -19,6 +19,7 @@ package main
 import (
 	"bytes"
 	"fmt"
+	"math"
 	"os"
 	"os/signal"
 	"path/filepath"
@@ -75,6 +76,7 @@ Remove blockchain and state databases`,
 			dbCompactCmd,
 			dbGetCmd,
 			dbDeleteCmd,
+			dbInspectTrieCmd,
 			dbPutCmd,
 			dbGetSlotsCmd,
 			dbDumpFreezerIndex,
@@ -93,6 +95,15 @@ Remove blockchain and state databases`,
 		Usage:       "Inspect the storage size for each type of data in the database",
 		Description: `This commands iterates the entire database. If the optional 'prefix' and 'start' arguments are provided, then the iteration is limited to the given subset of data.`,
 	}
+	dbInspectTrieCmd = &cli.Command{
+		Action:    inspectTrie,
+		Name:      "inspect-trie",
+		ArgsUsage: "<blocknum|latest|snapshot>",
+		Flags:     slices.Concat([]cli.Flag{utils.ExcludeStorageFlag, utils.TopFlag}, utils.NetworkFlags, utils.DatabaseFlags),
+		Usage:     "Print detailed trie information about the structure of account trie and storage tries.",
+		Description: `This command iterates the entire trie-backed state. If the 'blocknum' is not specified,
+the latest block number will be used by default.`,
+	}
 	dbCheckStateContentCmd = &cli.Command{
 		Action:    checkStateContent,
 		Name:      "check-state-content",
@@ -386,6 +397,68 @@ func checkStateContent(ctx *cli.Context) error {
 	return nil
 }
 
+// inspectTrie prints node statistics for the state trie of a single block. The
+// optional argument selects the state: a decimal block number, "latest" (the
+// default when omitted), or "snapshot" for the root from rawdb.ReadSnapshotRoot.
+func inspectTrie(ctx *cli.Context) error {
+	if ctx.NArg() > 1 {
+		return fmt.Errorf("excessive number of arguments: %v", ctx.Command.ArgsUsage)
+	}
+	stack, _ := makeConfigNode(ctx)
+	db := utils.MakeChainDatabase(ctx, stack, false)
+	defer stack.Close()
+	defer db.Close()
+
+	var (
+		trieRoot common.Hash
+		number   uint64
+	)
+	switch arg := ctx.Args().Get(0); {
+	case ctx.NArg() == 0 || arg == "latest":
+		head := rawdb.ReadHeadHeaderHash(db)
+		n, ok := rawdb.ReadHeaderNumber(db, head)
+		if !ok {
+			return fmt.Errorf("could not load head block number")
+		}
+		number = n
+	case arg == "snapshot":
+		trieRoot = rawdb.ReadSnapshotRoot(db)
+		number = math.MaxUint64 // sentinel: there is no block header to resolve
+	default:
+		var err error
+		number, err = strconv.ParseUint(arg, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse blocknum, Args[0]: %v, err: %v", arg, err)
+		}
+	}
+
+	// Resolve the state root from the canonical header, if applicable.
+	if number != math.MaxUint64 {
+		hash := rawdb.ReadCanonicalHash(db, number)
+		if hash == (common.Hash{}) {
+			return fmt.Errorf("canonical hash for block %d not found", number)
+		}
+		header := rawdb.ReadHeader(db, hash, number)
+		if header == nil {
+			return fmt.Errorf("header for block %d (%x) not found", number, hash)
+		}
+		trieRoot = header.Root
+	}
+	if trieRoot == (common.Hash{}) {
+		return fmt.Errorf("empty state root")
+	}
+
+	triedb := utils.MakeTrieDatabase(ctx, stack, db, false, true, false)
+	defer triedb.Close()
+
+	log.Info("Inspecting trie", "root", trieRoot, "block", number)
+	config := &trie.InspectConfig{
+		NoStorage: ctx.Bool(utils.ExcludeStorageFlag.Name),
+		TopN:      ctx.Int(utils.TopFlag.Name),
+	}
+	return trie.Inspect(triedb, trieRoot, config)
+}
+
 func showDBStats(db ethdb.KeyValueStater) {
 	stats, err := db.Stat()
 	if err != nil {
diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go
index 5e96185dbd0..36c67e0b3e5 100644
--- a/cmd/utils/flags.go
+++ b/cmd/utils/flags.go
@@ -220,6 +220,11 @@ var (
 		Usage: "Max number of elements (0 = no limit)",
 		Value: 0,
 	}
+	TopFlag = &cli.IntFlag{
+		Name:  "top",
+		Usage: "Print the top N results",
+		Value: 5,
+	}
 
 	SnapshotFlag = &cli.BoolFlag{
 		Name: "snapshot",
diff --git a/core/rawdb/database.go b/core/rawdb/database.go
index 626d390c0d3..8a0da313498 100644
--- a/core/rawdb/database.go
+++ b/core/rawdb/database.go
@@ -35,6 +35,7 @@ import (
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+	"github.com/ethereum/go-ethereum/internal/tablewriter"
 	"github.com/ethereum/go-ethereum/log"
 	"golang.org/x/sync/errgroup"
 )
@@ -643,7 +644,7 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error {
 		total.Add(uint64(ancient.size()))
 	}
 
-	table := newTableWriter(os.Stdout)
+	table := tablewriter.NewWriter(os.Stdout)
 	table.SetHeader([]string{"Database", "Category", "Size", "Items"})
 	table.SetFooter([]string{"", "Total", common.StorageSize(total.Load()).String(),
fmt.Sprintf("%d", count.Load())})
 	table.AppendBulk(stats)
diff --git a/core/rawdb/database_tablewriter_unix.go b/internal/tablewriter/table.go
similarity index 92%
rename from core/rawdb/database_tablewriter_unix.go
rename to internal/tablewriter/table.go
index 8bec5396e87..ed44add2c49 100644
--- a/core/rawdb/database_tablewriter_unix.go
+++ b/internal/tablewriter/table.go
@@ -17,7 +17,7 @@
 //go:build !tinygo
 // +build !tinygo
 
-package rawdb
+package tablewriter
 
 import (
 	"io"
@@ -28,6 +28,7 @@ import (
 // Re-export the real tablewriter types and functions
 type Table = tablewriter.Table
 
-func newTableWriter(w io.Writer) *Table {
+// NewWriter re-exports tablewriter.NewWriter and returns the table it creates for w.
+func NewWriter(w io.Writer) *Table {
 	return tablewriter.NewWriter(w)
 }
diff --git a/core/rawdb/database_tablewriter_tinygo.go b/internal/tablewriter/table_tinygo.go
similarity index 99%
rename from core/rawdb/database_tablewriter_tinygo.go
rename to internal/tablewriter/table_tinygo.go
index 2f8e456fd51..620ba772152 100644
--- a/core/rawdb/database_tablewriter_tinygo.go
+++ b/internal/tablewriter/table_tinygo.go
@@ -19,7 +19,7 @@
 //go:build tinygo
 // +build tinygo
 
-package rawdb
+package tablewriter
 
 import (
 	"errors"
diff --git a/core/rawdb/database_tablewriter_tinygo_test.go b/internal/tablewriter/table_tinygo_test.go
similarity index 99%
rename from core/rawdb/database_tablewriter_tinygo_test.go
rename to internal/tablewriter/table_tinygo_test.go
index 3bcf93832b4..ee61875ee9f 100644
--- a/core/rawdb/database_tablewriter_tinygo_test.go
+++ b/internal/tablewriter/table_tinygo_test.go
@@ -17,7 +17,7 @@
 //go:build tinygo
 // +build tinygo
 
-package rawdb
+package tablewriter
 
 import (
 	"bytes"
diff --git a/trie/inspect.go b/trie/inspect.go
new file mode 100644
index 00000000000..d68f8802990
--- /dev/null
+++ b/trie/inspect.go
@@ -0,0 +1,294 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+	"strings"
+	"sync"
+	"sync/atomic"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/internal/tablewriter"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/triedb/database"
+	"golang.org/x/sync/semaphore"
+)
+
+// inspector holds the state shared by the goroutines of a single Inspect run.
+type inspector struct {
+	triedb database.NodeDatabase // node source, also used to open storage tries
+	root   common.Hash           // root of the inspected account trie
+
+	config *InspectConfig
+	stats  map[common.Hash]*triestat // keyed by root (account trie) or owner hash (storage trie)
+	m      sync.Mutex                // protects stats
+
+	sem *semaphore.Weighted // limits goroutines spawned for full-node children
+	wg  sync.WaitGroup      // tracks spawned goroutines; Inspect waits on it
+}
+
+// InspectConfig is a set of options to control inspection and format the
+// output. TopN will print the deepest min(len(results), N) storage tries.
+type InspectConfig struct {
+	NoStorage bool // if set, storage tries are neither inspected nor displayed
+	TopN      int  // number of storage tries to display
+}
+
+// Inspect walks the trie with the given root and records the number and type of
+// nodes at each depth. It works by recursively calling the inner inspect
+// function on each child node.
+func Inspect(triedb database.NodeDatabase, root common.Hash, config *InspectConfig) error {
+	trie, err := New(TrieID(root), triedb)
+	if err != nil {
+		return fmt.Errorf("fail to open trie %s: %w", root, err)
+	}
+	if config == nil {
+		config = &InspectConfig{}
+	}
+	in := inspector{
+		triedb: triedb,
+		root:   root,
+		config: config,
+		stats:  make(map[common.Hash]*triestat),
+		sem:    semaphore.NewWeighted(int64(128)),
+	}
+	in.stats[root] = &triestat{}
+
+	in.inspect(trie, trie.root, 0, []byte{}, in.stats[root])
+	in.wg.Wait()
+	in.DisplayResult()
+	return nil
+}
+
+// run executes fn on a new goroutine when a worker slot is available and on
+// the calling goroutine otherwise. The slot is handed back once fn returns, so
+// the traversal keeps running in parallel for its whole duration while the
+// number of live goroutines stays bounded by the semaphore.
+func (in *inspector) run(fn func()) {
+	if !in.sem.TryAcquire(1) {
+		fn()
+		return
+	}
+	in.wg.Add(1)
+	go func() {
+		defer in.wg.Done()
+		defer in.sem.Release(1)
+		fn()
+	}()
+}
+
+// inspect is called recursively down the trie. At each level it records the
+// type of the node encountered in stat, at the given height. The path is the
+// hex-nibble path from the root of trie to n and is used to resolve hash nodes
+// and to derive the owner of a storage trie.
+func (in *inspector) inspect(trie *Trie, n node, height uint32, path []byte, stat *triestat) {
+	if n == nil {
+		return
+	}
+
+	// Four types of nodes can be encountered:
+	// - short: extend path with key, inspect single value.
+	// - full: inspect all 17 children, spin up new threads when possible.
+	// - hash: need to resolve node from disk, retry inspect on result.
+	// - value: if account, begin inspecting storage trie.
+	switch n := (n).(type) {
+	case *shortNode:
+		in.inspect(trie, n.Val, height+1, append(path, n.Key...), stat)
+	case *fullNode:
+		for idx, child := range n.Children {
+			if child == nil {
+				continue
+			}
+			// Clip the capacity so that append always copies. Otherwise the
+			// children could share one backing array and a sibling running
+			// on another goroutine would see its path overwritten.
+			childPath := append(path[:len(path):len(path)], byte(idx))
+			in.run(func() {
+				in.inspect(trie, child, height+1, childPath, stat)
+			})
+		}
+	case hashNode:
+		resolved, err := trie.resolveWithoutTrack(n, path)
+		if err != nil {
+			log.Error("Failed to resolve HashNode", "err", err, "trie", trie.Hash(), "height", height+1, "path", path)
+			return
+		}
+		in.inspect(trie, resolved, height, path, stat)
+
+		// Return early here so this level isn't recorded twice.
+		return
+	case valueNode:
+		if !hasTerm(path) {
+			break
+		}
+		var account types.StateAccount
+		if err := rlp.Decode(bytes.NewReader(n), &account); err != nil {
+			// Not an account value.
+			break
+		}
+		if account.Root == (common.Hash{}) || account.Root == types.EmptyRootHash {
+			// Account is empty, nothing further to inspect.
+			break
+		}
+
+		// Start inspecting storage trie.
+		if !in.config.NoStorage {
+			owner := common.BytesToHash(hexToCompact(path))
+			storage, err := New(StorageTrieID(in.root, owner, account.Root), in.triedb)
+			if err != nil {
+				log.Error("Failed to open account storage trie", "node", n, "error", err, "height", height, "path", common.Bytes2Hex(path))
+				break
+			}
+			storageStat := &triestat{}
+
+			in.m.Lock()
+			in.stats[owner] = storageStat
+			in.m.Unlock()
+
+			// Storage tries share the worker budget with the account trie
+			// instead of getting one unbounded goroutine per account.
+			in.run(func() {
+				in.inspect(storage, storage.root, 0, []byte{}, storageStat)
+			})
+		}
+	default:
+		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
+	}
+
+	// Record stats for current height
+	stat.add(n, height)
+}
+
+// DisplayResult prints the statistics of the account trie followed, unless
+// storage inspection is disabled, by those of the deepest TopN storage tries.
+// It must only be called once all inspecting goroutines have finished.
+func (in *inspector) DisplayResult() {
+	fmt.Println("Results for trie", in.root)
+	in.stats[in.root].display("Accounts trie")
+	fmt.Println("===")
+	fmt.Println()
+
+	if in.config.NoStorage {
+		return
+	}
+	// The stats map also holds the account trie, keyed by the state root. Leave
+	// it out so that only storage tries are ranked and labelled as such.
+	storage := make(sortedTriestat, len(in.stats))
+	for owner, st := range in.stats {
+		if owner != in.root {
+			storage[owner] = st
+		}
+	}
+	// Sort stats by max node depth.
+	keys, stats := storage.sort()
+
+	// A negative TopN prints nothing instead of panicking on the slice bounds.
+	top := max(0, min(in.config.TopN, len(keys)))
+
+	fmt.Println("Results for top storage tries")
+	for i, owner := range keys[:top] {
+		fmt.Printf("%d: %s\n", i+1, owner)
+		stats[i].display("storage trie")
+	}
+}
+
+// triestat tracks the type and count of trie nodes at each level in the trie.
+//
+// Note: theoretically it is possible to have up to 64 trie level. Since it is
+// unlikely to encounter such a large trie, the stats are capped at 16 levels to
+// avoid substantial unneeded allocation.
+type triestat struct {
+	level [16]stat
+}
+
+// maxDepth iterates each level and finds the deepest level with at least one
+// trie node.
+func (s *triestat) maxDepth() int {
+	depth := 0
+	for i := range s.level {
+		if !s.level[i].empty() {
+			depth = i
+		}
+	}
+	return depth
+}
+
+// sortedTriestat implements sort().
+type sortedTriestat map[common.Hash]*triestat
+
+// sort returns the keys and triestats in descending order of the maximum trie
+// node depth. Entries of equal depth are ordered by ascending key, so the
+// result does not depend on map iteration order.
+func (s sortedTriestat) sort() ([]common.Hash, []*triestat) {
+	var (
+		keys   = make([]common.Hash, 0, len(s))
+		stats  = make([]*triestat, 0, len(s))
+		depths = make(map[common.Hash]int, len(s))
+	)
+	// Compute every depth once up front rather than on each comparison.
+	for k, st := range s {
+		keys = append(keys, k)
+		depths[k] = st.maxDepth()
+	}
+	sort.Slice(keys, func(i, j int) bool {
+		if di, dj := depths[keys[i]], depths[keys[j]]; di != dj {
+			return di > dj
+		}
+		return bytes.Compare(keys[i][:], keys[j][:]) < 0
+	})
+	for _, k := range keys {
+		stats = append(stats, s[k])
+	}
+	return keys, stats
+}
+
+// add increases the node count by one for the specified node type and depth.
+// Depths beyond the tracked range are counted in the last level instead of
+// indexing out of bounds. It panics if n is not a short, full or value node.
+func (s *triestat) add(n node, d uint32) {
+	if last := uint32(len(s.level) - 1); d > last {
+		d = last
+	}
+	switch (n).(type) {
+	case *shortNode:
+		s.level[d].short.Add(1)
+	case *fullNode:
+		s.level[d].full.Add(1)
+	case valueNode:
+		s.level[d].value.Add(1)
+	default:
+		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
+	}
+}
+
+// stat is a specific level's count of each node type.
+type stat struct {
+	short atomic.Uint64
+	full  atomic.Uint64
+	value atomic.Uint64
+}
+
+// empty is a helper that returns whether there are any trie nodes at the level.
+func (s *stat) empty() bool {
+	return s.full.Load() == 0 && s.short.Load() == 0 && s.value.Load() == 0
+}
+
+// load is a helper that loads each node type's value, in the order short, full,
+// value.
+func (s *stat) load() (uint64, uint64, uint64) {
+	return s.short.Load(), s.full.Load(), s.value.Load()
+}
+
+// add is a helper that adds two level's stats together. The sum is stored in
+// the receiver, which is also returned.
+func (s *stat) add(other *stat) *stat {
+	s.short.Add(other.short.Load())
+	s.full.Add(other.full.Load())
+	s.value.Add(other.value.Load())
+	return s
+}
+
+// display will print a table displaying the trie's node statistics.
+func (s *triestat) display(title string) {
+	// Long titles are reduced to their first and last eight bytes.
+	if n := len(title); n > 32 {
+		title = title[:8] + "..." + title[n-8:]
+	}
+	var (
+		out   strings.Builder
+		total stat
+		table = tablewriter.NewWriter(&out)
+	)
+	table.SetHeader([]string{title, "Level", "Short Nodes", "Full Node", "Value Node"})
+	table.SetAlignment(1)
+
+	// row renders the three counters of st behind the two leading columns.
+	row := func(label, level string, st *stat) []string {
+		short, full, value := st.load()
+		return []string{label, level, fmt.Sprint(short), fmt.Sprint(full), fmt.Sprint(value)}
+	}
+	// One row per level, stopping at the first level without any node.
+	for depth := 0; depth < len(s.level) && !s.level[depth].empty(); depth++ {
+		level := &s.level[depth]
+		table.Append(row("-", fmt.Sprint(depth), level))
+		total.add(level)
+	}
+	table.SetFooter(row("Total", "", &total))
+	table.Render()
+
+	fmt.Print(out.String())
+	fmt.Println("Max depth", s.maxDepth())
+	fmt.Println()
+}
diff --git a/trie/inspect_test.go b/trie/inspect_test.go
new file mode 100644
index 00000000000..eb9e16722a2
--- /dev/null
+++ b/trie/inspect_test.go
@@ -0,0 +1,97 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/trienode"
+	"github.com/holiman/uint256"
+)
+
+// TestInspect inspects a randomly generated account trie. It's useful for
+// quickly verifying changes to the results display.
+func TestInspect(t *testing.T) {
+	db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
+	trie, err := NewStateTrie(TrieID(types.EmptyRootHash), db)
+	if err != nil {
+		t.Fatalf("failed to create state trie: %v", err)
+	}
+	// Generate accounts with storage and insert them into the account trie.
+	addresses, accounts := makeAccountsWithStorage(db, 11, true)
+	for i := 0; i < len(addresses); i++ {
+		trie.MustUpdate(crypto.Keccak256(addresses[i][:]), accounts[i])
+	}
+	// Commit the account trie and persist its nodes in the test database.
+	root, nodes := trie.Commit(true)
+	db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes))
+	db.Commit(root)
+
+	if err := Inspect(db, root, &InspectConfig{TopN: 1}); err != nil {
+		t.Fatalf("inspect failed: %v", err)
+	}
+}
+
+func makeAccountsWithStorage(db *testDb, size int, storage bool) (addresses [][20]byte, accounts [][]byte) {
+	// Use a fixed seed so that the generated test data is deterministic.
+	random := rand.New(rand.NewSource(0))
+
+	addresses = make([][20]byte, size)
+	for i := 0; i < len(addresses); i++ {
+		data := make([]byte, 20)
+		random.Read(data)
+		copy(addresses[i][:], data)
+	}
+	accounts = make([][]byte, len(addresses))
+	for i := 0; i < len(accounts); i++ {
+		var (
+			nonce = uint64(random.Int63())
+			root  = types.EmptyRootHash
+			code  = crypto.Keccak256(nil)
+		)
+		if storage {
+			trie := NewEmpty(db)
+			for range random.Uint32()%256 + 1 { // non-zero
+				k, v := make([]byte, 32), make([]byte, 32)
+				random.Read(k)
+				random.Read(v)
+				trie.MustUpdate(k, v)
+			}
+			var nodes *trienode.NodeSet
+			root, nodes = trie.Commit(true)
+			db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes))
+			db.Commit(root)
+		}
+		numBytes := random.Uint32() % 33 // [0, 32] bytes
+		balanceBytes := make([]byte, numBytes)
+		random.Read(balanceBytes)
+		balance := new(uint256.Int).SetBytes(balanceBytes)
+		data, _ := rlp.EncodeToBytes(&types.StateAccount{
+			Nonce:    nonce,
+			Balance:  balance,
+			Root:     root,
+			CodeHash: code,
+		})
+		accounts[i] = data
+	}
+	return addresses, accounts
+}
diff --git a/trie/trie.go b/trie/trie.go
index 36cc732ee85..299cfa72189 100644
--- a/trie/trie.go
+++ b/trie/trie.go
@@ -694,6 +694,21 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
 	return decodeNodeUnsafe(n, blob)
 }
 
+// resolveWithoutTrack loads the node referenced by a hash node from the
+// underlying store, using the given path prefix. Nodes of any other type are
+// returned unchanged. Read and decode failures are returned as errors.
+func (t *Trie) resolveWithoutTrack(n node, prefix []byte) (node, error) {
+	hash, ok := n.(hashNode)
+	if !ok {
+		return n, nil
+	}
+	blob, err := t.reader.Node(prefix, common.BytesToHash(hash))
+	if err != nil {
+		return nil, err
+	}
+	return decodeNode(hash, blob)
+}
+
 // deletedNodes returns a list of node paths, referring the nodes being deleted
 // from the trie. It's possible a few deleted nodes were embedded in their parent
 // before, the deletions can be no effect by deleting nothing, filter them out.