Add nfsstore bandwidth testing script

zhou-haitao · zhou-haitao · commit d71fcbcc665f · 2025-10-29T19:08:50.000+08:00
diff --git a/ucm/store/test/e2e/nfsstore_embed_fetch.py b/ucm/store/test/e2e/nfsstore_embed_fetch.py
@@ -22,11 +22,11 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 #
+import csv
 import os
 import secrets
 import time
-import csv
-from typing import List, Dict
+from typing import Dict, List
 
 import torch
 
@@ -46,7 +46,9 @@ def setup_store(storage_backends, block_size, device_id, io_size) -> UcmKVStoreB
     return UcmNfsStore(config)
 
 
-def make_buffers(block_number, device_id, batch_size, head_dim, block_len, block_layer, num_head):
+def make_buffers(
+    block_number, device_id, batch_size, head_dim, block_len, block_layer, num_head
+):
     hashes = [secrets.token_hex(16) for _ in range(block_number)]
     kv_caches = {}
     for i in range(block_layer):
@@ -65,13 +67,20 @@ def store_all_hashes(hashes: List[str]):
             f.write(h + "\n")
 
 
-def embed(store: UcmKVStoreBase, hashes: List[str], kvcaches: Dict[int, torch.Tensor],
-          num_tokens: int, block_len: int, block_layer: int, block_dim: int):
+def embed(
+    store: UcmKVStoreBase,
+    hashes: List[str],
+    kvcaches: Dict[int, torch.Tensor],
+    num_tokens: int,
+    block_len: int,
+    block_layer: int,
+    block_dim: int,
+):
     start_time = time.perf_counter()
-    
+
     total_block_ids, total_offsets, total_tensors = [], [], []
     total_size = 0
-    
+
     for i, hash_val in enumerate(hashes):
         offset = 0
         for layer_id, kv_layer in kvcaches.items():
@@ -85,27 +94,36 @@ def embed(store: UcmKVStoreBase, hashes: List[str], kvcaches: Dict[int, torch.Te
 
     task = store.dump(total_block_ids, total_offsets, total_tensors)
     store.wait(task)
-    
+
     elapsed_time = time.perf_counter() - start_time
-    throughput_gbps = (total_size / (1024 ** 3)) / elapsed_time if elapsed_time > 0 else 0
-    
-    print(f"WRITE: Data Size={(total_size / (1024 ** 3)):.4f} GB, Time={elapsed_time:.4f} s, "
-          f"Speed={throughput_gbps:.4f} GB/s")
-    
+    throughput_gbps = (total_size / (1024**3)) / elapsed_time if elapsed_time > 0 else 0
+
+    print(
+        f"WRITE: Data Size={(total_size / (1024 ** 3)):.4f} GB, Time={elapsed_time:.4f} s, "
+        f"Speed={throughput_gbps:.4f} GB/s"
+    )
+
     return total_size, elapsed_time, throughput_gbps
 
 
-def fetch(store: UcmKVStoreBase, hashes: List[str], kvcaches: Dict[int, torch.Tensor],
-          num_tokens: int, block_len: int, block_layer: int, block_dim: int):
+def fetch(
+    store: UcmKVStoreBase,
+    hashes: List[str],
+    kvcaches: Dict[int, torch.Tensor],
+    num_tokens: int,
+    block_len: int,
+    block_layer: int,
+    block_dim: int,
+):
     start_time = time.perf_counter()
-    
+
     founds = store.lookup(hashes)
     for f in founds:
         assert f, "Cache block miss detected"
 
     block_ids, offsets, tensors = [], [], []
     total_size = 0
-    
+
     for i, hash_val in enumerate(hashes):
         offset = 0
         for layer_id, kv_layer in kvcaches.items():
@@ -120,33 +138,35 @@ def fetch(store: UcmKVStoreBase, hashes: List[str], kvcaches: Dict[int, torch.Te
     task = store.load(block_ids, offsets, tensors)
     ret = store.wait(task)
     assert ret == 0, "Load operation failed"
-    
+
     elapsed_time = time.perf_counter() - start_time
-    throughput_gbps = (total_size / (1024 ** 3)) / elapsed_time if elapsed_time > 0 else 0
-    
-    print(f"READ: Data Size={(total_size / (1024 ** 3)):.4f} GB, Time={elapsed_time:.4f} s, "
-          f"Speed={throughput_gbps:.4f} GB/s")
-    
+    throughput_gbps = (total_size / (1024**3)) / elapsed_time if elapsed_time > 0 else 0
+
+    print(
+        f"READ: Data Size={(total_size / (1024 ** 3)):.4f} GB, Time={elapsed_time:.4f} s, "
+        f"Speed={throughput_gbps:.4f} GB/s"
+    )
+
     return total_size, elapsed_time, throughput_gbps
 
 
 def main():
     storage_backends = "."
     device_id = 1
     mla = False
-    repeat = 3  
+    repeat = 3
     block_elem_size = 2
     num_tokens_list = [2048, 4096, 8192, 16384, 32768]
-    
+
     if mla:
-        block_lens = [ 64, 128 ]
+        block_lens = [64, 128]
         block_layer = 61
         head_size = 576
         kv = 1
         model_name = "deepseek-v3"
         num_head_list = [1]
     else:
-        block_lens = [ 128, 256 ]
+        block_lens = [128, 256]
         block_layer = 64
         head_size = 128
         kv = 2
@@ -156,18 +176,31 @@ def main():
     SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
     csv_file = os.path.join(SCRIPT_DIR, "embed_fetch_result1.csv")
     need_header = not os.path.exists(csv_file)
-    
+
     with open(csv_file, "a", newline="", encoding="utf-8") as csv_fp:
         writer = csv.writer(csv_fp)
-        
+
         if need_header:
-            writer.writerow([
-                "Model", "Sequence Length", "Batch Size", "Layers", "Element Size",
-                "KV", "Num Head", "Block Size", "IO Count", "IO Size(B)",  
-                "Total Size(GB)", "Write Avg Time(s)", "Write Avg Bandwidth(GB/s)",
-                "Read Avg Time(s)", "Read Avg Bandwidth(GB/s)"  
-            ])
-        
+            writer.writerow(
+                [
+                    "Model",
+                    "Sequence Length",
+                    "Batch Size",
+                    "Layers",
+                    "Element Size",
+                    "KV",
+                    "Num Head",
+                    "Block Size",
+                    "IO Count",
+                    "IO Size(B)",
+                    "Total Size(GB)",
+                    "Write Avg Time(s)",
+                    "Write Avg Bandwidth(GB/s)",
+                    "Read Avg Time(s)",
+                    "Read Avg Bandwidth(GB/s)",
+                ]
+            )
+
         for num_head in num_head_list:
             for block_len in block_lens:
                 block_dim = head_size * num_head
@@ -177,35 +210,58 @@ def main():
 
                 for num_tokens in num_tokens_list:
                     sep = "=" * 60
-                    print(f"\n{sep}\n= num_head={num_head} | num_tokens={num_tokens:>6} | Repeat {repeat} times =\n{sep}\n")
+                    print(
+                        f"\n{sep}\n= num_head={num_head} | num_tokens={num_tokens:>6} | Repeat {repeat} times =\n{sep}\n"
+                    )
 
                     batch_size = int(num_tokens / block_len)
                     io_num = int(num_tokens / block_len * block_layer)
-                    
+
                     w_bw_list, r_bw_list = [], []
                     w_time_list, r_time_list = [], []
                     w_size_sum, r_size_sum = 0.0, 0.0
 
                     for r in range(repeat):
                         print(f"\n--- Round {r+1} ---")
-                        store = setup_store(storage_backends, block_size, device_id, io_size)
-                        
+                        store = setup_store(
+                            storage_backends, block_size, device_id, io_size
+                        )
+
                         hashes, kvcaches = make_buffers(
-                            real_blocks, device_id, batch_size, head_size,
-                            block_len, block_layer, num_head
+                            real_blocks,
+                            device_id,
+                            batch_size,
+                            head_size,
+                            block_len,
+                            block_layer,
+                            num_head,
                         )
 
                         results = store.create(hashes[:batch_size])
                         assert sum(results) == 0, "Create operation failed"
 
-                        w_size, w_time, w_bw = embed(store, hashes[:batch_size], kvcaches,
-                                                    num_tokens, block_len, block_layer, block_dim)
+                        w_size, w_time, w_bw = embed(
+                            store,
+                            hashes[:batch_size],
+                            kvcaches,
+                            num_tokens,
+                            block_len,
+                            block_layer,
+                            block_dim,
+                        )
                         store.commit(hashes[:batch_size], True)
-                        
+
                         store_all_hashes(hashes[:batch_size])
 
-                        r_size, r_time, r_bw = fetch(store, hashes[:batch_size], kvcaches,
-                                                    num_tokens, block_len, block_layer, block_dim)
+                        r_size, r_time, r_bw = fetch(
+                            store,
+                            hashes[:batch_size],
+                            kvcaches,
+                            num_tokens,
+                            block_len,
+                            block_layer,
+                            block_dim,
+                        )
 
                         w_bw_list.append(w_bw)
                         r_bw_list.append(r_bw)
@@ -222,22 +278,34 @@ def main():
                     avg_r_bw = sum(r_bw_list) / repeat
                     avg_w_time = sum(w_time_list) / repeat
                     avg_r_time = sum(r_time_list) / repeat
-                    avg_w_size = w_size_sum / (1024 ** 3) / repeat
-                    avg_r_size = r_size_sum / (1024 ** 3) / repeat
-
-                    writer.writerow([
-                        model_name,
-                        num_tokens, batch_size, block_layer, block_elem_size,
-                        kv, num_head, block_len, io_num, io_size,  
-                        f"{avg_w_size:.4f}", f"{avg_w_time:.4f}", f"{avg_w_bw:.4f}",
-                        f"{avg_r_time:.4f}", f"{avg_r_bw:.4f}"
-                    ])
-                    
+                    avg_w_size = w_size_sum / (1024**3) / repeat
+                    avg_r_size = r_size_sum / (1024**3) / repeat
+
+                    writer.writerow(
+                        [
+                            model_name,
+                            num_tokens,
+                            batch_size,
+                            block_layer,
+                            block_elem_size,
+                            kv,
+                            num_head,
+                            block_len,
+                            io_num,
+                            io_size,
+                            f"{avg_w_size:.4f}",
+                            f"{avg_w_time:.4f}",
+                            f"{avg_w_bw:.4f}",
+                            f"{avg_r_time:.4f}",
+                            f"{avg_r_bw:.4f}",
+                        ]
+                    )
+
                     csv_fp.flush()
-    
+
     print("\n" + "=" * 60 + "\n= All combinations tested =\n" + "=" * 60 + "\n")
 
 
 if __name__ == "__main__":
     os.environ["UC_LOGGER_LEVEL"] = "debug"
-    main()
+    main()