Commit 376c7fd

running precommit hook
1 parent 7df9e5e commit 376c7fd

19 files changed (+385, -131 lines)

bergson/collection.py
Lines changed: 3 additions & 1 deletion

@@ -72,7 +72,9 @@ def callback(name: str, g: torch.Tensor):
     grad_sizes = {name: math.prod(s) for name, s in collector.shapes().items()}
 
     # Allocate structured space ahead of time for the gradients
-    grad_buffer = create_index(cfg.run_path, num_grads=len(data), grad_sizes=grad_sizes, dtype=np.float16)
+    grad_buffer = create_index(
+        cfg.run_path, num_grads=len(data), grad_sizes=grad_sizes, dtype=np.float16
+    )
 
     per_doc_losses = torch.full(
         (len(data),),

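The only change above is line wrapping. For readers unfamiliar with the call being wrapped: `create_index` (defined in bergson/data.py below) returns a structured `np.memmap` with one fixed-size gradient field per module. A minimal sketch of such a buffer, with hypothetical module names and sizes:

```python
import numpy as np

# Hypothetical module names and per-module gradient sizes, for illustration only.
grad_sizes = {"mlp.down_proj": 16, "self_attn.q_proj": 16}
dtype = np.dtype([(name, np.float16, size) for name, size in grad_sizes.items()])

# One row per document, one fixed-size float16 vector per module.
buf = np.memmap("gradients.bin", dtype=dtype, mode="w+", shape=(4,))
buf["mlp.down_proj"][0] = np.ones(16, dtype=np.float16)
buf.flush()  # persist the mapped pages to disk
```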
bergson/data.py
Lines changed: 23 additions & 7 deletions

@@ -18,6 +18,7 @@
 
 Precision = Literal["bf16", "fp16", "fp32", "int4", "int8"]
 
+
 @dataclass
 class DataConfig:
     dataset: str = "EleutherAI/SmolLM2-135M-10B"

@@ -100,7 +101,9 @@ def ceildiv(a: int, b: int) -> int:
     return -(-a // b)  # Equivalent to math.ceil(a / b) but faster for integers
 
 
-def allocate_batches(doc_lengths: list[int], N: int, world_size: Optional[int] = None) -> list[list[int]]:
+def allocate_batches(
+    doc_lengths: list[int], N: int, world_size: Optional[int] = None
+) -> list[list[int]]:
     """
     Allocate documents into batches that are then distributed evenly across
     a fixed number of workers.

@@ -184,7 +187,9 @@ def allocate_batches(doc_lengths: list[int], N: int, world_size: Optional[int] =
     while len(batches) < world_size:
         big = batches.pop(0)  # take the current largest
         if len(big) == 1:  # cannot split a singleton
-            raise RuntimeError("Not enough documents to give each worker at least one batch.")
+            raise RuntimeError(
+                "Not enough documents to give each worker at least one batch."
+            )
         batches.append([big.pop()])  # move one doc into new batch
         batches.append(big)  # put the remainder back
         # preserve cost constraint automatically

@@ -206,7 +211,9 @@ def allocate_batches(doc_lengths: list[int], N: int, world_size: Optional[int] =
         i += 1
 
     assert len(batches) == target_batches
-    assert all(max(doc_lengths[i] for i in batch) * len(batch) <= N for batch in batches)
+    assert all(
+        max(doc_lengths[i] for i in batch) * len(batch) <= N for batch in batches
+    )
 
     # ---------------------------------------------------------------------
     # 4) Round-robin assignment to workers

@@ -220,7 +227,9 @@ def allocate_batches(doc_lengths: list[int], N: int, world_size: Optional[int] =
     return allocation[rank]
 
 
-def create_index(root: str, num_grads: int, grad_sizes: dict[str, int], dtype: DTypeLike) -> np.memmap:
+def create_index(
+    root: str, num_grads: int, grad_sizes: dict[str, int], dtype: DTypeLike
+) -> np.memmap:
     """Create a memory-mapped file for storing structured gradients
     and persist metadata."""
     grad_path = os.path.join(root, "gradients.bin")

@@ -311,7 +320,9 @@ def load_shard(dir: str) -> Dataset:
     if concatenate_gradients:
         unstructured_data = structured_to_unstructured(mmap)
         flat = pa.array(unstructured_data.reshape(-1))
-        col_arrow = pa.FixedSizeListArray.from_arrays(flat, unstructured_data.shape[1])
+        col_arrow = pa.FixedSizeListArray.from_arrays(
+            flat, unstructured_data.shape[1]
+        )
 
         ds = ds.add_column("gradients", col_arrow, new_fingerprint="gradients")
     # Add a column for each module's gradient vectors

@@ -375,7 +386,9 @@ def tokenize(batch: dict, *, args: DataConfig, tokenizer):
                 {"role": "user", "content": assert_type(str, prompt)},
                 {"role": "assistant", "content": assert_type(str, resp)},
             ]
-            for prompt, resp in zip(batch[args.prompt_column], batch[args.completion_column])
+            for prompt, resp in zip(
+                batch[args.prompt_column], batch[args.completion_column]
+            )
         ]
     elif args.conversation_column:
         # We're dealing with a conversation dataset

@@ -422,4 +435,7 @@ def tokenize(batch: dict, *, args: DataConfig, tokenizer):
 def unflatten(x: torch.Tensor, shapes: dict[str, Sequence[int]], dim: int = -1):
     """Unflatten a tensor `x` into a dictionary of tensors with specified shapes."""
     numels = [math.prod(shape) for shape in shapes.values()]
-    return {name: x.unflatten(dim, shape) for (name, shape), x in zip(shapes.items(), x.split(numels, dim=dim))}
+    return {
+        name: x.unflatten(dim, shape)
+        for (name, shape), x in zip(shapes.items(), x.split(numels, dim=dim))
+    }

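All eight hunks above are pure line wrapping. The reflowed `unflatten` comprehension is the least obvious of them; a round-trip sketch of what it computes, with made-up module shapes:

```python
import math

import torch

# Hypothetical per-module shapes; a flat vector is split and reshaped per module.
shapes = {"fc1.weight": (4, 3), "fc2.weight": (2, 4)}
numels = [math.prod(shape) for shape in shapes.values()]  # [12, 8]

flat = torch.arange(sum(numels), dtype=torch.float32)
parts = {
    name: chunk.unflatten(-1, shape)
    for (name, shape), chunk in zip(shapes.items(), flat.split(numels, dim=-1))
}
assert parts["fc1.weight"].shape == (4, 3)
assert parts["fc2.weight"].shape == (2, 4)
```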
bergson/distributed.py
Lines changed: 24 additions & 7 deletions

@@ -64,22 +64,30 @@ def setup_data_pipeline(cfg: IndexConfig) -> Dataset | IterableDataset:
         ds = load_dataset(data_str, split="train")
 
         if isinstance(ds, DatasetDict) or isinstance(ds, IterableDatasetDict):
-            raise NotImplementedError("DatasetDicts and IterableDatasetDicts are not supported.")
+            raise NotImplementedError(
+                "DatasetDicts and IterableDatasetDicts are not supported."
+            )
     except ValueError as e:
         # Automatically use load_from_disk if appropriate
         if "load_from_disk" in str(e):
             ds = Dataset.load_from_disk(data_str, keep_in_memory=False)
         else:
             raise e
 
-    tokenizer = AutoTokenizer.from_pretrained(cfg.model, model_max_length=cfg.token_batch_size)
+    tokenizer = AutoTokenizer.from_pretrained(
+        cfg.model, model_max_length=cfg.token_batch_size
+    )
 
-    ds = ds.map(tokenize, batched=True, fn_kwargs=dict(args=cfg.data, tokenizer=tokenizer))
+    ds = ds.map(
+        tokenize, batched=True, fn_kwargs=dict(args=cfg.data, tokenizer=tokenizer)
+    )
 
     return ds
 
 
-def setup_model_and_peft(cfg: IndexConfig, rank: int, dtype: torch.dtype) -> tuple[AutoModelForCausalLM, set | None]:
+def setup_model_and_peft(
+    cfg: IndexConfig, rank: int, dtype: torch.dtype
+) -> tuple[AutoModelForCausalLM, set | None]:
     """Handle model loading, quantization, FSDP, and PEFT detection"""
 
     torch.manual_seed(42)

@@ -141,7 +149,9 @@ def setup_model_and_peft(cfg: IndexConfig, rank: int, dtype: torch.dtype) -> tup
                 model.get_submodule(processed_name)
                 target_modules.add(processed_name)
             except AttributeError:
-                print(f"Adapter parameter '{processed_name}' not found in the model.")
+                print(
+                    f"Adapter parameter '{processed_name}' not found in the model."
+                )
 
     # Configure gradients
     model.requires_grad_(False)

@@ -223,7 +233,11 @@ def worker_wrapper(
         case "fp32":
             dtype = torch.float32
         case "int4" | "int8":
-            dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
+            dtype = (
+                torch.bfloat16
+                if torch.cuda.is_bf16_supported()
+                else torch.float16
+            )
         case other:
             raise ValueError(f"Unsupported precision: {other}")
 

@@ -305,7 +319,10 @@ def distributed_computing(
     ctx = start_processes(
         "build",
         worker_wrapper,
-        args={i: (i, world_size, cfg, ds, worker_fn, setup_model, setup_processor) for i in range(world_size)},
+        args={
+            i: (i, world_size, cfg, ds, worker_fn, setup_model, setup_processor)
+            for i in range(world_size)
+        },
         envs={
             i: {
                 "LOCAL_RANK": str(i),

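Again formatting only. The reflowed `"int4" | "int8"` arm picks a half-precision compute dtype by hardware capability; the same check as a standalone sketch (the function name is ours, not the repo's):

```python
import torch

def pick_compute_dtype() -> torch.dtype:
    # Quantized weights still need a floating compute dtype; prefer bf16
    # when the GPU supports it, otherwise fall back to fp16.
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
        return torch.bfloat16
    return torch.float16
```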
bergson/gradients.py
Lines changed: 18 additions & 8 deletions

@@ -162,7 +162,9 @@ def to_adafactor(self) -> AdafactorNormalizer:
         and the factored second moments.
         """
         # We assume avg_sq is a square matrix of shape [O, I]
-        assert self.avg_sq.ndim == 2, f"Expected 2D tensor for avg_sq, got {self.avg_sq.ndim}D"
+        assert (
+            self.avg_sq.ndim == 2
+        ), f"Expected 2D tensor for avg_sq, got {self.avg_sq.ndim}D"
 
         # Compute row and column means
         return AdafactorNormalizer(

@@ -213,9 +215,6 @@ def save(self, path: str):
             json.dump(cfg, f, indent=2)
 
 
-
-
-
 @dataclass
 class GradientCollector(ContextDecorator):
     """

@@ -346,7 +345,12 @@ def _save_input(self, module: nn.Module, inp: tuple, _):
         if p is not None and not isinstance(norm, AdamNormalizer):
             i = module.in_features
 
-            x = x @ self.projection(name=name, m=p, n=i, side="right", dtype=x.dtype, device=x.device).T
+            x = (
+                x
+                @ self.projection(
+                    name=name, m=p, n=i, side="right", dtype=x.dtype, device=x.device
+                ).T
+            )
 
         module._inputs = x

@@ -387,14 +391,20 @@ def _process_grad(self, module: nn.Module, _, grad_out):
 
             # Project the gradients to the lower-dimensional space
             if p is not None:
-                A = self.projection(name=name, m=p, n=o, side="left", dtype=G.dtype, device=G.device)
-                B = self.projection(name=name, m=p, n=i, side="right", dtype=G.dtype, device=G.device)
+                A = self.projection(
+                    name=name, m=p, n=o, side="left", dtype=G.dtype, device=G.device
+                )
+                B = self.projection(
+                    name=name, m=p, n=i, side="right", dtype=G.dtype, device=G.device
+                )
                 P = A @ P @ B.T  # [N, p, q]
 
         # Both Adafactor and no normalizer, we can project G first
         else:
             if p is not None:
-                A = self.projection(name=name, m=p, n=o, side="left", dtype=G.dtype, device=G.device)
+                A = self.projection(
+                    name=name, m=p, n=o, side="left", dtype=G.dtype, device=G.device
+                )
                 G = G @ A.T  # [N, S, p]
 
             P = G.mT @ I  # [N, O/p, S] @ [N, S, I/q] → [N, O/p, I/q]

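The wrapped `self.projection` calls implement a two-sided projection: each per-example gradient matrix of shape [O, I] is compressed to [p, q] via a left factor A and a right factor B, as in `P = A @ P @ B.T`. A toy version with illustrative dimensions and plain Gaussian factors (the repo's `projection` may construct its matrices differently):

```python
import torch

# Illustrative sizes: N examples, O x I gradient matrices, p x q targets.
N, O, I, p, q = 2, 8, 6, 3, 3
G = torch.randn(N, O, I)
A = torch.randn(p, O) / O**0.5  # "left" factor
B = torch.randn(q, I) / I**0.5  # "right" factor

P = A @ G @ B.T  # broadcasts over N: [N, p, q]
assert P.shape == (N, p, q)
```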
bergson/hessians/attribute.py
Lines changed: 25 additions & 8 deletions

@@ -13,14 +13,20 @@
 # ## 1. Load index for query and train data
 
 parser = argparse.ArgumentParser(description="Process normalization flag.")
-parser.add_argument("--normalize", action="store_true", help="Gradients will be unit normalized.")
+parser.add_argument(
+    "--normalize", action="store_true", help="Gradients will be unit normalized."
+)
 args = parser.parse_args()
 
 device = "cuda:1"
 
 # %%
-base_path = "/mnt/ssd-1/gpaulo/emergent-misalignment/emergent-misalignment-eleuther/data/"
-index_dataset = load_dataset("json", data_files=f"{base_path}merged-medical-reformatted.jsonl")["train"]
+base_path = (
+    "/mnt/ssd-1/gpaulo/emergent-misalignment/emergent-misalignment-eleuther/data/"
+)
+index_dataset = load_dataset(
+    "json", data_files=f"{base_path}merged-medical-reformatted.jsonl"
+)["train"]
 index_path = "/mnt/ssd-1/gpaulo/emergent-misalignment/qwen14_merged_medical_proj16/merged_medical_no_normalizer"
 queries_path = "/mnt/ssd-1/louis/emergent_misalignment/test_query_ekfac"

@@ -37,17 +43,25 @@
 normalize = args.normalize
 
 attribution_dict = {}
-output_path = "/mnt/ssd-1/louis/emergent_misalignment/test_query_ekfac_attribution_no_normalizer"
+output_path = (
+    "/mnt/ssd-1/louis/emergent_misalignment/test_query_ekfac_attribution_no_normalizer"
+)
 if normalize:
     output_path += "_unit_norm"
 os.makedirs(output_path, exist_ok=True)
 
 for name in tqdm(list(names)):
     index_tensor = torch.from_numpy(index[name]).to(device=device, dtype=torch.float32)
-    queries_tensor = torch.from_numpy(queries[name]).to(device=device, dtype=torch.float32)
+    queries_tensor = torch.from_numpy(queries[name]).to(
+        device=device, dtype=torch.float32
+    )
     if normalize:
-        index_tensor = index_tensor / (torch.norm(index_tensor, dim=1, keepdim=True) + 1e-10)
-        queries_tensor = queries_tensor / (torch.norm(queries_tensor, dim=1, keepdim=True) + 1e-10)
+        index_tensor = index_tensor / (
+            torch.norm(index_tensor, dim=1, keepdim=True) + 1e-10
+        )
+        queries_tensor = queries_tensor / (
+            torch.norm(queries_tensor, dim=1, keepdim=True) + 1e-10
+        )
     # Compute result on GPU
     result_tensor = index_tensor @ queries_tensor.T

@@ -56,7 +70,10 @@
 
     # Create memory-mapped file with .bin extension
     mmap_file = np.memmap(
-        os.path.join(output_path, f"{name}_attribution.npy"), dtype=np.float32, mode="w+", shape=result_shape
+        os.path.join(output_path, f"{name}_attribution.npy"),
+        dtype=np.float32,
+        mode="w+",
+        shape=result_shape,
     )
 
     # Copy GPU result directly to memmap

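The normalization branch above turns the raw inner products into cosine-style attribution scores: both gradient sets are unit-normalized (the 1e-10 guards against zero-norm rows) before the matmul. In miniature, with random stand-in tensors:

```python
import torch

index_tensor = torch.randn(5, 16)    # 5 training examples, 16-dim gradient features
queries_tensor = torch.randn(2, 16)  # 2 query examples

index_tensor = index_tensor / (torch.norm(index_tensor, dim=1, keepdim=True) + 1e-10)
queries_tensor = queries_tensor / (
    torch.norm(queries_tensor, dim=1, keepdim=True) + 1e-10
)

scores = index_tensor @ queries_tensor.T  # [5, 2] train-by-query attribution matrix
```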
bergson/hessians/collector.py
Lines changed: 39 additions & 12 deletions

@@ -263,8 +263,14 @@ def teardown(self) -> None:
         os.makedirs(gradient_path, exist_ok=True)
 
         # Save sharded covariance matrices
-        save_file(self.A_cov_dict, os.path.join(activation_path, f"shard_{self.rank}.safetensors"))
-        save_file(self.S_cov_dict, os.path.join(gradient_path, f"shard_{self.rank}.safetensors"))
+        save_file(
+            self.A_cov_dict,
+            os.path.join(activation_path, f"shard_{self.rank}.safetensors"),
+        )
+        save_file(
+            self.S_cov_dict,
+            os.path.join(gradient_path, f"shard_{self.rank}.safetensors"),
+        )
 
 
 @dataclass(kw_only=True)

@@ -286,11 +292,15 @@ def setup(self) -> None:
         """Load eigenvectors and initialize storage."""
         # Load precomputed eigenvectors
         self.eigen_a = load_file(
-            os.path.join(self.path, f"activation_eigen_sharded/shard_{self.rank}.safetensors"),
+            os.path.join(
+                self.path, f"activation_eigen_sharded/shard_{self.rank}.safetensors"
+            ),
             device=f"cuda:{self.rank}",
         )
         self.eigen_g = load_file(
-            os.path.join(self.path, f"gradient_eigen_sharded/shard_{self.rank}.safetensors"),
+            os.path.join(
+                self.path, f"gradient_eigen_sharded/shard_{self.rank}.safetensors"
+            ),
             device=f"cuda:{self.rank}",
         )
 

@@ -303,7 +313,9 @@ def forward_hook(self, name: str, a: Tensor) -> None:
         # a shape: [N, S, I]
 
         # Transform: a @ eigen_a
-        transformed = self.shard_computer._matmul(vector_nsa=a, matrix_cb=self.eigen_a[name])  # shape [N, S, I]
+        transformed = self.shard_computer._matmul(
+            vector_nsa=a, matrix_cb=self.eigen_a[name]
+        )  # shape [N, S, I]
 
         # Cache for use in backward pass
         self.transformed_a_cache[name] = transformed

@@ -313,11 +325,15 @@ def backward_hook(self, name: str, g: Tensor) -> None:
         # g shape: [N, S, O]
 
         # Transform: g @ eigen_g
-        transformed_g = self.shard_computer._matmul(vector_nsa=g, matrix_cb=self.eigen_g[name])  # shape [N, S, O]
+        transformed_g = self.shard_computer._matmul(
+            vector_nsa=g, matrix_cb=self.eigen_g[name]
+        )  # shape [N, S, O]
 
         # Compute outer product: sum_n (transformed_a_n^T @ transformed_g_n)
         # Einstein notation: [N, S, I] x [N, S, O] -> [N, O, I]
-        transformed_grad_shard = torch.einsum("N S I, N S O -> N O I", self.transformed_a_cache[name], transformed_g)
+        transformed_grad_shard = torch.einsum(
+            "N S I, N S O -> N O I", self.transformed_a_cache[name], transformed_g
+        )
 
         # Square and sum over batch
         transformed_grad_shard = (transformed_grad_shard**2).sum(dim=0).contiguous()

@@ -333,15 +349,26 @@ def backward_hook(self, name: str, g: Tensor) -> None:
 
         # Accumulate (with CPU offloading for memory efficiency)
         if name not in self.eigenvalue_corrections:
-            self.eigenvalue_corrections[name] = transformed_grad_shard[start_row:end_row, :].contiguous()
+            self.eigenvalue_corrections[name] = transformed_grad_shard[
+                start_row:end_row, :
+            ].contiguous()
         else:
-            self.eigenvalue_corrections[name] = self.eigenvalue_corrections[name].to(device=self.device)
-            self.eigenvalue_corrections[name].add_(transformed_grad_shard[start_row:end_row, :].contiguous())
-            self.eigenvalue_corrections[name] = self.eigenvalue_corrections[name].to(device="cpu", non_blocking=False)
+            self.eigenvalue_corrections[name] = self.eigenvalue_corrections[name].to(
+                device=self.device
+            )
+            self.eigenvalue_corrections[name].add_(
+                transformed_grad_shard[start_row:end_row, :].contiguous()
+            )
+            self.eigenvalue_corrections[name] = self.eigenvalue_corrections[name].to(
+                device="cpu", non_blocking=False
+            )
 
     def teardown(self) -> None:
         """Save eigenvalue corrections to disk."""
         output_path = os.path.join(self.path, "eigenvalue_correction_sharded")
         os.makedirs(output_path, exist_ok=True)
 
-        save_file(self.eigenvalue_corrections, os.path.join(output_path, f"shard_{self.rank}.safetensors"))
+        save_file(
+            self.eigenvalue_corrections,
+            os.path.join(output_path, f"shard_{self.rank}.safetensors"),
+        )

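The `backward_hook` logic above computes the eigenvalue correction: per-example outer products of the eigenbasis-transformed activations and output gradients, squared, then summed over the batch. A self-contained miniature of that einsum, with illustrative shapes:

```python
import torch

# N examples, S sequence positions, I input features, O output features.
N, S, I, O = 4, 7, 5, 3
a = torch.randn(N, S, I)  # activations, already rotated into the eigenbasis
g = torch.randn(N, S, O)  # output gradients, already rotated into the eigenbasis

per_example = torch.einsum("N S I, N S O -> N O I", a, g)  # [N, O, I]
correction = (per_example**2).sum(dim=0)  # [O, I], second moments over the batch
assert correction.shape == (O, I)
```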