Create experiment notebook and refactor GPT2 module

lakshith-403 · lakshith-403 · commit 23b7e2ee8e07 · 2024-07-29T19:41:24.000+05:30
diff --git a/labml_nn/transformers/LoRA/GPT2.py b/labml_nn/transformers/LoRA/GPT2.py
@@ -14,7 +14,7 @@
 }
 
 
-class HeadFFN(nn.Module):  # todo rename
+class FFN(nn.Module):
     def __init__(self, dim):
         super().__init__()
         self.c_fc = nn.Linear(config['n_embd'], dim)
@@ -28,7 +28,7 @@ def forward(self, hidden_states):
         return hidden_states
 
 
-class MultiHead(nn.Module):
+class MultiHeadAttention(nn.Module):
     def __init__(self):
         super().__init__()
         self.embed_dim = config['n_embd']
@@ -65,7 +65,6 @@ def forward(self, hidden_states):
             is_causal=True,  # for the triangular mask
         )
 
-        # todo why this?
         attn_output = attn_output.transpose(1, 2).contiguous()
         attn_output = attn_output.view(batch_size, seq_length, self.embed_dim)
 
@@ -78,9 +77,9 @@ class Block(nn.Module):
     def __init__(self):
         super().__init__()
         self.pre_norm = nn.LayerNorm(config['n_embd'], eps=config['layer_norm_epsilon'])
-        self.attn = MultiHead()
+        self.attn = MultiHeadAttention()
         self.post_norm = nn.LayerNorm(config['n_embd'], eps=config['layer_norm_epsilon'])
-        self.ffn = HeadFFN(config['n_embd'] * 4)
+        self.ffn = FFN(config['n_embd'] * 4)
 
     def forward(self, hidden_states):
         residual = hidden_states
@@ -98,7 +97,6 @@ def forward(self, hidden_states):
 
 
 class GPTModel(nn.Module):
-    # todo ignored token type embeds, past key values
     def __init__(self):
         super().__init__()
 
@@ -128,31 +126,3 @@ def forward(self, input_ids):
         logits = self.lm_head(hidden_states)
 
         return logits
-
-
-model = GPTModel()
-
-state_dict = torch.load('transformed.pth')
-
-missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
-if missing_keys:
-    print(f"Missing keys: {missing_keys}")
-if unexpected_keys:
-    print(f"Unexpected keys: {unexpected_keys}")
-
-prompt = "hello how are you"
-tokenized = tokenizer(prompt, return_tensors="pt")
-
-with torch.no_grad():
-    model.eval()
-    res = model(tokenized['input_ids'])
-
-print(res)
-
-output_ids = torch.argmax(res, dim=-1)
-
-# Decode the token indices back to text
-output_text = tokenizer.decode(output_ids[0])
-
-# Print the tokens of the output
-print(output_text)
diff --git a/labml_nn/transformers/LoRA/experiment.ipynb b/labml_nn/transformers/LoRA/experiment.ipynb
@@ -0,0 +1,125 @@
+{
+ "cells": [
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-07-29T07:14:27.781097Z",
+     "start_time": "2024-07-29T07:14:24.819976Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from labml_nn.transformers.LoRA.GPT2 import GPTModel\n",
+    "import torch"
+   ],
+   "id": "cffa3ec341b4905a",
+   "outputs": [],
+   "execution_count": 1
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-07-29T07:14:28.183960Z",
+     "start_time": "2024-07-29T07:14:27.782683Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")"
+   ],
+   "id": "c2b0b7e18394ea9e",
+   "outputs": [],
+   "execution_count": 2
+  },
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-07-29T07:14:29.840925Z",
+     "start_time": "2024-07-29T07:14:28.185080Z"
+    }
+   },
+   "source": [
+    "model = GPTModel()\n",
+    "\n",
+    "state_dict = torch.load('transformed.pth')\n",
+    "\n",
+    "missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)\n",
+    "if missing_keys:\n",
+    "    print(f\"Missing keys: {missing_keys}\")\n",
+    "if unexpected_keys:\n",
+    "    print(f\"Unexpected keys: {unexpected_keys}\")"
+   ],
+   "outputs": [],
+   "execution_count": 3
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-07-29T07:22:30.408855Z",
+     "start_time": "2024-07-29T07:22:30.168376Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "prompt = \"hello how are you\"\n",
+    "tokenized = tokenizer(prompt, return_tensors=\"pt\")\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    model.eval()\n",
+    "    res = model(tokenized['input_ids'])\n",
+    "\n",
+    "output_ids = torch.argmax(res, dim=-1)\n",
+    "for id in output_ids[0]:\n",
+    "    print(tokenizer.decode(id))"
+   ],
+   "id": "f4f7826ec3729b66",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ",\n",
+      " to\n",
+      " you\n",
+      " doing\n"
+     ]
+    }
+   ],
+   "execution_count": 17
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "c12776360008a974"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (ml)",
+   "language": "python",
+   "name": "ml"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/labml_nn/transformers/LoRA/load_hf.py b/labml_nn/transformers/LoRA/load_hf.py