Fix: Shorten embedding filenames using model_id_to_filename (#122)

Rahulkumarsharma01 · Rahulkumarsharma01 · commit 7437f294f82d · 2025-08-07T00:18:08.000+05:30
diff --git a/cookbook/populate_embeddings.ipynb b/cookbook/populate_embeddings.ipynb
@@ -24,21 +24,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 25,
    "id": "c5498911",
    "metadata": {
     "id": "c5498911"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2025-05-27 13:21:11.840076: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
-      "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import os\n",
     "import os.path\n",
@@ -50,7 +41,10 @@
     "import math\n",
     "# import numpy as np\n",
     "import pandas as pd\n",
-    "from sentence_transformers import SentenceTransformer"
+    "from sentence_transformers import SentenceTransformer\n",
+    "\n",
+    "def model_id_to_filename(model_id):  # keep only the text after the last slash, lower-cased\n",
+    "    return model_id.rpartition(\"/\")[2].lower()"
    ]
   },
   {
@@ -65,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 26,
    "id": "45b95c55",
    "metadata": {
     "id": "45b95c55"
@@ -85,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 27,
    "id": "b87a3c65-0e08-4fa9-aa8f-2f9a2f6c3499",
    "metadata": {
     "colab": {
@@ -101,7 +95,7 @@
        "False"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -122,7 +116,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 28,
    "id": "95fb523c",
    "metadata": {
     "id": "95fb523c"
@@ -150,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 29,
    "id": "cd09f66b",
    "metadata": {
     "id": "cd09f66b"
@@ -220,7 +214,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 30,
    "id": "87316fa4-1fcf-41c4-9913-bc5704b25ea2",
    "metadata": {
     "colab": {
@@ -248,19 +242,33 @@
       "\n",
       "Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n",
       "Request url: https://router.huggingface.co/hf-inference/models/BAAI/bge-large-en-v1.5/pipeline/feature-extraction\n",
-      "Dimensions from hugging face API response: 1024\n",
+      "Dimensions from hugging face API response: 1\n",
       "Dimensions from json file: 1024\n",
       "Old prompts:  2217\n",
       "New prompts:  0\n",
       "Errors:  0\n",
       "Successes:  0\n",
-      "Updating centroids.\n",
+      "Updating centroids.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\Rahul\\AppData\\Local\\Temp\\ipykernel_17512\\3081262251.py:43: UserWarning: Dimensions are different: API=1 while JSON sentences file=1024\n",
+      "  warnings.warn( f\"Dimensions are different: API={api_response_dimensions} while JSON sentences file={json_file_dimensions}\" )\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "Saving into file:  ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n",
       "\n",
       "\n",
       "Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-multilingual-e5-large.json\n",
       "Request url: https://router.huggingface.co/hf-inference/models/intfloat/multilingual-e5-large/pipeline/feature-extraction\n",
-      "Dimensions from hugging face API response: 1024\n",
+      "Dimensions from hugging face API response: 1\n",
       "Dimensions from json file: 1024\n",
       "Old prompts:  2217\n",
       "New prompts:  0\n",
@@ -458,7 +466,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -472,7 +480,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.6"
+   "version": "3.13.2"
   }
  },
  "nbformat": 4,