|
24 | 24 | }, |
25 | 25 | { |
26 | 26 | "cell_type": "code", |
27 | | - "execution_count": 1, |
| 27 | + "execution_count": 25, |
28 | 28 | "id": "c5498911", |
29 | 29 | "metadata": { |
30 | 30 | "id": "c5498911" |
31 | 31 | }, |
32 | | - "outputs": [ |
33 | | - { |
34 | | - "name": "stderr", |
35 | | - "output_type": "stream", |
36 | | - "text": [ |
37 | | - "2025-05-27 13:21:11.840076: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", |
38 | | - "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" |
39 | | - ] |
40 | | - } |
41 | | - ], |
| 32 | + "outputs": [], |
42 | 33 | "source": [ |
43 | 34 | "import os\n", |
44 | 35 | "import os.path\n", |
|
50 | 41 | "import math\n", |
51 | 42 | "# import numpy as np\n", |
52 | 43 | "import pandas as pd\n", |
53 | | - "from sentence_transformers import SentenceTransformer" |
| 44 | + "from sentence_transformers import SentenceTransformer\n", |
| 45 | + "\n", |
| 46 | + "def model_id_to_filename(model_id):\n", |
| 47 | + " return model_id.split(\"/\")[-1].lower()" |
54 | 48 | ] |
55 | 49 | }, |
56 | 50 | { |
|
65 | 59 | }, |
66 | 60 | { |
67 | 61 | "cell_type": "code", |
68 | | - "execution_count": 2, |
| 62 | + "execution_count": 26, |
69 | 63 | "id": "45b95c55", |
70 | 64 | "metadata": { |
71 | 65 | "id": "45b95c55" |
|
85 | 79 | }, |
86 | 80 | { |
87 | 81 | "cell_type": "code", |
88 | | - "execution_count": 3, |
| 82 | + "execution_count": 27, |
89 | 83 | "id": "b87a3c65-0e08-4fa9-aa8f-2f9a2f6c3499", |
90 | 84 | "metadata": { |
91 | 85 | "colab": { |
|
101 | 95 | "False" |
102 | 96 | ] |
103 | 97 | }, |
104 | | - "execution_count": 3, |
| 98 | + "execution_count": 27, |
105 | 99 | "metadata": {}, |
106 | 100 | "output_type": "execute_result" |
107 | 101 | } |
|
122 | 116 | }, |
123 | 117 | { |
124 | 118 | "cell_type": "code", |
125 | | - "execution_count": 4, |
| 119 | + "execution_count": 28, |
126 | 120 | "id": "95fb523c", |
127 | 121 | "metadata": { |
128 | 122 | "id": "95fb523c" |
|
150 | 144 | }, |
151 | 145 | { |
152 | 146 | "cell_type": "code", |
153 | | - "execution_count": 5, |
| 147 | + "execution_count": 29, |
154 | 148 | "id": "cd09f66b", |
155 | 149 | "metadata": { |
156 | 150 | "id": "cd09f66b" |
|
220 | 214 | }, |
221 | 215 | { |
222 | 216 | "cell_type": "code", |
223 | | - "execution_count": 6, |
| 217 | + "execution_count": 30, |
224 | 218 | "id": "87316fa4-1fcf-41c4-9913-bc5704b25ea2", |
225 | 219 | "metadata": { |
226 | 220 | "colab": { |
|
248 | 242 | "\n", |
249 | 243 | "Opening existing file locally: ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n", |
250 | 244 | "Request url: https://router.huggingface.co/hf-inference/models/BAAI/bge-large-en-v1.5/pipeline/feature-extraction\n", |
251 | | - "Dimensions from hugging face API response: 1024\n", |
| 245 | + "Dimensions from hugging face API response: 1\n", |
252 | 246 | "Dimensions from json file: 1024\n", |
253 | 247 | "Old prompts: 2217\n", |
254 | 248 | "New prompts: 0\n", |
255 | 249 | "Errors: 0\n", |
256 | 250 | "Successes: 0\n", |
257 | | - "Updating centroids.\n", |
| 251 | + "Updating centroids.\n" |
| 252 | + ] |
| 253 | + }, |
| 254 | + { |
| 255 | + "name": "stderr", |
| 256 | + "output_type": "stream", |
| 257 | + "text": [ |
| 258 | + "C:\\Users\\Rahul\\AppData\\Local\\Temp\\ipykernel_17512\\3081262251.py:43: UserWarning: Dimensions are different: API=1 while JSON sentences file=1024\n", |
| 259 | + " warnings.warn( f\"Dimensions are different: API={api_response_dimensions} while JSON sentences file={json_file_dimensions}\" )\n" |
| 260 | + ] |
| 261 | + }, |
| 262 | + { |
| 263 | + "name": "stdout", |
| 264 | + "output_type": "stream", |
| 265 | + "text": [ |
258 | 266 | "Saving into file: ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json\n", |
259 | 267 | "\n", |
260 | 268 | "\n", |
261 | 269 | "Opening existing file locally: ../prompt-sentences-main/prompt_sentences-multilingual-e5-large.json\n", |
262 | 270 | "Request url: https://router.huggingface.co/hf-inference/models/intfloat/multilingual-e5-large/pipeline/feature-extraction\n", |
263 | | - "Dimensions from hugging face API response: 1024\n", |
| 271 | + "Dimensions from hugging face API response: 1\n", |
264 | 272 | "Dimensions from json file: 1024\n", |
265 | 273 | "Old prompts: 2217\n", |
266 | 274 | "New prompts: 0\n", |
|
458 | 466 | "provenance": [] |
459 | 467 | }, |
460 | 468 | "kernelspec": { |
461 | | - "display_name": "Python 3 (ipykernel)", |
| 469 | + "display_name": "Python 3", |
462 | 470 | "language": "python", |
463 | 471 | "name": "python3" |
464 | 472 | }, |
|
472 | 480 | "name": "python", |
473 | 481 | "nbconvert_exporter": "python", |
474 | 482 | "pygments_lexer": "ipython3", |
475 | | - "version": "3.9.6" |
| 483 | + "version": "3.13.2" |
476 | 484 | } |
477 | 485 | }, |
478 | 486 | "nbformat": 4, |
|
0 commit comments