diff --git a/learn/vlm-finetuning/qwen-2p5-vl-7b-finetune.ipynb b/learn/vlm-finetuning/qwen-2p5-vl-7b-finetune.ipynb new file mode 100644 index 0000000..3a1846d --- /dev/null +++ b/learn/vlm-finetuning/qwen-2p5-vl-7b-finetune.ipynb @@ -0,0 +1,311 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to Fine-tune Qwen 2.5 VL 7B and Deploy to Fireworks\n", + "\n", + "### Prerequisites\n", + "- 1 H100 GPU\n", + " - Should work on 1 A100 GPU as well\n", + "- Python" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting uv\n", + " Using cached uv-0.7.3-py3-none-macosx_11_0_arm64.whl.metadata (11 kB)\n", + "Using cached uv-0.7.3-py3-none-macosx_11_0_arm64.whl (15.6 MB)\n", + "Installing collected packages: uv\n", + "Successfully installed uv-0.7.3\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "\u001b[2mUsing Python 3.10.16 environment at: /Users/aidando/miniconda3/envs/finetune-env-3\u001b[0m\n", + "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 41ms\u001b[0m\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "# Install dependencies\n", + "%pip install uv\n", + "%uv pip install datasets pillow scikit-learn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we'll fine-tune a model to classify apparel by type (shirts, pants, shoes, watches, etc.).\n", + "\n", + "We'll use the [Fashion Product Images (Small)](https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-small) dataset to do this. We'll use only 500 rows for the train set, 100 rows for the test set, and 100 rows for validation to demonstrate that you don't need a huge amount of data to get results with fine-tuning."
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 15970,\n", + " 'gender': 'Men',\n", + " 'masterCategory': 'Apparel',\n", + " 'subCategory': 'Topwear',\n", + " 'articleType': 'Shirts',\n", + " 'baseColour': 'Navy Blue',\n", + " 'season': 'Fall',\n", + " 'year': 2011.0,\n", + " 'usage': 'Casual',\n", + " 'productDisplayName': 'Turtle Check Men Navy Blue Shirt',\n", + " 'image': }" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCABQADwDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigBK88+IXxRsvBYOnwxG51eSLzI48fJGCcAuc/Xgeleh14x4o8HT6j8WrrU57Rb20FokiQn+IgbNvP4monNQV2VGLk7IreD/jfd3+uQWGv2lrHBcOEW6t8rsYnAyCTkZ4z2r3CvC/EXgLTdQFnb6LojWGozyx5mjH7uH5ud34ZPA7V7jGCsaqzFiAAWPf3pU6imroc4ODsySiiitCAoorI8R6odI0G5vF3b1XCbF3HJ6cUAaFxPDbQtLPMkUa9XdgoH4mvOIvF51zVbu7so0eztJ/IjaP77pgZcj0znA9MVyupTXesWU5vZpbiR7OPf8AK74IkIbHIH6VJ4Xtl8Oaw8Mgne0nuWtw0iKPLYDKng/dIOOe9RXpt0nbc0oytUV9jstB8SWMnjJNJluN1xLbmaE42rkHG3nnOCSPxr0GvnzxBpAvfElzdweYqqs+3ZFuG5Mdwc9fy6V3HhDxReHWI9MvboTRzyMqeZIC6/IrADgHHJ4OaKMOWmgrS5ps9MoooqzMK4zx/KTa6daKV/fXSkgybTgEdDXZ15z45nb+37GPcqIsZc+aMKVAYsc9Mjgc9zVR3A4G3dXF5FNsy0U
2DcOWPzDzVGBx2atC48mSclPs+XuraVSLZj99CCap6fIsTRzeUIfujy1lVRtC4Htn5RyPWrLjZDAJWfCz2almvOcBM9vrWzJIYo4ItPUgWqgWt22WidMbn29adp90RrUV5bO7JazIQqfP0KL949OMce9TQ+c1vZwKZ8SxqrbbhX4Nxk9fYVk6coa71FfKZyZRuZsgqJtuG/ujaQM1LGfRdFZ2h3b32h2NzLnzZIVL5/vYwf1rRrEY3qK5bXXSe+uIJVVo1iUFWGQeprqe9cbq6+f4gntQcee0ak+g28/pmufEXcbLub4aynd9jhb5YvMmAT5YbE4/0YMobYCePqwpJwVmVUXcwv7dQq2eOkI7n+dUYplntLu4RkbzILg5FwYzjeijg/TrWhc7jqICsBm/JG67yPljA6fU13xVlY527u5VhaOOyWUiPKWce3daFTkzH0rW0XTNP+0WTfZ4G3WCb8KdrFWI5B+g/Ks22DG1gCmbAgs1+S6B6yFj1rb0bfJYwX0obKyyxEkgkhwHHT0IP51z4q/s3Y2w9vaK56VpD+ZpNq3rGKu1n6IMaJZ/9clP6VoVMPhREviYnaubm2Q+JJLiQAhWXr7piukPFYuoabczXvnWwiIkAV/MJ+XB6j3xU1YtpWLouKk+bseK654gsbDxRrlgIEtrKGY28aRoJQoDKW46gEhjgdM0g8X6KbiCZNQU7Z7lyBabSN33evrXMXkRk1TWZpcFnvZyWPrvaudSwuLz+0p7djiytWuX4z8oZV/9mrs2RyKTbsegR+KNFhijX7VHM6/ZS4+xnPyAk+3BIrv/AAXLFfeAYpDbxqZrhyu3vsAUE/lXhGkQrJHLLJyx9elfQXgjSLuLwFoUcEcW14XlcyHkF2LAj3wR+FYYhNwaRvh5L2iv0O701Qmm2qDoIlH6VbqG3iEMCRKSQihQT7VNSirIJbn/2Q==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAADwAAABQCAIAAADKqIEEAAAnKklEQVR4AWL8//8/AykAopoRogXCYfgH4v3+w8DCwsDIxAAyECzPyPDjxx8OTpb/DAxfPn9mY2NjZ2P//+/fj29fOHl4GBiYQLqQMMgwRgawTiRRbEx0ndjUECfGyvL965ffP35AVP/585eBgYGFheXP3/8fPnzi5eVlZ2f/8P79v3//QC7+C5KFqISTxDgXopjkkIZog5KgwIEyQQHMCLX37x9QiDMxMdy+fZ+VneXNmzdiYmKCgoK8PFwMDAzfv35jZGTk4OSE6USloWagCqLyWFC5FPAYGT9++szMyMTMysLGys7ExLBp0/aJEyfcvX+HnZ2dlZVVU1MzIjzUwcFBWEiYAmtAWkkPaeTQRQ2VX7//s7Ew/mdkePr05e3bt7u6uk6cOMHJyc7Nw/kTDBgZmKOjo+tqa3l4eJiZkTTjNhPkRkz8n1Tw7/9/OILp/ff//+MnLyDChw4fd3P3VtfQYmBgkldQUlJSUlZWFBTk5+XnY+fkUFRSmTx56t+/MJ0QGqITQkJE8JKkhzTDPwZQwIBzMDiw/jNAig9QgNy+czcxMfHksePSMpLfvnzl4+Ph5uL48OHD379/mZhZlZSUnj59zsrGsXr1al1dbQYGWFkBMhCkHYTBZoIYuDHYbtzS6DL/GcAuRggju/jV6zdXLl46ffK4oAAPC/N/ZqZ/P75++v754+sXr9SVFf/++n771g1ubs5nTx9v3LgRYQQaC9kDaFIwLomOhmnDpP/8/SchKnL82CEudhYudqbnj58oyokL83H8+f6Rj43h3s3L/358YWX6+/Hdaw521vPnzsBNABXrYM7/P38YGMFFPpiLh6Cg9GAEhzosNn99/8HAynj/3m12lv+CfJz83CLBfh4iAgJ8XOxPHj++/+DRnQePePiF79x//PLth9evXjx6+FBSSoaNlfnPn7+szEwMjIyMLOAQhBlIXUeDjUY1komBgZeHg4Hh79dP
79iZ/zP/+yErLmyso6ampMD4+6eAnfmP3//uPnj85ee/OYuW3b1798G922/fvBIVFWVl5frPwPDn318WFhaG/0z///5jZEY1GhsPiwuwKYOJoQbDf2Tu/7/fPr3j5WT99/MXw+9vxrrqfOz/GX98lBbi5WFl4Gb5Ly0moKuh7OvuLCzAIybEz8/Py8YGimdWVmaQixkYGP7/Z2Qiyj0gbTAXEUdDMgpS2mACFQL/GBj/MTH8tbYwvnbuiAAHs6mB1stHd+9dOsnLxqaipKymq8/LxnDvwS0uVgZeLjZfby95WRkWFlCzBORaBgZQIfjvDysrKzGOIN3RyKGLsOEfA8M/Dh4ONyf7Uwe2C/KwaavKX75w9s2zB9wSEiwMP/9+/XDs4LFVG3ey8QoZ6+s2tLcwMLIy/P/748d3VnYOZibmf//+/f9HVC4ENWkQ9pLLgvriz68fnz+Ii4kYGehIi/DLy0jysxl911D4/eULJyvj6xdPWP//dba3+fDjr5i8GqhkBxUUTMzMzH///mVmYmZhYf7zB1ScQk3D6xjSQxrJOEZIUgHVLf8ZmNk42LgZeQRVVTTFBTl//PglLCvPwKnC8PIZw69fn7/9VlJRdVDXO3npppahJcPv/3/+/WLhZGVjY/v9G9TiY2RkYGJi+vPnHyukDEGyBZNJVMKHa4PULSCn/mdg+MvA8PM3w+/fDP/+Mfxj/Pf5NwO7wL9/HJra5gxMfD9/MjP8Z2f4+P335y8MrGy8olLP3n5mYBPg4JPg4BdjYONh4eBm+A+ynZWVmRHkbQZGZkYmFiaQ4XD7cDBIDumffxiYmRlA+YWJgYGVGRSjoFrxHxMX37d3r7jEJKV+fvvw+tnjJ8/Zmf5yC3H/+f3v94dPn359l1dWu/v4uYikHKeg+Pcv3zl5+EBO+g/KweAOwT8wCRIjiEF+JagIWQErKwMLpHfy5x8DCP39++P39+8/Gf794+Dm+fPpy+vXr798/sDG8v//7+/fnz7i5Ob++u377Tv33r599+rVK05OLgYGRk5ObiQzSXYDyQ0mcLAyMP77z8TAiOgxMTJ8fP+VX4C7tqRARVZQkPW3ioyAEDsDLwfLgwcPfv9j+sbI+Y2B08jWQ0hJm+EvMwMrG8zR/yCNJpCxYOPABShMEgdNsi//gTI7AxMzIwMzuHz+z/D9D8PXXww8gtwnT52bv2jRp0+fBAR47t68eu3yOW5uVgkJCVFJKRVV9UdPXpw+c56BgenHr9+/fv6GuYdkBzAQn4xgdjCAG++gMvX3n7+//zL8ZmBgZGNgZmN494WhZ+IUeWUVGTlZaSkpxn9/P7x+dvfGVWFxcQ4u3m8//ly5eq2mtv7zm3cc3NxMzKBMATUTnPUYYU6hRZH3j+H/v7+/fv3+95+Nnfs/Eyjb/2Ng+P6boX/K9MvXb5poqf768+/v3z+a6iqfnv178ujh3cfv/7ILfPvP9ubtx6fPX+0/fNQ3MATkZrBbQfmYkQlUbEN9QBRFcunBwMDAzMzMzMr2n4nh73+G2/dfHTx++tLlq8ePHxcQEb1x67aqNJ8sj5KWrKgY178P797fe/XtLzvvvXvPHjx9yisoNGfefCFxWVMzczY2cGJGDVgi0wrOjPjv3z8mWPMFzv775xczMxPD378fPn1bvW4jKwf/z39Mm3fuvXrjJgMDg7gwjwQ3s7QAY4CNjgT3bxUx7q9fP7/+znrvxeft+47dff7+ww8WTkEJTm4BXX3D5MR4OVlpdnaGzx8+MLEycfPw/Gdg+v7jOxcHjl46UhzgDGlGRpB/ICR8QOfzl2+nT534/evv46fP1qzdoGdoLimn9O/fP0F+ITZ2FlbGvx8+fVCXleUREP788ckfRjYRGUWW78xHzu/T0Tdk4H528Mw1BSmpt+++Xrl2bd+BQ0z/f+vqaFpYmjIwMHz+9OE/AxMvH7jwRnIfViY+R//584eFhYURDP7////06dNTp07t2LHL2NhYUEBYTVXD
wcFBUETy6vU7f//+Z2RkvHD2xN8vb1l+vrfSU7E3NOVi/Prz9x8+QTFTS5tl67c9fPri5+8/b959UlRSff/x89evX08eP7p0ycLszFQdHR1NXVBH+MuXLzygwSesTkUI4nQ0qMUI6wkxMjL++/fvw4cP9+49+PnrDysb5/fvv86cOcPIzC4lLXfnxnURCUl9AyOGX980FGUe37647+AxbRV5BnZmhn//Xr18x8XD//P3PwEhMbFf7EyMLGwcXDdPnzc3Nfb09Hzx9BE/n+DatWvtXr+0c3AixsWEW3m/f/9mAgNG8OgROwfHxcvXlFV0TI0NDfWNzE1NFeQV37//LCIm7uDo/P/XNylRAR1V+fNHd52+cM3P1Y6DhZGTgXnDtj22dk4/GDi27T/u7OEnLCb15u3HN6/fPXv66PTRw3w8nK9fv37x4sW3b9+4uLnB2RMRqFhZOEP6379/rKys//+D4p2BgeHx48enT5++fPmqq5uXkIgIBzv3v38Mr5694OPhe//6xfu37xRk5S6dO7/6xpWK4mwWdp7ZC5bJSUtJSYhwcXG9eP3m0+M3/zn4Hj56cufeA04eITY2NkkZaS42lpePH0pJST15+ujZs2cfPnwAORqrM1EFcRYyv379Ao1LgAOYgYHh+fPn165de/HiBSMj07Wrtx48ePD+7bsnjx59+/SJmeH/549vuTg5TI1N9HV09fSMdPSNfv5hvHbnEQev0I+ffyUkJFhZWWWk5ZSUVN6++/Dk6bMXL1/duHGLgYGBm5ubjY1NkF/g169ff/78+Q22FNWFWHg4Q5qZGdTD/P37758/fzg52TnYeaQk5YSFpf/8Z/v86bm4uKSahoYgv4CahuavP3/ff/gkwMcjKSn+5ZPiw0dPxKXlvQPDbz98eeHafWVFxS27Dnv6h8irap+5+sDZzU1IWPT3r59M//4yMjLdvf/gzLmL165cfP/+vYmZlYycPBY3YgjhdDQTE0iKlYWZlYX57ZsvD+4/ffrkjamZ+d//zPfvPvr89fvPX/8ePH524/a9K9dvnT59UlBU7Nr1K2fPnhUQ4OPh5d6066ClucXVh+9XbDp46e47xQdvP/y8e/7cBUZmNi9Pn4unTyspK9hZmRsamQkKiYlKyPMJiT17/oYJta7BcC1UAOQyKBOV+vPnz/9/rL9//+HkZGFiZPv+/efTJy+4eW7w8wnfunVLSEjo05dvHz58YGJhk1dU/P7zp7iEKDOL/vfv38WlpGVlpQ2MLeRVNERERL7+ZPn6m1VLx1hERERdXVNeRpaFieHn96/v37w+duzY5UtXFBWUhEXFfv369eLl21+/GdhYQe0wVLeg83CmaXZ2VliFyPDt27e/f/+CbVV//fr1nz9/mJiYnJ2dtbS0FBUVJSUlGRgYfv36JSEhwcPD8+XLF05OTl5e3o8fP3758kVERAQ0Ms3Ly8zMLCwsrKysLC0tra6ubmBgoKSkJC8vD8nib9++vXv37p0799AdiI2PM6RBLUAmBnZ2ln//QEXHkydPWFlZ5eTk/v5hfPfxw8+fP79//379+nVpaek/f37dvn1bQlJMVlb2zp07r169EhUVfvTo0ffv37W0tL5++n7z5s1bt259+/bt/PnzIiIiX758uX//vqam+tevX0VERN6+fcvIyCgiIgLJ+tgciS6G09G/f/9lZGBmYQH1AN+9e/fs2bOnT17cunXrxfM3TExM9+7dY2Nje/369Zs3b9TVVQUFBYWEhGRkZKSlpdnZ2VVUVJSVld++fWtmZnbj6q2vX7/q6uo+evRIXFxcUFDw06dPv3//fv/+/bVr19jZ2SUkJPT0dbm4uC5evPjkyRMtTSV0N2LwcSYPVlZm8BgxaFqCg4NDSkpKTk6On5//7NmzzMzM7Ozs3Nzcjo6OYmJiampqMjIyXFxcYmJikpKS3NzcXFxcPDw8bGxsoqKiPDw8wsLCUlJSfHx8cnJy+vr6ampqFhYWvLy8oFEmXt7f
v3+/efPmw4cPz549u3DhAoYLsQjgDGkGBgZWVsZ//xi+fv357ds3Xl5eLk5eFRUVRUVFFXW1z58/P336VF1d/fTp02JiIh8+fDh+/DgPD8+TJ0+ePn0qLy979erVFy9eXLp06cyZM48ePdLR0Tl//vzt2zeNjY1fvnz55MmT379/srOzv3//npWV9e69F05OTsLCwoKCgljciCGE09E/f/5mZQHlxU+fPt0FAxZmdnlFhV+/fv3////z589v377V0dG5deuWkZHBz58/mf4w8PDwSEhIfP78mY+PT0FBgZeXV05O7s3Ld6ysrKqqqq9evfr06QMbG9vjx4+vXbv25YvMu3fvXr58GRUVJSDIr62tfefOHSIdjTN5sLOzfv36nYGB4dGjR0pKSlJSUgoKCk+fPt27d++HDx/+/PljYmLy588fT0/PL1++hIaGSklJycjISElJiYiIQBIJuHXICPEANze3uLi4urq6rq6ukJCQk5OTubk5ExMTGxvb379/r1+/fv78+a9fv545c+bPH1DAwhvDv3+DepNwLkiOgQFnSP/48YuXF9Qe//379+XLl9+9e2dvp/eP4b+7u7uFhcXVq1dv3brFxMRkamrKysp8586do0ePKikpXbp06dmzZ7dv3z5//vzdu3eFhIRu37j748cPFRWV69evf/36+fnz5y9evJCSkmJnZ//////z589v3rz5/ft3bm5ueXn5p08f3rx5U0tbDd4q/gce4IM01yAuxtfKY2Zm/veP4devv+/evWNiYgLVJp8+ff3+7cePH0+fPhUWFpaQkHj06NG9e/fExUVlZGQ4ODjY2Ng4OTkhRYeysjInJ6exsfGPr7++fv0qJib2+/fvb9++3b179+zZsxwcHE+fPn737p2Ojo6dnd2Nm9eFhYXBufzkuXPnNLVUGRkZ//4FDVpDwhjedYK4G2dIs7Iyf3j/hZ+f5/3793///r158+b7d5/sHR2+fvl5/fr1T58+aWhoSEpKgkpoCQl5eXlLS0stLa3Xr19DChY2NjZ+fn5RUVEJCYnfv38rKSlJS0uLi4tCkpmjo+OtWzfu3r37+/fvZ8+e3b9/n4GBwcLCQlNT886dO//+/WNhYUEOYzRH40zTDAwM/Pw8X7/+ZGBg0NXV/fbtGwcHx/fv369evfrw4UMeHp47d+58+fLl1atX79+/P3DgwLNnz+7evQsuIm5fvHjx6tWr9+7du3PnztmzZ69du3bjxo179+69ffv2xYsXf//+hRSL6urqr169unHjhqqq6vv37/ft2yckJPT69euPHz/CuyCQdhskgOEkTkd//PiZkZHh6tWrnz9/ZmdnFxMT8/Ly+vjx49WrV3/8+CEjI8PAwPD79++vX79++/bt48ePv3///vfv358/fwTAQExMTEJCghcM+Pn5//379+PHj/fv31+/fv3ly5c3b97cvXu3uLg4MzPzly9fnJ2dxcXFDxw4ICQk9P///2fPnv379+8veP4cMkkAIeGOxpM8QOMpQkJCjIyMK1as+P37t5iY2NPnz3x9fb/9/PHt2zdmZmYhISEjIyNxcVF5efl3798YGBhcv35dQ0NDSkpKVFT0w4cPP378UFNT4+XlVVBQ0NTUZGZm5OLiYmFhUVFROXbsCAsLy9evX8+ePXvs2DEODg5JSUllZeVbN6+9ffsW3IhggqQKSKaEuxiUEf9DG1XwUXimv+BZK3Yujpcv3qqqKHwx0b50dK+Rvv7ZI9s3bd7OwsmrpKLOxMDw9df3yxfPf/vxA1Slc7AzMv5/8PDx7Tv32Fg5GBkZHzx48PHde34e3idPXoiIiEhISIBHH9nBqxC4f/75Laug+Ob9OxlZaR5ONk6Wf9bmJj/ePT62Z7OsGN+/n5+ZGP8xsTCB5jQYGH7++sPMwgJttIKc+w/qRIb/fxn+/2b4/+8/A8NPBobvDAzf/zGISQgz/P/C/+9Nf2VihKM626eb+nJcsvxM/Cx/9FTlZMUEvn56
z83JycvPd/vuvb8MrO8+fvvw6SsTCysLM9vXT1/5eHkN9PU/ffr0+fPn79+/379758GDB38Z/j9/8erqtRvrN2/h4ePl5+P6/vH1/cvHXt86xfjqGuv721baMjaG6gz/fzD8/ckEHsFi5+KAhCNoOAo0X/yP5R84pEG9FNBQMdM/8GA5KNghXvv5heHHWw4OLl4eZjMtGUVZqecfGIUklX7+YXjI8F9GSlJWQVlBVVNaTvH7959GJqa3b91SVVFTVVWxsLBgYWQQEhIyMTERl5Q2NTV9eP8eFw+3qob67z//OLi5FBUVlZWVhLjZDu/eJMzLxsn41cfBRE1D9dH7r0x/fzAw/GIADYKzwQfPQMmDETLDzQCdXwIJgTETeCDwPwMDC2iRyz+Gn9+/f/vyjeUv738Ofl4eNl6uS3eviclp3n/y6PHjx6zc/K/evObgEzh16pSZmcWTJ09u3LghxM/HzsZ68eJFTjZWKRnpK5evffn2Q1Za8tmLl0pKoLodlKwZmaQlxOXlZGxN9F/dv6ohK8TByiIvKcbFz/f39UdIYQd2DnYCS0ZkBIc9qFj5/+//98//f/3694fp29ffv/4zf/z16/3799x8vL///mXjYOfm42NiYvrx48e9u7cVFBT+//97+/ZtbQ3179+/f/r0iYkf1PD/9u3b589fnjx+Bq5BGSQkJG5ev8HEwnj33u1b16+J8xmyMP7l4WYX4OFiYvz77snDX7/+QMoNBlifGpSMUR0PCmlIFIDF/0GGXP8zMPz7y8Dw7/fnD+8Y//3m5OBjZfzHwsDy4edfETFxcQkpJpbrAkIinLz80nKKCiqq5ubmsrKyurq6jx89MjIykpGWMjIyYvj/9+evP0ampvLy8ppqaufOq6qrq8vJyQgIgsa+5GRkOFhZ/v36zszw79P7N3zyQpwsDL/+/v8L7paDHYMgkFwIEsQM6X+MDKD+JRszA8PfP18/f/z98wcLw19GBgZ20AqUX//+/n/99v29+w9+/2V8+PQlKwc3Exv7hw8fBPj5f/z48fHj+x8/v7969er27bssTIwCgsKgsoWdXVJSgomJiZ+fn4eHh5uDk4ODTUSIX0JMVERYSExE4PvnVwx/fv379ZeNg+vXr1+gkP7/n4GRAVSHg0iwg0CuhWJQRgRlOxAXnMwZQY6GziEw/v/988e/P7/+/P717/cvxn9M3779+MvA+PLNu3cfPsnKKzx49vzv37937oCKhadPn969e/fE0WPKigqiwiKPHz+Wk5GWl1fcsHkXOwcXPx/fvXv3hIUF//79ffbsaWERIWZGBtZ/P2VFuH/++M71/++f3z8ZWBj+/Pr59w/Y0SD3gDBaGIOEsLTywCmIkYHh7++/zH/+MDGAJvaYmZn//PzH+O/fn7//efkE/v5nZOfkUlRWffX+s6a29uu379SUVeTl5QUE+B7dv6eqqirAx2tobCwvKy0pKWlkZKSgrKSsrCwlIyMuKibAzw/pmH3+8P77j6+gIurvP34eHmZG0CTOt29fQFO3//+D+ksgOYgj0UlQfkMS+8cAntxjZGBgZmJgYGN7//79fwYmVi7uH7//MLOwPX35kpmd4/T5S2/efXz3/tPHjx//////69cvXj4ePm6uzx8+CgkJsbOzc3Bw/Pnz5+fP36/evIN0V2VlZXl4eCDdAiEhIVNTUx8fnx8/fvz8+VNWVvbRo0eMjIw/fvxgZ2f/9esXGxsbw09Qmwc0Hfr3LyxDIpyJNU2DppmYwCU7qPQBLWZgZGRlf/PhEwsr54uPn3/+/svIxPLoydNLV65Z2Tx98uz5iRMnNNQ1379/f/zEMRZWZgE+3hs3bvDz8n758uXixYvMzMwXhASvXLokwMfDyMxw48YNBUV5ESHBTx+/fPj4UVNH++qpvb/+/OPnYv/wBbT6DdR6hrkU1JNAuBbKAo1tgEIbNBcLjhRwOc3AAFo0wsDA8IeBmYGJ+fcfBgYmtmev33MLCQsIin75
+lNCStrY1Fxf31BCQkpYUEhJQVFKUkJOVkZKQlJdVU1SUlpYWFRIROz////Kysq6utqCggLc3NwCAgK/f//+8uXbr5+/b925++LVy5t37srKyf/5+//rtx+cvPwMTMz/GZhAjga5C+pETArkYJAoaHodRKNgRsb/jMz/GFm+/Pj14x/T5++/ZORUnr16c+na9c9fv////58VDCDtIX5+fgEBAREREUjvkJubm4WF5cOHD5yc7NJSUtwcnOLi4jo6OioqKjr6emaWVgJCQorKKr///GXlFZRRUH7/+es/BmZQkcEAmiQHFdLg0XGQB1DcBOIgJw9wKQKqzEESf/+BApmJmeUPA/Pn77++ff/BxMkvIafEcOoJDx8/v4DgxUuXjxw7qqOn++jBw6NHj6qrqz579uwqGDwCA1l5OS4urhvXr+vq6Pz/8+f+/fsvX7788+fPi+evXrx8+ezFCxYW1mev3j5/8kxL1+Dtw+uff/xiYmP/x8gISpmwkGYCpxO0MgQU0qBs+h/sYpBroRhcRjL9Z2T6+5/h+6+/bz//EBARZ+cTZGFl5+LiUVJVk1WQ11DXlJaWZmVlFQQDFhYWCQkJERERVlZWeXl5SUlJRUVFMTExERFhZhZGdnb2P3/+PH/18s27t48ePbl398HXHz8/ff7+7OVbNXXtX/8YPn3+/vUnqFuL7GiQ20BrbqCuglCwJh+EByb/Qab1GEFrA37//f/7z9+/jCyfv/2Q0RD/8+ff0WPHL918aGhoKCoqrKqqqqio+PTxEzY2FhkpqTevXunqaqurq7549ZKZlYWPl19IUFhMTFRUVJSZ8b+GGqhG5OHne/P2vZauzqu3b+RkxP9++yQmKcUnysnEyvbhy7cfv77/B6UO0Doshn+I8AWlFIjzwS4EORoU2qACA9TUY2AEVeygFQFM//79/gkanv7HwMjE+uPPX05evo9fP/34+VWAn5fh3+8Tx45evXrV2Mjw+vXr9+/e1lBTu3Llyuev369du3H27Nk/f/58+/r987cvt+/fu3331se3b169evX6xcs3z19+/fzl3dsPX75/+/DlC9Pfn/9YmL5//87KzgnKBmysr999+8vIwgJyDwMDA6imQ7idgQm0Bu4/KCb+MIImi5kY/rMzMHAyMLL+AbVO//1j+MHE+uf375/CwqK//zDwCYiIyUhw8HGwsP7l4WbSUFdkZfknJyvJwsTA9J+Bk4ObiZHlwf1HMjKyXFxcf/78+ffvn56hPg8fLwM70w/GP2/ev7t/787/Hz/ePn95eO/+j+/f33v46OOvnycunP327zensKC0tPSje/d///zDyM71n4WDgZEFtDCY4T8zwz9QWIJTL9j1TAyMTCxQF4Om51kYGBn+glzM8I/hNyPDz///fzIzMbAysbKxsf7+z8DAw8nNzNXZ0TJr7rKdu7apKKspyP5RVla8f+8heJxOXl1LS0lJSUZWTkND4+v374KCgr///1ZUVZGWlWH////r+/fysnJfPn318PAwMTE5fPaUlIwkN5epvKIiw/+vXFwcEmKiHDzcL78yMLBAxqhBzTZQ2cvAwAxbwwN2N7g9Da65QSshwAmGgZHhHxOox/WP8e9/NkZmFkYGHg72/6AyEZSs5OTkSspKc3Pyf//59+PbDx5+gWMnT5iamj55/uzFy5e8fHysnBz3Hjz48eunxN1b33/+uHDhgrKiPPPPXzw8PP9A3ZYXjIyMd+7cYudg/f3zp4yUFCcrK8PPv0ICfEz/JT5+/c7BwYbcjf3PAFpHB0oQEMeBSUijH5yqwXwIwcjAAJq6ZmRgZ2Zm+veXg4mJnZGR4csXhj9/fvz48Y+B6cHjJ4+ePGXh4OQTEv7y4/u9Rw8fPn1y/9HDN+/fffry+fXbN//+gbrljCzMP3//+vuf8cGjJzdv37p248b5S+fffXj7/v3bj+/fnTl14s2L5wz/fzF8+8rNwcHDzfn96xcuDk4WFjaIMxgYmECFGKh9
DwovmCADyz9Quw+0fAgiBPEEaNaegZGBkYmThZnl3z8OZsZfjAwM794w/GPmEBStrGq+cOmik6Pzp29fvv74bmxuysfLb2Rqcu/hA2tbWxUVlUtXLnFzc8orKnz+8c3Y2NTe3vEiBwcPK7uqlsa12zf/MTIIiwh+ePfm9ZtnmgrijL9///zy+c/3rxx/fjMw/gc1PGCFNHT0A8XBIGeCygpIQgF1G8HSYOIfI8N/hv//2JlY2P7+Ymdl5WJh/vX5CwMbH5sgw6NnT4XFxd58fP/w6bOXb9+xsXLq6emdu3zx6asXD588/sf0//Xrl2/fMZw+c/LSlWuvP368d+/ei+ev/jD8f/byxedvX588f/bhx5fv377++P5VXlqKgYXl55cv39+/Z2MBRc5rZmZIigC5DorREwILuOZkArkYqgJEQXpcDAyM7MxMzL9BMwJM/xi+fvz0n5VXQPq/mISkoNCjn3//SMhKv33z/uq10wePHuHk4Hj//v2pUyclJCTevH7Jysr8/Pnzz1+/Pn/9fuanWdcvXGT4+2vzlo2/fvz48+8vxw0udk42VgZGaQlJBoZ/P75/+/vvNxMTMycnJ+M/ZlC7FOQKKAalEHC9COVjaU+DEhATE6iYBIU1ExML478fDP/+//nz58uPb4zsXzh+/bl+8+bNO7cVFJUfPX7689cfDS3N1+/e/vj6TUJS8tX7ty/fvBQVFvz67cfL508FhUVZWFju37//+dtXCTGhF29ey8nKsLCxvn//9tfn72yM/0Fr7H/8+f3jJyc7FxPL/z8/fv9jBOc9UPZjZPjP+P8faN0RWAicAsAOB/WswAwEASlpGECFICMbB9d/RiaG778YGVhY2TjZOLlevnx949YtXj6Bn79/CYgI8wjy/2Vk4OHlFRQR5uLn5ebmhjSmIZNdf3//ZmNm+f/nPw8v78/fv0WlJBjZWf8w/OXi4WZnZ//378//v/8ZmJlZWTmevnj5889fFg7OX5A1dIyM/2HDYn/+/AGNDCAcCBopQJpCh6Zuhv8MTD9+/uRgYGRkYn7x+h0vLz+PhhbHpw8Pvv149ePV9+/feXj52Tm5Obl5GFhZ//75948RPG335/fPX78Y///5+fMP0///v3/9ZPj3//fPX7/BALRynYmRhYOdlY2Z8d//F48e/fn5W1tb+/vbD3/+MejauTC8efT63ac//5l+//jFysLx9+9vFlZQuYCaNEBuZ2ICdVXArSWIi/+Dkvd/BkZ2di4GVlYhBSVWXr7fTKwMPLw/GBj/MrE+fvbs54/fHBxc7OzsoAFpDi4ODo7/jAws7Gy/Qc7/9////48fPvz+/fvVi5dvX7/68fXb75+/mJiY2Dk4OHm52Hg52Lk42Tg5mFlZREVFxSTlvn/7+enzD9DiaSa2D1+/8/AKghIEqFKBpgfM1il68vgHaiYx/GNg+cvA8v/nHwZeAT4Rscev314/cmL/yTOfvv/89PkrA6gjxsbIyPznN3gXBSMDaISOk5OJiYmVjZmVlRU0Wikq+v/P729fvvz4/p3x339OTk4BAQEeAX5WTo4/jP+///rJxMKmoqrG8Pf/f0bW56/e7dy07emrd59//JWVU2Dh5mZgYARNB4KyFiho//6FhCiIDR6bBDVKQBUgKITBgpBlpH8ZmL78+s3AxSUkJcvAzvXy45fPf/5zCYkxMDGzsrJDRhkhbQzQMixWVgbQ6qH/rMws7CysXOwcFmbmsjIyHGxsDP9ADRyQZ9hZmFhZGJmZ/jOCevy//vwWFpUEjQ//Z/r9n/Htp29ffv7l4hPiEZcAuwIU1BAGMzNan+sfqJUHcjLMJ5BcCB7RY+Tg5WP4x8gpI6/CzqPAwvzx/x8hOaUz15+yMrOwsrCwgLstfxn+M7GAiqpPH3+COjqMjH9+/2RhZrI0M33x8P71K9d5eEQZmJj//Pv34/cv1t+/mNiYmNhYOP5z//77X1xSipWDmZWTR0Vdy8rC9P+frxw8vOAe
NQNoyvU/I2hYlAXUXkKqbUAeQeq5QJMQKMT/gQYAQf5gZWRkYOdkF5dk5+TgZmH6ycDKzMbGxMTCwsICHnPh+Prj+58/oIEs0E4WUCeJ8fvPnyxMzMZGRscPHfzz5zczE+M/wECBC4qIv//+/Qb1rpmZGBmYWVkN9A0ZGBgEREUF+LkY2BlAI6Wg1MrE8PsPAyMzaPUtKINAV8mAHAvD6JUNOEuC3M3MwPqfkZnh918GZkYGdo7/P37+Z2D8C4o0kPVMoJzKzsLC8vfv3+/fv3/79u33j5+grjvYXG5ubm1tdQUFBWZG0BJ0RkZGVlZWNg6Q+n8M/3/+/Pnl21dGBmZtXZ03b76D0i075+93H0AOBfWvwctvwatNQOuHwQaCOgFgBoQAABtFrgTgwgSIAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load dataset\n", + "from datasets import load_dataset\n", + "from IPython.display import display\n", + "\n", + "dataset = load_dataset(\"ashraq/fashion-product-images-small\")\n", + "\n", + "# Sample a random row\n", + "display(dataset[\"train\"][0])\n", + "dataset[\"train\"][0]['image']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original dataset size: 44072\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgendermasterCategorysubCategoryarticleTypebaseColourseasonyearusageproductDisplayNameimage
236654712UnisexAccessoriesWatchesWatchesBlackWinter2016.0SportsADIDAS Unisex Cambridge Analogue Black Dial Watch<PIL.Image.Image image mode=RGB size=60x80 at ...
2554310495WomenApparelTopwearShirtsBlueFall2011.0CasualFlying Machine Women HS Blue Shirt<PIL.JpegImagePlugin.JpegImageFile image mode=...
3193449924WomenApparelSareeSareesGreenFall2012.0EthnicFabindia Green Maheshwari Maheshwari Zari Saree<PIL.Image.Image image mode=RGB size=60x80 at ...
4042242669WomenApparelTopwearKurtasBlackSummer2012.0EthnicAlma Women Black & White Kurta<PIL.JpegImagePlugin.JpegImageFile image mode=...
4066921304WomenAccessoriesEyewearSunglassesBrownWinter2016.0CasualCeline Dion Women Brown Frame Sunglasses<PIL.Image.Image image mode=RGB size=60x80 at ...
\n", + "
" + ], + "text/plain": [ + " id gender masterCategory subCategory articleType baseColour \\\n", + "23665 4712 Unisex Accessories Watches Watches Black \n", + "25543 10495 Women Apparel Topwear Shirts Blue \n", + "31934 49924 Women Apparel Saree Sarees Green \n", + "40422 42669 Women Apparel Topwear Kurtas Black \n", + "40669 21304 Women Accessories Eyewear Sunglasses Brown \n", + "\n", + " season year usage \\\n", + "23665 Winter 2016.0 Sports \n", + "25543 Fall 2011.0 Casual \n", + "31934 Fall 2012.0 Ethnic \n", + "40422 Summer 2012.0 Ethnic \n", + "40669 Winter 2016.0 Casual \n", + "\n", + " productDisplayName \\\n", + "23665 ADIDAS Unisex Cambridge Analogue Black Dial Watch \n", + "25543 Flying Machine Women HS Blue Shirt \n", + "31934 Fabindia Green Maheshwari Maheshwari Zari Saree \n", + "40422 Alma Women Black & White Kurta \n", + "40669 Celine Dion Women Brown Frame Sunglasses \n", + "\n", + " image \n", + "23665