From c99e9f1184cb3eab824e5dfd030007767e0fd492 Mon Sep 17 00:00:00 2001
From: Nicola Dall'Asen
Date: Wed, 13 Mar 2024 12:26:26 +0100
Subject: [PATCH 1/5] detect the Python version automatically; on 3.10 or newer, patch collections to use collections.abc

---
 deepseek_vl/__init__.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/deepseek_vl/__init__.py b/deepseek_vl/__init__.py
index 8cb7640..8f00116 100644
--- a/deepseek_vl/__init__.py
+++ b/deepseek_vl/__init__.py
@@ -16,3 +16,14 @@
 # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+# Check whether the Python version is 3.10 or newer.
+import sys
+if sys.version_info >= (3, 10):
+    print("Python version is 3.10 or newer, patching the collections module.")
+    # Monkey-patch collections so the aliases removed in 3.10 resolve again.
+    import collections
+    import collections.abc
+    for type_name in collections.abc.__all__:
+        setattr(collections, type_name, getattr(collections.abc, type_name))

From 5db8156747f82d67ea3f70e771619c53ae85d795 Mon Sep 17 00:00:00 2001
From: Nicola Dall'Asen
Date: Wed, 13 Mar 2024 12:30:50 +0100
Subject: [PATCH 2/5] pin transformers to 4.38.1 instead of 4.38.2, as autocast fails on MPS with the newer version

---
 pyproject.toml   | 2 +-
 requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a391008..da88b54 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ readme = "README.md"
 requires-python = ">=3.8"
 dependencies = [
     "torch>=2.0.1",
-    "transformers>=4.38.2",
+    "transformers==4.38.1",
     "timm>=0.9.16",
     "gradio>=4.13.0",
     "accelerate",
diff --git a/requirements.txt b/requirements.txt
index 7a93ed8..7535a74 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 torch>=2.0.1
-transformers>=4.38.2
+transformers==4.38.1
 timm>=0.9.16
 gradio>=4.13.0
 accelerate
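
Note: the shim in PATCH 1/5 works around Python 3.10's removal of the ABC aliases (Mapping, Sequence, ...) from the top-level collections module, which some transitive dependencies still import. A quick sanity check of its effect (an illustrative snippet, not part of the series) could look like this:

    import collections
    import collections.abc

    # Re-expose the ABCs on the top-level module, exactly as the patch does.
    for type_name in collections.abc.__all__:
        setattr(collections, type_name, getattr(collections.abc, type_name))

    # On Python 3.10+ this lookup would raise AttributeError without the shim.
    assert collections.Mapping is collections.abc.Mapping
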
+ """ + + if torch.cuda.is_available(): + print("Using CUDA and BFloat16") + device = torch.device("cuda") + dtype = torch.bfloat16 + elif torch.backends.mps.is_available(): + print("Using MPS and FP16") + device = torch.device("mps") + dtype = torch.float16 + else: + print("Using CPU and FP32") + device = torch.device("cpu") + dtype = torch.float32 + + return device, dtype + + def load_pretrained_model(model_path: str): vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path) tokenizer = vl_chat_processor.tokenizer From 40ec6491d4b3c0667e427622fb185878d3e98127 Mon Sep 17 00:00:00 2001 From: Nicola Dall'Asen Date: Wed, 13 Mar 2024 12:32:14 +0100 Subject: [PATCH 4/5] load model on detected device and use correct dtype --- cli_chat.py | 6 +++--- deepseek_vl/utils/io.py | 4 +++- inference.py | 8 +++++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cli_chat.py b/cli_chat.py index bbea14a..e34a88e 100644 --- a/cli_chat.py +++ b/cli_chat.py @@ -8,7 +8,7 @@ import torch from transformers import TextIteratorStreamer -from deepseek_vl.utils.io import load_pretrained_model +from deepseek_vl.utils.io import load_pretrained_model, get_device_and_dtype def load_image(image_file): @@ -34,13 +34,13 @@ def get_help_message(image_token): @torch.inference_mode() def response(args, conv, pil_images, tokenizer, vl_chat_processor, vl_gpt, generation_config): - + _, dtype = get_device_and_dtype() prompt = conv.get_prompt() prepare_inputs = vl_chat_processor.__call__( prompt=prompt, images=pil_images, force_batchify=True - ).to(vl_gpt.device) + ).to(vl_gpt.device, dtype=dtype) # run image encoder to get the image embeddings inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs) diff --git a/deepseek_vl/utils/io.py b/deepseek_vl/utils/io.py index a160f00..06c1a47 100644 --- a/deepseek_vl/utils/io.py +++ b/deepseek_vl/utils/io.py @@ -52,10 +52,12 @@ def load_pretrained_model(model_path: str): vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path) tokenizer = vl_chat_processor.tokenizer + device, dtype = get_device_and_dtype() + vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained( model_path, trust_remote_code=True ) - vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval() + vl_gpt = vl_gpt.to(device, dtype=dtype).eval() return tokenizer, vl_chat_processor, vl_gpt diff --git a/inference.py b/inference.py index b6fe38c..b22a632 100644 --- a/inference.py +++ b/inference.py @@ -2,7 +2,7 @@ from transformers import AutoModelForCausalLM from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM -from deepseek_vl.utils.io import load_pil_images +from deepseek_vl.utils.io import load_pil_images, get_device_and_dtype # specify the path to the model @@ -11,7 +11,9 @@ tokenizer = vl_chat_processor.tokenizer vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True) -vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval() + +device, dtype = get_device_and_dtype() +vl_gpt = vl_gpt.to(dtype).to(device).eval() conversation = [ { @@ -32,7 +34,7 @@ conversations=conversation, images=pil_images, force_batchify=True -).to(vl_gpt.device) +).to(vl_gpt.device, dtype=dtype) # run image encoder to get the image embeddings inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs) From 4eac5b59982e37dfdb8fe9c06e763235a09c64d3 Mon Sep 17 00:00:00 2001 From: Nicola Dall'Asen Date: Wed, 13 Mar 2024 12:33:52 +0100 Subject: [PATCH 5/5] modify example in README to use the new device and dtype function --- 
From 4eac5b59982e37dfdb8fe9c06e763235a09c64d3 Mon Sep 17 00:00:00 2001
From: Nicola Dall'Asen
Date: Wed, 13 Mar 2024 12:33:52 +0100
Subject: [PATCH 5/5] update the README example to use the new device and dtype helper

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6d24eaf..9bfaeeb 100644
--- a/README.md
+++ b/README.md
@@ -112,7 +112,7 @@ import torch
 from transformers import AutoModelForCausalLM
 
 from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
-from deepseek_vl.utils.io import load_pil_images
+from deepseek_vl.utils.io import load_pil_images, get_device_and_dtype
 
 
 # specify the path to the model
@@ -121,7 +121,9 @@ vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
 tokenizer = vl_chat_processor.tokenizer
 
 vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
-vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
+
+device, dtype = get_device_and_dtype()
+vl_gpt = vl_gpt.to(dtype).to(device).eval()
 
 conversation = [
     {
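
The README excerpt above stops at the conversation setup. The rest of the example is unchanged by this series; a sketch of how it continues on the detected device (reconstructed from the repository's existing README example, with the dtype argument from PATCH 4/5):

    # load images and prepare the batched inputs on the model's device/dtype
    pil_images = load_pil_images(conversation)
    prepare_inputs = vl_chat_processor(
        conversations=conversation, images=pil_images, force_batchify=True
    ).to(vl_gpt.device, dtype=dtype)

    # run the image encoder to get the image embeddings
    inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)

    # run the language model to get the response
    outputs = vl_gpt.language_model.generate(
        inputs_embeds=inputs_embeds,
        attention_mask=prepare_inputs.attention_mask,
        pad_token_id=tokenizer.eos_token_id,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=512,
        do_sample=False,
        use_cache=True,
    )

    answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
    print(answer)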