Skip to content

Commit 9bb02cc

Browse files
feat: add multiple images (or in-context learning) conversation examples (#47)
Co-authored-by: Bo Liu <benjaminliu.eecs@gmail.com>
1 parent 3c02b24 commit 9bb02cc

File tree

8 files changed

+121
-5
lines changed

8 files changed

+121
-5
lines changed

.github/workflows/lint.yml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
name: Lint
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
# Allow triggering the workflow manually
9+
workflow_dispatch:
10+
11+
permissions:
12+
contents: read
13+
14+
concurrency:
15+
group: "${{ github.workflow }}-${{ github.ref }}"
16+
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
17+
18+
env:
19+
CUDA_VERSION: "11.7"
20+
21+
jobs:
22+
lint:
23+
runs-on: ubuntu-latest
24+
timeout-minutes: 30
25+
steps:
26+
- name: Checkout
27+
uses: actions/checkout@v4
28+
with:
29+
submodules: "recursive"
30+
fetch-depth: 1
31+
32+
- name: Set up Python 3.9
33+
uses: actions/setup-python@v5
34+
with:
35+
python-version: "3.9"
36+
update-environment: true
37+
38+
- name: Upgrade pip
39+
run: |
40+
python -m pip install --upgrade pip setuptools wheel
41+
42+
- name: Install package with lint dependencies
43+
env:
44+
USE_FP16: "OFF"
45+
TORCH_CUDA_ARCH_LIST: "Auto"
46+
run: |
47+
python -m pip install torch numpy pybind11
48+
python -m pip install -vvv --no-build-isolation --editable '.[lint]'
49+
50+
- name: pre-commit
51+
run: |
52+
make pre-commit
53+
54+
- name: ruff
55+
run: |
56+
make ruff
57+
58+
- name: flake8
59+
run: |
60+
make flake8
61+
62+
- name: isort and black
63+
run: |
64+
make py-format
65+
66+
- name: addlicense
67+
run: |
68+
make addlicense

README.md

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,18 +132,34 @@ tokenizer = vl_chat_processor.tokenizer
132132
vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
133133
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
134134

135+
## single image conversation example
135136
conversation = [
136137
{
137138
"role": "User",
138139
"content": "<image_placeholder>Describe each stage of this image.",
139-
"images": ["./images/training_pipelines.jpg"]
140+
"images": ["./images/training_pipelines.jpg"],
140141
},
141-
{
142-
"role": "Assistant",
143-
"content": ""
144-
}
142+
{"role": "Assistant", "content": ""},
145143
]
146144

145+
## multiple images (or in-context learning) conversation example
146+
# conversation = [
147+
# {
148+
# "role": "User",
149+
# "content": "<image_placeholder>A dog wearing nothing in the foreground, "
150+
# "<image_placeholder>a dog wearing a santa hat, "
151+
# "<image_placeholder>a dog wearing a wizard outfit, and "
152+
# "<image_placeholder>what's the dog wearing?",
153+
# "images": [
154+
# "images/dog_a.png",
155+
# "images/dog_b.png",
156+
# "images/dog_c.png",
157+
# "images/dog_d.png",
158+
# ],
159+
# },
160+
# {"role": "Assistant", "content": ""}
161+
# ]
162+
147163
# load images and prepare for inputs
148164
pil_images = load_pil_images(conversation)
149165
prepare_inputs = vl_chat_processor(

images/dog_a.png

204 KB
Loading

images/dog_b.png

356 KB
Loading

images/dog_c.png

418 KB
Loading

images/dog_d.png

363 KB
Loading

inference.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
)
3434
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
3535

36+
# single image conversation example
3637
conversation = [
3738
{
3839
"role": "User",
@@ -42,6 +43,23 @@
4243
{"role": "Assistant", "content": ""},
4344
]
4445

46+
# multiple images (or in-context learning) conversation example
47+
# conversation = [
48+
# {
49+
# "role": "User",
50+
# "content": "<image_placeholder>A dog wearing nothing in the foreground, "
51+
# "<image_placeholder>a dog wearing a santa hat, "
52+
# "<image_placeholder>a dog wearing a wizard outfit, and "
53+
# "<image_placeholder>what's the dog wearing?",
54+
# "images": [
55+
# "images/dog_a.png",
56+
# "images/dog_b.png",
57+
# "images/dog_c.png",
58+
# "images/dog_d.png",
59+
# ],
60+
# },
61+
# {"role": "Assistant", "content": ""}
62+
# ]
4563

4664
# load images and prepare for inputs
4765
pil_images = load_pil_images(conversation)

pyproject.toml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,20 @@ gradio = [
3434
"markdown==3.4.1",
3535
"SentencePiece==0.1.96"
3636
]
37+
lint = [
38+
"isort",
39+
"black[jupyter] >= 22.6.0",
40+
"pylint[spelling] >= 2.15.0",
41+
"flake8",
42+
"flake8-bugbear",
43+
"flake8-comprehensions",
44+
"flake8-docstrings",
45+
"flake8-pyi",
46+
"flake8-simplify",
47+
"ruff",
48+
"pyenchant",
49+
"pre-commit",
50+
]
3751

3852
[tool.setuptools]
3953
packages = {find = {exclude = ["images"]}}

0 commit comments

Comments
 (0)