Skip to content
4 changes: 3 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def main():

args = parse_args()
assert len(args.data), '--data should be a list of data files'

if args.retry is not None:
for k, v in supported_VLM.items():
if hasattr(v, 'keywords') and 'retry' in v.keywords:
Expand Down Expand Up @@ -89,6 +88,9 @@ def main():
continue

result_file = f'{pred_root}/{model_name}_{dataset_name}.xlsx'
if dataset_name in ['TaskMeAnything_v1_videoqa_random']:
packstr = 'pack' if args.pack else 'nopack'
result_file = f'{pred_root}/{model_name}_{dataset_name}_{args.nframe}frame_{packstr}.xlsx'
if dataset_name in ['MMBench-Video']:
packstr = 'pack' if args.pack else 'nopack'
result_file = f'{pred_root}/{model_name}_{dataset_name}_{args.nframe}frame_{packstr}.xlsx'
Expand Down
3 changes: 2 additions & 1 deletion vlmeval/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .mmbench_video import MMBenchVideo
from .text_mcq import CustomTextMCQDataset, TextMCQDataset
from .videomme import VideoMME
from .video_mcq import VideoMCQDataset
from .utils import *
from ..smp import *

Expand All @@ -23,7 +24,7 @@
]

VIDEO_DATASET = [
MMBenchVideo, VideoMME
MMBenchVideo, VideoMME, VideoMCQDataset
]

TEXT_DATASET = [
Expand Down
38 changes: 17 additions & 21 deletions vlmeval/dataset/image_mcq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@


class ImageMCQDataset(ImageBaseDataset):

TYPE = 'MCQ'

DATASET_URL = {
Expand All @@ -16,14 +15,14 @@ class ImageMCQDataset(ImageBaseDataset):
'MMBench_DEV_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_DEV_CN.tsv',
'MMBench_TEST_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_TEST_CN.tsv',
'MMBench': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench.tsv', # Internal Only
'MMBench_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN.tsv', # Internal Only
'MMBench_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN.tsv', # Internal Only
# MMBench v1.1
'MMBench_DEV_EN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_DEV_EN_V11.tsv',
'MMBench_TEST_EN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_TEST_EN_V11.tsv',
'MMBench_DEV_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_DEV_CN_V11.tsv',
'MMBench_TEST_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_TEST_CN_V11.tsv',
'MMBench_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_V11.tsv', # Internal Only
'MMBench_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN_V11.tsv', # Internal Only
'MMBench_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN_V11.tsv', # Internal Only
# SEEDBench Series
'SEEDBench_IMG': 'https://opencompass.openxlab.space/utils/VLMEval/SEEDBench_IMG.tsv',
'SEEDBench2': 'https://huggingface.co/datasets/VLMEval/SEEDBench2/resolve/main/SEEDBench2.tsv',
Expand Down Expand Up @@ -66,14 +65,14 @@ class ImageMCQDataset(ImageBaseDataset):
'MMBench_DEV_CN': '08b8fc3324a5ed74155350f57be69fbd',
'MMBench_TEST_CN': '7e1239baf0ee4c8b513e19705a0f317e',
'MMBench': '4115aea3383f3dd0083be6a633e0f820', # Internal Only
'MMBench_CN': '2e053ffc90ea598b1feae13c36dc13ee', # Internal Only
'MMBench_CN': '2e053ffc90ea598b1feae13c36dc13ee', # Internal Only
# MMBench v1.1
'MMBench_DEV_EN_V11': '30c05be8f2f347a50be25aa067248184',
'MMBench_TEST_EN_V11': '26f0f15381a21720255091d3e0316ce6',
'MMBench_DEV_CN_V11': '593f9b5f6bea453d870a798b34ae4f37',
'MMBench_TEST_CN_V11': '74bbe4556dac745613c7cbe5ad787050',
'MMBench_V11': 'b9276414f57af1308dcc4d0cd9b42e7c', # Internal Only
'MMBench_CN_V11': '95f6980dd1b4de38e3cbffe0305a3f25', # Internal Only
'MMBench_CN_V11': '95f6980dd1b4de38e3cbffe0305a3f25', # Internal Only
# SEEDBench
'SEEDBench_IMG': '68017231464752261a2526d6ca3a10c0',
'SEEDBench2': '4ec15cf864c4f16274112284f531813e',
Expand Down Expand Up @@ -103,11 +102,10 @@ class ImageMCQDataset(ImageBaseDataset):
'RealWorldQA': '92321028d2bc29040284b6674721e48f',
'MLLMGuard_DS': '975fc0dd7119386e198c37d71e274b3f',
'BLINK': '3b6649b6a662184ea046908e5506260e',
'TaskMeAnything_v1_imageqa_random': '93b7290b447ef947f3b3abae5ad4bc1b'
'TaskMeAnything_v1_imageqa_random': '023fef69e2ca21827afb77c5ec3bc889',
}

def build_prompt(self, line):

if isinstance(line, int):
line = self.data.iloc[line]

Expand All @@ -117,11 +115,7 @@ def build_prompt(self, line):
tgt_path = self.dump_image(line)

question = line['question']
options = {
cand: line[cand]
for cand in string.ascii_uppercase
if cand in line and not pd.isna(line[cand])
}
options = {cand: line[cand] for cand in string.ascii_uppercase if cand in line and not pd.isna(line[cand])}
options_prompt = 'Options:\n'
for key, item in options.items():
options_prompt += f'{key}. {item}\n'
Expand All @@ -145,10 +139,13 @@ def build_prompt(self, line):

def evaluate(self, eval_file, **judge_kwargs):
from .utils.multiple_choice import report_acc, report_acc_MMT, mcq_circular_eval, mcq_vanilla_eval

# assert dataset is not None
dataset_map = {
'MMBench_TEST_EN': 'MMBench', 'MMBench_TEST_EN_V11': 'MMBench_V11',
'MMBench_TEST_CN': 'MMBench_CN', 'MMBench_TEST_CN_V11': 'MMBench_CN_V11'
'MMBench_TEST_EN': 'MMBench',
'MMBench_TEST_EN_V11': 'MMBench_V11',
'MMBench_TEST_CN': 'MMBench_CN',
'MMBench_TEST_CN_V11': 'MMBench_CN_V11',
}
dataset = self.dataset_name
if dataset in dataset_map:
Expand Down Expand Up @@ -193,9 +190,7 @@ def evaluate(self, eval_file, **judge_kwargs):
meta_q_map = {x: y for x, y in zip(meta['index'], meta['question'])}
data_map = {x: y for x, y in zip(data['index'], data['question'])}
for k in data_map:
assert k in meta_q_map, (
f'eval_file should be the same as or a subset of dataset {self.dataset_name}'
)
assert k in meta_q_map, f'eval_file should be the same as or a subset of dataset {self.dataset_name}'

if circular:
data = mcq_circular_eval(model, data, meta, nproc, result_file, self.dataset_name)
Expand All @@ -216,14 +211,15 @@ def evaluate(self, eval_file, **judge_kwargs):
dump(acc, score_file)

if dataset == 'AesBench_VAL':
warnings.warn('Note that AesBench VAL is just a toy version of AesBench TEST. For full results, \
warnings.warn(
'Note that AesBench VAL is just a toy version of AesBench TEST. For full results, \
please evaluate on AesBench TEST. The AesBench TEST dataset is more than 20 times \
larger than the VAL dataset and the leaderboard results are based on AesBench TEST.')
larger than the VAL dataset and the leaderboard results are based on AesBench TEST.'
)
return acc


class MMMUDataset(ImageMCQDataset):

DATASET_URL = {
'MMMU_DEV_VAL': 'https://opencompass.openxlab.space/utils/VLMEval/MMMU_DEV_VAL.tsv',
'MMMU_TEST': 'https://opencompass.openxlab.space/utils/VLMEval/MMMU_TEST.tsv',
Expand Down Expand Up @@ -264,14 +260,14 @@ def build_prompt(self, line):


class CustomMCQDataset(ImageMCQDataset):

def load_data(self, dataset):
data_path = osp.join(LMUDataRoot(), f'{dataset}.tsv')

if file_size(data_path, 'GB') > 1:
local_path = data_path.replace('.tsv', '_local.tsv')
if not osp.exists(local_path) or os.environ.get('FORCE_LOCAL', None):
from ..tools import LOCALIZE

LOCALIZE(data_path, local_path)
data_path = local_path
return load(data_path)
15 changes: 15 additions & 0 deletions vlmeval/dataset/utils/video_mcq_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import base64


# video_mcq uses base64 for mp4 video encoding and decoding.
# Use this helper to convert an mp4 file to a base64 string.
def mp4_to_base64(mp4_path):
    """Read the mp4 file at *mp4_path* and return its base64-encoded contents.

    On failure the exception is swallowed and a human-readable message string
    is returned instead of raising. NOTE(review): callers cannot distinguish
    such a message from valid base64 data — consider raising in the future.
    """
    try:
        with open(mp4_path, 'rb') as fh:
            payload = fh.read()
        return base64.b64encode(payload).decode('utf-8')
    except FileNotFoundError:
        return 'The file was not found.'
    except Exception as err:
        return f'An error occurred: {err}'
93 changes: 73 additions & 20 deletions vlmeval/dataset/video_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,68 @@
from ..smp import *


class VideoBaseDataset:
# Hook mapping a dataset name to the directory name that holds its videos.
# The default is the identity mapping; extend this function for datasets
# whose video folder is named differently from the dataset itself.
def video_root_map(dataset):
    return dataset


class VideoBaseDataset:
MODALITY = 'VIDEO'

def __init__(self,
dataset='MMBench-Video',
pack=False):
def __init__(self, dataset='MMBench-Video', pack=False, skip_novideo=True):
try:
import decord
except:
warnings.warn('Please install decord via `pip install decord`.')

self.dataset_name = dataset
ret = self.prepare_dataset(dataset)
assert ret is not None
lmu_root = LMUDataRoot()
self.frame_root = osp.join(lmu_root, 'images', dataset)
os.makedirs(self.frame_root, exist_ok=True)
self.frame_tmpl = 'frame-{}-of-{}.jpg'
# the init path for the two pre-existing video datasets
if dataset in ['MMBench-Video', 'Video-MME']:
self.dataset_name = dataset
ret = self.prepare_dataset(dataset)
assert ret is not None
lmu_root = LMUDataRoot()
self.frame_root = osp.join(lmu_root, 'images', dataset)
os.makedirs(self.frame_root, exist_ok=True)
self.frame_tmpl = 'frame-{}-of-{}.jpg'

self.data_root = ret['root']
self.data_file = ret['data_file']
self.data = load(self.data_file)

assert 'question' in self.data and 'video' in self.data
videos = list(set(self.data['video']))
videos.sort()
self.videos = videos
self.pack = pack

# dataset init without prepare_dataset, just like image_base
else:
lmu_root = LMUDataRoot()
# You can override this variable to save image files to a different directory
self.dataset_name = dataset
self.frame_root = osp.join(lmu_root, 'images', dataset)
self.frame_tmpl = 'frame-{}-of-{}.jpg'
data, data_root = self.load_data(dataset)
self.data_root = data_root
self.meta_only = True
self.skip_novideo = skip_novideo
if skip_novideo and 'video' in data:
data = data[~pd.isna(data['video'])]

data['index'] = [str(x) for x in data['index']]
data['index'] = [str(x) for x in data['index']]

self.data_root = ret['root']
self.data_file = ret['data_file']
self.data = load(self.data_file)
if 'video' in data:
self.meta_only = False

assert 'question' in self.data and 'video' in self.data
videos = list(set(self.data['video']))
videos.sort()
self.videos = videos
self.pack = pack
if 'video_path' in data:
paths = [toliststr(x) for x in data['video_path']]
data['video_path'] = [x[0] if len(x) == 1 else x for x in paths]

if np.all([istype(x, int) for x in data['index']]):
data['index'] = [int(x) for x in data['index']]

self.data = data
self.post_build(dataset)

def __len__(self):
return len(self.videos) if self.pack else len(self.data)
Expand All @@ -44,6 +77,26 @@ def __getitem__(self, idx):
assert idx < len(self.data)
return dict(self.data.iloc[idx])

def load_data(self, dataset):
url = self.DATASET_URL[dataset]
file_md5 = self.DATASET_MD5[dataset]
return self.prepare_tsv(url, file_md5)

def prepare_tsv(self, url, file_md5=None):
    """Ensure the dataset TSV is present locally, then load it.

    Downloads *url* into LMUDataRoot() when the local copy is missing or
    fails the optional md5 check, and returns the loaded data.

    Args:
        url (str): remote location of the dataset tsv file.
        file_md5 (str, optional): expected md5 of the local file; when given,
            a mismatching local copy triggers a re-download.

    Returns:
        tuple: (loaded dataset, data_root directory path).
    """
    data_root = LMUDataRoot()
    os.makedirs(data_root, exist_ok=True)
    file_name = url.split('/')[-1]
    data_path = osp.join(data_root, file_name)
    # Re-download when the file is missing or fails the md5 check
    # (original used an empty `if ...: pass / else:` — condition inverted here).
    needs_download = not osp.exists(data_path) or (file_md5 is not None and md5(data_path) != file_md5)
    if needs_download:
        warnings.warn('The dataset tsv is not downloaded')
        download_file(url, data_path)
    return load(data_path), data_root

def post_build(self, dataset):
    # Subclass hook invoked at the end of __init__ after self.data is set;
    # the base implementation is intentionally a no-op.
    pass

def frame_paths(self, video, num_frames=8):
frame_root = osp.join(self.frame_root, video)
os.makedirs(frame_root, exist_ok=True)
Expand All @@ -68,7 +121,7 @@ def save_video_frames(self, video, num_frames=8):
# Return a list of dataset names that are supported by this class, can override
@classmethod
def supported_datasets(cls):
return ['MMBench-Video', 'Video-MME']
return ['MMBench-Video', 'Video-MME'] + list(cls.DATASET_URL)

# Given the prediction file, return the evaluation results in the format of a dictionary or pandas dataframe
@abstractmethod
Expand Down
Loading