Skip to content

Commit 9515dfc

Browse files
committed
Move changes to new fix file and bump version numbers
1 parent 04b317f commit 9515dfc

File tree

2 files changed

+92
-42
lines changed

2 files changed

+92
-42
lines changed

tools/fix_v022.py

Lines changed: 1 addition & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -11,47 +11,6 @@
1111
BIGCODEBENCH_NEW_VERSION = "v0.1.3"
1212

1313
def map_ds(sample):
14-
if sample["task_id"] in ["BigCodeBench/211"]:
15-
sample['test'] = sample['test'].replace(
16-
"""
17-
mock_response = MagicMock()
18-
mock_response.content = MOCK_CONTENT
19-
""",
20-
"""
21-
mock_response = MagicMock()
22-
mock_response.content = MOCK_CONTENT
23-
mock_response.status_code = 200
24-
"""
25-
)
26-
if sample["task_id"] in ["BigCodeBench/215"]:
27-
sample['test'] = sample['test'].replace(
28-
"""
29-
mock_response = Mock()
30-
""",
31-
"""
32-
mock_response = Mock()
33-
mock_response.status_code = 200
34-
"""
35-
)
36-
sample['test'] = sample['test'].replace(
37-
"""
38-
mock_response.text =""",
39-
"""
40-
MOCK_TEXT ="""
41-
)
42-
sample['test'] = sample['test'].replace(
43-
"""
44-
mock_get.return_value = mock_response
45-
""",
46-
"""
47-
mock_response.text = MOCK_TEXT
48-
mock_response.json = lambda: json.loads(MOCK_TEXT)
49-
mock_get.return_value = mock_response
50-
"""
51-
)
52-
sample['complete_prompt'] = sample['complete_prompt'].replace("Thif function will raise", "This function will raise")
53-
sample['instruct_prompt'] = sample['instruct_prompt'].replace("Thif function will raise", "This function will raise")
54-
sample['doc_struct'] = sample['doc_struct'].replace("Thif function will raise", "This function will raise")
5514
if sample["task_id"] in ["BigCodeBench/1005"]:
5615
for k in sample.keys():
5716
sample[k] = sample[k].replace(
@@ -69,7 +28,7 @@ def map_ds(sample):
6928
hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF)
7029
ds = ds_dict[BIGCODEBENCH_VERSION]
7130
hard_ds = hard_ds_dict[BIGCODEBENCH_VERSION]
72-
function_id = [211, 215, 1005]
31+
function_id = [1005]
7332

7433
new_ds = ds.map(map_ds)
7534
new_ds.to_json("BigCodeBench.jsonl")

tools/fix_v023.py

Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
from datasets import load_dataset, Dataset, DatasetDict
2+
from huggingface_hub import HfApi
3+
4+
import json
5+
import copy
6+
7+
# Hub dataset id passed to load_dataset() and push_to_hub() for the full benchmark.
BIGCODEBENCH_HF = "bigcode/bigcodebench"
8+
# Hub dataset id passed to load_dataset() and push_to_hub() for the "hard" benchmark.
BIGCODEBENCH_HARD_HF = "bigcode/bigcodebench-hard"
9+
# Key of the existing split that the script reads and patches.
BIGCODEBENCH_VERSION = "v0.1.3"
10+
# Hub repo id that receives the per-task old.jsonl / new.jsonl uploads.
BIGCODEBENCH_UPDATE = "bigcode/bcb_update"
11+
# Key under which the patched split is stored before pushing back to the hub.
BIGCODEBENCH_NEW_VERSION = "v0.1.4"
12+
13+
def map_ds(sample):
    """Patch the mocked-HTTP test code of tasks 211 and 215 in place.

    For ``BigCodeBench/211`` a ``status_code = 200`` assignment is inserted
    right after the ``MagicMock`` response has its ``content`` set.

    For ``BigCodeBench/215`` the test source is rewritten so that:

    * every ``mock_response = Mock()`` is followed by ``status_code = 200``;
    * the payload formerly assigned to ``mock_response.text`` is bound to
      ``MOCK_TEXT`` instead;
    * ``mock_response.text`` and ``mock_response.json`` are (re)defined from
      ``MOCK_TEXT`` just before ``mock_get.return_value`` is set;
    * the "Thif function will raise" typo is corrected in
      ``complete_prompt``, ``instruct_prompt`` and ``doc_struct``.

    Anchors are matched with whatever leading spaces/tabs precede them, and
    inserted lines reuse that same indentation, so the patch does not depend
    on the exact indent depth of the test source.

    Args:
        sample: Mapping with at least ``task_id`` and ``test``; task 215 also
            needs ``complete_prompt``, ``instruct_prompt`` and ``doc_struct``.

    Returns:
        The same ``sample`` object, mutated when its task id is targeted and
        untouched otherwise.

    Raises:
        KeyError: If a field required for the matched task is missing.
    """
    import re  # local import: keeps this block self-contained

    task_id = sample["task_id"]

    if task_id == "BigCodeBench/211":
        # Group 1 is the whole anchor; ``indent`` is the first line's indent.
        sample["test"] = re.sub(
            r"(\n(?P<indent>[ \t]*)mock_response = MagicMock\(\)"
            r"\n[ \t]*mock_response\.content = MOCK_CONTENT\n)",
            r"\1\g<indent>mock_response.status_code = 200\n",
            sample["test"],
        )

    if task_id == "BigCodeBench/215":
        patched = sample["test"]

        # 1) Give every bare Mock() response a successful status code.
        patched = re.sub(
            r"(\n(?P<indent>[ \t]*)mock_response = Mock\(\)\n)",
            r"\1\g<indent>mock_response.status_code = 200\n",
            patched,
        )
        # 2) Bind the payload to MOCK_TEXT. This must run before step 3,
        #    which re-introduces a ``mock_response.text =`` line.
        patched = re.sub(
            r"(\n[ \t]*)mock_response\.text =",
            r"\1MOCK_TEXT =",
            patched,
        )
        # 3) Derive .text and .json from MOCK_TEXT right before the mock is
        #    installed as the return value of the patched getter.
        patched = re.sub(
            r"\n(?P<indent>[ \t]*)mock_get\.return_value = mock_response\n",
            r"\n\g<indent>mock_response.text = MOCK_TEXT"
            r"\n\g<indent>mock_response.json = lambda: json.loads(MOCK_TEXT)"
            r"\n\g<indent>mock_get.return_value = mock_response\n",
            patched,
        )
        sample["test"] = patched

        # Same typo appears in all three prompt representations.
        for field in ("complete_prompt", "instruct_prompt", "doc_struct"):
            sample[field] = sample[field].replace(
                "Thif function will raise", "This function will raise"
            )

    return sample
56+
57+
if __name__ == "__main__":
    # Driver: patch the current split of both benchmarks, publish the result
    # under the new version key, then upload before/after snapshots of each
    # touched task to the update repo.
    api = HfApi()

    ds_dict = load_dataset(BIGCODEBENCH_HF)
    hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF)
    ds, hard_ds = ds_dict[BIGCODEBENCH_VERSION], hard_ds_dict[BIGCODEBENCH_VERSION]
    function_id = [211, 215]

    # Full benchmark: patch, dump locally, publish as the new version.
    new_ds = ds.map(map_ds)
    new_ds.to_json("BigCodeBench.jsonl")
    ds_dict[BIGCODEBENCH_NEW_VERSION] = new_ds
    ds_dict.push_to_hub(BIGCODEBENCH_HF)

    # Hard subset: same procedure.
    new_hard_ds = hard_ds.map(map_ds)
    new_hard_ds.to_json("BigCodeBench-Hard.jsonl")
    hard_ds_dict[BIGCODEBENCH_NEW_VERSION] = new_hard_ds
    hard_ds_dict.push_to_hub(BIGCODEBENCH_HARD_HF)

    for i in function_id:
        # NOTE(review): the task number is passed to select() as an index;
        # presumably rows are ordered by task id -- confirm.
        snapshots = {
            "old.jsonl": ds.select([i]),
            "new.jsonl": new_ds.select([i]),
        }
        # Write both snapshots first, then upload them, as before.
        for filename, subset in snapshots.items():
            subset.to_json(filename)
        for filename in snapshots:
            # repo_type is intentionally not passed; the original had
            # repo_type="dataset" commented out.
            api.upload_file(
                path_or_fileobj=filename,
                path_in_repo=f"{i}/{filename}",
                repo_id=BIGCODEBENCH_UPDATE,
            )

0 commit comments

Comments
 (0)