diff --git a/bigbio/biodatasets/gad/gad.py b/bigbio/biodatasets/gad/gad.py
index b1ac6164..a63dc519 100644
--- a/bigbio/biodatasets/gad/gad.py
+++ b/bigbio/biodatasets/gad/gad.py
@@ -34,22 +34,23 @@
 annotation procedure based on the Genetic Association Database
 """
 
-_HOMEPAGE = "https://github.com/dmis-lab/biobert" # This data source is used by the BLURB benchmark
+_PUBMED = True
+
+_HOMEPAGE = "https://github.com/dmis-lab/biobert"  # This data source is used by the BLURB benchmark
 
 _LICENSE = "Creative Common Attribution 4.0 International"
 
 _URLs = {
     "source": "https://drive.google.com/uc?export=download&id=1-jDKGcXREb2X9xTFnuiJ36PvsqoyHWcw",
-    "bigbio_text": "https://drive.google.com/uc?export=download&id=1-jDKGcXREb2X9xTFnuiJ36PvsqoyHWcw"
+    "bigbio_text": "https://drive.google.com/uc?export=download&id=1-jDKGcXREb2X9xTFnuiJ36PvsqoyHWcw",
 }
 
-_SUPPORTED_TASKS = [
-    Tasks.TEXT_CLASSIFICATION
-]
+_SUPPORTED_TASKS = [Tasks.TEXT_CLASSIFICATION]
 
 _SOURCE_VERSION = "1.0.0"
 _BIGBIO_VERSION = "1.0.0"
 
+
 class GAD(datasets.GeneratorBasedBuilder):
     """GAD is a weakly labeled dataset for Entity Relations (REL) task which is treated as a sentence classification task."""
 
@@ -61,7 +62,8 @@ class GAD(datasets.GeneratorBasedBuilder):
             description="GAD source schema",
             schema="source",
             subset_id=f"gad_fold{i}",
-        ) for i in range(10)
+        )
+        for i in range(10)
     ] + [
         # 10-fold bigbio schema
         BigBioConfig(
@@ -70,7 +72,8 @@ class GAD(datasets.GeneratorBasedBuilder):
             description="GAD BigBio schema",
             schema="bigbio_text",
             subset_id=f"gad_fold{i}",
-        ) for i in range(10)
+        )
+        for i in range(10)
     ]
 
     DEFAULT_CONFIG_NAME = "gad_fold0_source"
@@ -81,7 +84,7 @@ def _info(self):
                 {
                     "index": datasets.Value("string"),
                     "sentence": datasets.Value("string"),
-                    "label": datasets.Value("int32")
+                    "label": datasets.Value("int32"),
                 }
             )
         elif self.config.schema == "bigbio_text":
@@ -99,12 +102,12 @@ def _split_generators(
         self, dl_manager: datasets.DownloadManager
     ) -> List[datasets.SplitGenerator]:
         fold_id = int(self.config.subset_id.split("_fold")[1][0]) + 1
-        
+
         my_urls = _URLs[self.config.schema]
         data_dir = Path(dl_manager.download_and_extract(my_urls))
         data_files = {
             "train": data_dir / "GAD" / str(fold_id) / "train.tsv",
-            "test": data_dir / "GAD" / str(fold_id) / "test.tsv"
+            "test": data_dir / "GAD" / str(fold_id) / "test.tsv",
         }
 
         return [
@@ -119,19 +122,19 @@ def _split_generators(
         ]
 
     def _generate_examples(self, filepath: Path):
-        if 'train.tsv' in str(filepath):
-            df = pd.read_csv(filepath, sep='\t', header=None).reset_index()
+        if "train.tsv" in str(filepath):
+            df = pd.read_csv(filepath, sep="\t", header=None).reset_index()
         else:
-            df = pd.read_csv(filepath, sep='\t')
-        df.columns = ['id', 'sentence', 'label']
+            df = pd.read_csv(filepath, sep="\t")
+        df.columns = ["id", "sentence", "label"]
 
         if self.config.schema == "source":
             for id, row in enumerate(df.itertuples()):
                 ex = {
                     "index": row.id,
                     "sentence": row.sentence,
-                    "label": int(row.label)
-                }                
+                    "label": int(row.label),
+                }
                 yield id, ex
         elif self.config.schema == "bigbio_text":
             for id, row in enumerate(df.itertuples()):
@@ -139,8 +142,8 @@ def _generate_examples(self, filepath: Path):
                     "id": id,
                     "document_id": row.id,
                     "text": row.sentence,
-                    "labels": [str(row.label)]
+                    "labels": [str(row.label)],
                 }
-                yield id, ex            
+                yield id, ex
         else:
             raise ValueError(f"Invalid config: {self.config.name}")
diff --git a/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py b/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py
index 10a2e184..883ecdbc 100644
--- a/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py
+++ b/bigbio/biodatasets/genia_ptm_event_corpus/genia_ptm_event_corpus.py
@@ -61,6 +61,8 @@
 multiple PTM types at once in a unified framework.
 """
 
+_PUBMED = True
+
 _HOMEPAGE = "http://www.geniaproject.org/other-corpora/ptm-event-corpus"
 
 _LICENSE = "GENIA Project License for Annotated Corpora"
@@ -69,7 +71,11 @@
     _DATASETNAME: "http://www.geniaproject.org/other-corpora/ptm-event-corpus/post-translational_modifications_training_data.tar.gz?attredirects=0&d=1",
 }
 
-_SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION, Tasks.COREFERENCE_RESOLUTION, Tasks.EVENT_EXTRACTION]
+_SUPPORTED_TASKS = [
+    Tasks.NAMED_ENTITY_RECOGNITION,
+    Tasks.COREFERENCE_RESOLUTION,
+    Tasks.EVENT_EXTRACTION,
+]
 
 _SOURCE_VERSION = "1.0.0"
 
@@ -119,7 +125,9 @@ def _info(self) -> datasets.DatasetInfo:
                     "events": [  # E line in brat
                         {
                             "id": datasets.Value("string"),
-                            "type": datasets.Value("string"),  # refers to the text_bound_annotation of the trigger
+                            "type": datasets.Value(
+                                "string"
+                            ),  # refers to the text_bound_annotation of the trigger
                             "trigger": datasets.Value("string"),
                             "arguments": [
                                 {
@@ -183,12 +191,16 @@ def _generate_examples(self, data_dir) -> Tuple[int, Dict]:
                 if filename.endswith(".txt"):
                     txt_file_path = Path(dirpath, filename)
                     if self.config.schema == "source":
-                        example = parsing.parse_brat_file(txt_file_path, annotation_file_suffixes=[".a1", ".a2"])
+                        example = parsing.parse_brat_file(
+                            txt_file_path, annotation_file_suffixes=[".a1", ".a2"]
+                        )
                         example["id"] = str(guid)
                         for key in ["attributes", "normalizations"]:
                             del example[key]
                         yield guid, example
                     elif self.config.schema == "bigbio_kb":
-                        example = parsing.brat_parse_to_bigbio_kb(parsing.parse_brat_file(txt_file_path))
+                        example = parsing.brat_parse_to_bigbio_kb(
+                            parsing.parse_brat_file(txt_file_path)
+                        )
                         example["id"] = str(guid)
                         yield guid, example
diff --git a/bigbio/biodatasets/medical_data/medical_data.py b/bigbio/biodatasets/medical_data/medical_data.py
index 897ce924..5c9c1836 100644
--- a/bigbio/biodatasets/medical_data/medical_data.py
+++ b/bigbio/biodatasets/medical_data/medical_data.py
@@ -42,6 +42,8 @@
 
 _LICENSE = ""
 
+_PUBMED = False
+
 _URLS = {}
 
 _SUPPORTED_TASKS = [Tasks.TEXTUAL_ENTAILMENT]
diff --git a/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py b/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py
index 69eabb0b..590febb6 100644
--- a/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py
+++ b/bigbio/biodatasets/n2c2_2014_risk_factors/n2c2_2014_risk_factors.py
@@ -78,6 +78,8 @@
 }
 """
 
+_PUBMED = False
+
 _DATASETNAME = "n2c2_2014_risk_factors"
 
 _DESCRIPTION = """\
@@ -268,4 +270,4 @@ def _read_task2_file(self, file_object, file_name):
             risk_factors.append(risk_factor)
 
         document = {"document_id": file_name, "text": text, "cardiac_risk_factors": risk_factors}
-        return document
\ No newline at end of file
+        return document