diff --git a/flair/datasets/biomedical.py b/flair/datasets/biomedical.py
index 27588d7e0b..aa0f0c9335 100644
--- a/flair/datasets/biomedical.py
+++ b/flair/datasets/biomedical.py
@@ -22,6 +22,7 @@ from zipfile import BadZipFile, LargeZipFile

 import ftfy
+from deprecated import deprecated
 from lxml import etree
 from lxml.etree import XMLSyntaxError

@@ -230,7 +231,6 @@ def bioc_to_internal(bioc_file: Path):
                 document_text += " " + text

             for annotation in passage.xpath(".//annotation"):
-
                 entity_types = [
                     i.text.replace(" ", "_")
                     for i in annotation.xpath("./infon")
@@ -350,9 +350,12 @@ def __init__(
         self.sentence_splitter = sentence_splitter

     def process_dataset(self, datasets: Dict[str, InternalBioNerDataset], out_dir: Path):
-        self.write_to_conll(datasets["train"], out_dir / "train.conll")
-        self.write_to_conll(datasets["dev"], out_dir / "dev.conll")
-        self.write_to_conll(datasets["test"], out_dir / "test.conll")
+        if "train" in datasets:
+            self.write_to_conll(datasets["train"], out_dir / "train.conll")
+        if "dev" in datasets:
+            self.write_to_conll(datasets["dev"], out_dir / "dev.conll")
+        if "test" in datasets:
+            self.write_to_conll(datasets["test"], out_dir / "test.conll")

     def write_to_conll(self, dataset: InternalBioNerDataset, output_file: Path):
         os.makedirs(str(output_file.parent), exist_ok=True)
@@ -641,6 +644,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return merge_datasets([train_data, test_data])


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class JNLPBA(ColumnCorpus):
     """
     Original corpus of the JNLPBA shared task.
@@ -987,6 +991,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return data


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class MIRNA(ColumnCorpus):
     """
     Original miRNA corpus.
@@ -1614,6 +1619,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return filter_and_map_entities(dataset, {"protein": GENE_TAG})


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class CHEMDNER(ColumnCorpus):
     """
     Original corpus of the CHEMDNER shared task.
@@ -1717,6 +1723,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return all_data


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class IEPA(ColumnCorpus):
     """
     IEPA corpus as provided by http://corpora.informatik.hu-berlin.de/
@@ -1832,6 +1839,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return merge_datasets([train_data, test_data])


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class LINNEAUS(ColumnCorpus):
     """
     Original LINNEAUS corpus containing species annotations.
@@ -1936,6 +1944,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return LINNEAUS.download_and_parse_dataset(data_dir)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class CDR(ColumnCorpus):
     """
     CDR corpus as provided by https://github.com/JHnlp/BioCreative-V-CDR-Corpus
@@ -2051,6 +2060,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return all_data


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class VARIOME(ColumnCorpus):
     """
     Variome corpus as provided by http://corpora.informatik.hu-berlin.de/corpora/brat2bioc/hvp_bioc.xml.zip
@@ -2210,6 +2220,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return all_data


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class NCBI_DISEASE(ColumnCorpus):
     """
     Original NCBI disease corpus containing disease annotations.
@@ -2465,6 +2476,7 @@ def parse_input_file(input_file: Path):
         return InternalBioNerDataset(documents=documents, entities_per_document=entities_per_document)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class SCAI_CHEMICALS(ScaiCorpus):
     """
     Original SCAI chemicals corpus containing chemical annotations.
@@ -2493,6 +2505,7 @@ def perform_corpus_download(data_dir: Path) -> Path:
         return corpus_file


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class SCAI_DISEASE(ScaiCorpus):
     """
     Original SCAI disease corpus containing disease annotations.
@@ -2572,6 +2585,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return filter_and_map_entities(corpus, entity_mapping)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class OSIRIS(ColumnCorpus):
     """
     Original OSIRIS corpus containing variation and gene annotations.
@@ -2642,7 +2656,6 @@ def parse_dataset(cls, corpus_folder: Path, fix_annotation=True):
             file for file in os.listdir(str(corpus_folder)) if file.endswith(".txt") and not file.startswith("README")
         ]
         for text_file in input_files:
-
             with open(os.path.join(str(corpus_folder), text_file), encoding="utf8") as text_reader:
                 document_text = text_reader.read()
                 if not document_text:
@@ -2767,7 +2780,7 @@ def parse_dataset(data_dir: Path) -> InternalBioNerDataset:
                     start, end = int(fields[2]), int(fields[3])

                     if start == end:
-                        continue
+                        continue  # Illegal annotation

                     entities_per_document[fname].append(Entity((start, end), "Species"))
@@ -3446,6 +3459,7 @@ def parse_dataset(data_dir: Path) -> InternalBioNerDataset:
         return InternalBioNerDataset(documents=documents, entities_per_document=entities_per_document)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class BC2GM(ColumnCorpus):
     """
     Original BioCreative-II-GM corpus containing gene annotations.
@@ -3751,6 +3765,7 @@ def to_internal(self, data_dir: Path) -> InternalBioNerDataset:
         return filter_and_map_entities(dataset, entity_type_mapping)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class CHEBI(ColumnCorpus):
     """
     Original CHEBI corpus containing all annotations.
@@ -4015,6 +4030,7 @@ def parse_input_files(input_folder: Path) -> InternalBioNerDataset:
         return InternalBioNerDataset(documents=documents, entities_per_document=entities_per_document)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class BIONLP2013_PC(BioNLPCorpus):
     """
     Corpus of the BioNLP'2013 Pathway Curation shared task
@@ -4057,6 +4073,7 @@ def download_corpus(download_folder: Path) -> Tuple[Path, Path, Path]:
         return train_folder, dev_folder, test_folder


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class BIONLP2013_CG(BioNLPCorpus):
     """
     Corpus of the BioNLP'2013 Cancer Genetics shared task
@@ -4085,6 +4102,7 @@ def download_corpus(download_folder: Path) -> Tuple[Path, Path, Path]:
         return train_folder, dev_folder, test_folder


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class ANAT_EM(ColumnCorpus):
     """
     Corpus for anatomical named entity mention recognition.
@@ -4964,6 +4982,7 @@ def parse_corpus(input_file: Path) -> InternalBioNerDataset:
         return InternalBioNerDataset(documents=documents, entities_per_document=entities_per_document)


+@deprecated(version="0.13", reason="Please use data set implementation from BigBio instead (see BIGBIO_NER_CORPUS)")
 class PDR(ColumnCorpus):
     """
     Corpus of plant-disease relations from Kim et al., consisting of named entity annotations
@@ -5140,3 +5159,569 @@ class HUNER_SPECIES(HunerMultiCorpus):

     def __init__(self, sentence_splitter: SentenceSplitter = None):
         super(HUNER_SPECIES, self).__init__(entity_type="SPECIES", sentence_splitter=sentence_splitter)
+
+
+class BIGBIO_NER_CORPUS(ColumnCorpus):
+    """
+    This class implements an adapter to data sets implemented in the BigBio framework:
+
+    https://github.com/bigscience-workshop/biomedical
+
+    The BigBio framework harmonizes over 120 biomedical data sets and provides a uniform
+    programming API to access them. This adapter makes it possible to use all named entity
+    recognition data sets that support the bigbio_kb schema.
+    """
+
+    def __init__(
+        self,
+        dataset_name: str,
+        base_path: Union[str, Path] = None,
+        in_memory: bool = True,
+        sentence_splitter: Optional[SentenceSplitter] = None,
+        train_split_name: Optional[str] = None,
+        dev_split_name: Optional[str] = None,
+        test_split_name: Optional[str] = None,
+    ):
+        """
+        :param dataset_name: Name of the dataset on the Hugging Face Hub (e.g. nlmchem or bigbio/nlmchem)
+        :param base_path: Path to the corpus on your machine
+        :param in_memory: If True, keeps dataset in memory giving speedups in training.
+        :param sentence_splitter: Custom implementation of :class:`SentenceSplitter` which
+            segments the text into sentences and tokens (default :class:`SciSpacySentenceSplitter`)
+        :param train_split_name: Name of the training split in bigbio, usually train (default: None)
+        :param dev_split_name: Name of the development split in bigbio, usually validation (default: None)
+        :param test_split_name: Name of the test split in bigbio, usually test (default: None)
+        """
+
+        if base_path is None:
+            base_path = flair.cache_root / "datasets"
+        else:
+            base_path = Path(base_path)
+
+        # column format
+        columns = {0: "text", 1: "ner"}
+
+        # build dataset name and full huggingface reference name
+        if dataset_name.startswith("/"):  # Absolute path for local BigBio datasets
+            full_dataset_name = dataset_name
+            dataset_name = dataset_name.split("/")[-1]
+            dataset_name = dataset_name.split(".")[0]
+        elif not dataset_name.startswith("bigbio/"):
+            full_dataset_name = "bigbio" + "/" + dataset_name
+        else:
+            full_dataset_name = dataset_name
+            dataset_name = dataset_name.replace("bigbio/", "")
+
+        self.sentence_splitter = sentence_splitter if sentence_splitter else SciSpacySentenceSplitter()
+
+        dataset_dir_name = self.build_corpus_directory_name(dataset_name)
+        data_folder = base_path / dataset_dir_name / self.sentence_splitter.name
+
+        train_file = data_folder / "train.conll"
+
+        # Download data if necessary
+        # Some datasets in BigBio only have train or test splits, not both
+        # If only test split, assign it to train split
+        # If only train split, sample other from it (sample_missing_splits=True)
+        if not train_file.exists():
+            from datasets import load_dataset
+
+            dataset = load_dataset(full_dataset_name, name=dataset_name + "_bigbio_kb")
+
+            # Use the default split names only if no custom names were passed via the constructor
+            if train_split_name is None and "train" in dataset:
+                train_split_name = "train"
+            if test_split_name is None and "test" in dataset:
+                test_split_name = "test"
+            if dev_split_name is None and "validation" in dataset:
+                dev_split_name = "validation"
+
+            assert not (train_split_name is None and test_split_name is None)
+
+            splits = {}
+            # Not every dataset has a dev / validation set!
+            if train_split_name is not None:
+                splits["train"] = self.to_internal_dataset(dataset, train_split_name)
+            if test_split_name is not None:
+                splits["test"] = self.to_internal_dataset(dataset, test_split_name)
+            if dev_split_name is not None:
+                splits["dev"] = self.to_internal_dataset(dataset, dev_split_name)
+            if "train" not in splits and "test" in splits:
+                splits["train"] = splits.pop("test")
+
+            # Perform type mapping if necessary
+            type_mapping = self.get_entity_type_mapping()
+            if type_mapping:
+                splits = {split: filter_and_map_entities(dataset, type_mapping) for split, dataset in splits.items()}
+
+            conll_writer = CoNLLWriter(sentence_splitter=self.sentence_splitter)
+            conll_writer.process_dataset(splits, data_folder)
+
+        super(BIGBIO_NER_CORPUS, self).__init__(
+            data_folder, columns, in_memory=in_memory, comment_symbol="#", sample_missing_splits=True
+        )
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        """
+        Return the mapping of entity types given in the dataset to canonical types. Note that
+        if an entity type is not present in the map, it is discarded.
+        """
+        return None
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        """
+        Builds the directory name for the given data set.
+        """
+        return "bigbio-" + dataset_name.lower()
+
+    def to_internal_dataset(self, dataset, split: str) -> InternalBioNerDataset:
+        """
+        Converts a dataset given in Hugging Face datasets format to our internal corpus representation.
+ """ + id_to_text = {} + id_to_entities: Dict[str, List] = {} + for document in dataset[split]: + document_id = document["document_id"] + passage_offsets = [] + + # Collect all texts of the document, each passage will be + # a text in our internal format + for passage in document["passages"]: + passage_id = document_id + "#" + str(passage["id"]) + id_to_text[passage_id] = " ".join(passage["text"]) + passage_offsets.append((passage_id, passage["offsets"])) + + id_to_entities[passage_id] = [] + + # Sort passages by start offset + passage_offsets = sorted(passage_offsets, key=lambda e: e[1][0][0]) + + # Transform all entity annotations into internal format + for entity in document["entities"]: + # Find the passage of the entity (necessary for offset adaption) + passage_id, passage_offset = self.bin_search_passage( + passage_offsets, 0, len(passage_offsets) - 1, entity + ) + + # Adapt entity offsets according to passage offsets + entity_offset = entity["offsets"][0] + entity_offset = (entity_offset[0] - passage_offset[0], entity_offset[1] - passage_offset[0]) + + id_to_entities[passage_id].append(Entity(char_span=entity_offset, entity_type=entity["type"])) + + # FIXME: This is just for debugging purposes + # passage_text = id_to_text[passage_id] + # doc_text = passage_text[entity_offset[0] : entity_offset[1]] + # mention_text = entity["text"][0] + # if doc_text != mention_text: + # print(f"Annotation error ({document['document_id']}) - Doc: {doc_text} vs. Mention: {mention_text}") + + return InternalBioNerDataset(documents=id_to_text, entities_per_document=id_to_entities) + + def bin_search_passage(self, passages: List[Tuple[str, List[Tuple[int, int]]]], low: int, high: int, entity: Dict): + """ + Helper methods to find the passage to a given entity mention (incl. offset). The implementation + uses binary search to find the passage in the ordered sequence passages. + """ + # Check base case + if high >= low: + # Get element in the middle + mid = (high + low) // 2 + first_text_offset = passages[mid][1][0] + first_mention_offset = entity["offsets"][0] + + # Is the mention with the passage offsets? 
+            if first_mention_offset[0] >= first_text_offset[0] and first_mention_offset[1] <= first_text_offset[1]:
+                return passages[mid][0], first_text_offset
+
+            # If the mention starts before the mid passage, it can only
+            # be present in the left subarray
+            elif first_text_offset[0] > first_mention_offset[0]:
+                return self.bin_search_passage(passages, low, mid - 1, entity)
+            else:
+                # Otherwise the mention can only be present in the right subarray
+                return self.bin_search_passage(passages, mid + 1, high, entity)
+
+        else:
+            # This should never happen :-D
+            return -1, -1
+
+
+class HUNER_GENE_NLM_GENE(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_NLM_GENE, self).__init__(*args, dataset_name="nlm_gene", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Gene": GENE_TAG, "GENERIF": GENE_TAG, "STARGENE": GENE_TAG, "Domain": GENE_TAG, "Other": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_DRUGPROT(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_DRUGPROT, self).__init__(*args, dataset_name="drugprot", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"GENE-N": GENE_TAG, "GENE-Y": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_DRUGPROT(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_DRUGPROT, self).__init__(*args, dataset_name="drugprot", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"CHEMICAL": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIORED(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIORED, self).__init__(*args, dataset_name="biored", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"GeneOrGeneProduct": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_BIORED(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_BIORED, self).__init__(*args, dataset_name="biored", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"ChemicalEntity": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_DISEASE_BIORED(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_DISEASE_BIORED, self).__init__(*args, dataset_name="biored", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"DiseaseOrPhenotypicFeature": DISEASE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_SPECIES_BIORED(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_SPECIES_BIORED, self).__init__(*args, dataset_name="biored", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"OrganismTaxon": SPECIES_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CELL_LINE_BIORED(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CELL_LINE_BIORED, self).__init__(*args, dataset_name="biored", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"CellLine": CELL_LINE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_CPI(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_CPI, self).__init__(*args, dataset_name="cpi", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_CPI(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_CPI, self).__init__(*args, dataset_name="cpi", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"compound": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIONLP_ST_2013_PC(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIONLP_ST_2013_PC, self).__init__(*args, dataset_name="bionlp_st_2013_pc", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Gene_or_gene_product": GENE_TAG, "Complex": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_BIONLP_ST_2013_PC(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_BIONLP_ST_2013_PC, self).__init__(*args, dataset_name="bionlp_st_2013_pc", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Simple_chemical": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIONLP_ST_2013_GE(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIONLP_ST_2013_GE, self).__init__(*args, dataset_name="bionlp_st_2013_ge", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIONLP_ST_2011_GE(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIONLP_ST_2011_GE, self).__init__(*args, dataset_name="bionlp_st_2011_ge", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIONLP_ST_2011_ID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIONLP_ST_2011_ID, self).__init__(*args, dataset_name="bionlp_st_2011_id", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_BIONLP_ST_2011_ID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_BIONLP_ST_2011_ID, self).__init__(*args, dataset_name="bionlp_st_2011_id", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Chemical": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_SPECIES_BIONLP_ST_2011_ID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_SPECIES_BIONLP_ST_2011_ID, self).__init__(*args, dataset_name="bionlp_st_2011_id", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Organism": SPECIES_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIONLP_ST_2011_REL(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIONLP_ST_2011_REL, self).__init__(*args, dataset_name="bionlp_st_2011_rel", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIONLP_ST_2011_EPI(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIONLP_ST_2011_EPI, self).__init__(*args, dataset_name="bionlp_st_2011_epi", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_SPECIES_BIONLP_ST_2019_BB(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_SPECIES_BIONLP_ST_2019_BB, self).__init__(*args, dataset_name="bionlp_st_2019_bb", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Microorganism": SPECIES_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_BIOID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_BIOID, self).__init__(*args, dataset_name="bioid", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"gene": GENE_TAG, "protein": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_BIOID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_BIOID, self).__init__(*args, dataset_name="bioid", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"chemical": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_SPECIES_BIOID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_SPECIES_BIOID, self).__init__(*args, dataset_name="bioid", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"species": SPECIES_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CELL_LINE_BIOID(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CELL_LINE_BIOID, self).__init__(*args, dataset_name="bioid", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"cell": CELL_LINE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_GNORMPLUS(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_GNORMPLUS, self).__init__(*args, dataset_name="gnormplus", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Gene": GENE_TAG, "FamilyName": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_PROGENE(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        # Special case for ProGene: We need to use the split_0_train and split_0_test splits
+        # as they are currently provided in BigBio
+        train_split_name = "split_0_train"
+        dev_split_name = "split_0_validation"
+        test_split_name = "split_0_test"
+        super(HUNER_GENE_PROGENE, self).__init__(
+            *args,
+            dataset_name="progene",
+            **kwargs,
+            train_split_name=train_split_name,
+            dev_split_name=dev_split_name,
+            test_split_name=test_split_name,
+        )
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"progene_text": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_CHEMICAL_NLM_CHEM(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CHEMICAL_NLM_CHEM, self).__init__(*args, dataset_name="nlmchem", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Chemical": CHEMICAL_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_SETH_CORPUS(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_SETH_CORPUS, self).__init__(*args, dataset_name="seth_corpus", **kwargs)
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Gene": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+
+class HUNER_GENE_TMVAR_V3(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_GENE_TMVAR_V3, self).__init__(
+            *args,
+            dataset_name="tmvar_v3",
+            **kwargs,
+        )
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"Gene": GENE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+    # Some offsets are broken in tmvar_v3, we need to fix them
+    def to_internal_dataset(self, dataset, split: str) -> InternalBioNerDataset:
+        """
+        Converts a dataset given in Hugging Face datasets format to our internal corpus representation.
+        """
+        # dataset = dataset.map(map_fn, batched=True)
+        return super(HUNER_GENE_TMVAR_V3, self).to_internal_dataset(dataset, split)
+
+
+class HUNER_SPECIES_TMVAR_V3(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_SPECIES_TMVAR_V3, self).__init__(
+            *args,
+            dataset_name="tmvar_v3",
+            **kwargs,
+        )
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"['Species']": SPECIES_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
+
+    # Some offsets are broken in tmvar_v3, we need to fix them
+    def to_internal_dataset(self, dataset, split: str) -> InternalBioNerDataset:
+        """
+        Converts a dataset given in Hugging Face datasets format to our internal corpus representation.
+        """
+        # dataset = dataset.map(map_fn, batched=True)
+        return super(HUNER_SPECIES_TMVAR_V3, self).to_internal_dataset(dataset, split)
+
+
+class HUNER_CELL_LINE_TMVAR_V3(BIGBIO_NER_CORPUS):
+    def __init__(self, *args, **kwargs):
+        super(HUNER_CELL_LINE_TMVAR_V3, self).__init__(
+            *args,
+            dataset_name="tmvar_v3",
+            **kwargs,
+        )
+
+    def get_entity_type_mapping(self) -> Optional[Dict]:
+        return {"['CellLine']": CELL_LINE_TAG}
+
+    def build_corpus_directory_name(self, dataset_name: str) -> str:
+        return self.__class__.__name__.lower()
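
Usage note (not part of the patch): a minimal sketch of how the new adapter is meant to be used, assuming a flair checkout with this patch applied plus the `datasets` and scispacy dependencies installed. Class and dataset names are taken from the diff above; everything else is illustrative.

    from flair.datasets.biomedical import BIGBIO_NER_CORPUS, HUNER_GENE_NLM_GENE

    # Generic adapter: "nlmchem" is expanded to "bigbio/nlmchem" on the
    # Hugging Face Hub, downloaded, and converted to CoNLL on first use.
    corpus = BIGBIO_NER_CORPUS(dataset_name="nlmchem")

    # Pre-configured subclass: additionally maps dataset-specific entity
    # types (e.g. "Gene", "GENERIF") to the canonical HUNER gene tag.
    gene_corpus = HUNER_GENE_NLM_GENE()
    print(gene_corpus)  # behaves like any other Flair ColumnCorpus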