GH-832: fixes in logging and code redundancies #843

Merged: 2 commits, Jun 27, 2019
1 change: 1 addition & 0 deletions flair/__init__.py
@@ -15,6 +15,7 @@
from . import models
from . import visual
from . import trainers
from . import nn

import logging.config

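Note on the one-line addition above: flair.nn is now loaded together with the package, so code that only does "import flair" can still reach the submodule. A tiny illustration:

import flair

# With "from . import nn" in flair/__init__.py, the submodule is importable
# straight off the package namespace, without an explicit "import flair.nn".
print(flair.nn)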
4 changes: 2 additions & 2 deletions flair/datasets.py
@@ -699,7 +699,7 @@ def __init__(

if self.in_memory:

text = " || ".join(
text = " ".join(
[row[text_column] for text_column in self.text_columns]
)

@@ -739,7 +739,7 @@ def __getitem__(self, index: int = 0) -> Sentence:
else:
row = self.raw_data[index]

text = " || ".join([row[text_column] for text_column in self.text_columns])
text = " ".join([row[text_column] for text_column in self.text_columns])

if self.max_chars_per_doc > 0:
text = text[: self.max_chars_per_doc]
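The two hunks above make the same change in the in-memory and the lazy code paths of the CSV classification dataset: rows with several text columns are now joined with a single space instead of the literal " || " separator, which previously leaked into the text that gets tokenized. A standalone illustration, with made-up column names:

# Illustrative only: how multiple text columns are combined after this change.
row = {"title": "Great phone", "body": "Battery lasts two days."}
text_columns = ["title", "body"]

text = " ".join([row[column] for column in text_columns])
print(text)  # "Great phone Battery lasts two days."
# The old " || ".join(...) would have produced "Great phone || Battery lasts two days."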
15 changes: 8 additions & 7 deletions flair/embeddings.py
@@ -425,10 +425,6 @@ def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
def __str__(self):
return self.name

@property
def embedding_length(self) -> int:
return self.__embedding_length

def extra_repr(self):
return "min_freq={}".format(self.min_freq)

@@ -567,8 +563,8 @@ def __init__(
# put on Cuda if available
from flair import device

if re.fullmatch(r'cuda:[0-9]+', str(device)):
cuda_device = int(str(device).split(':')[-1])
if re.fullmatch(r"cuda:[0-9]+", str(device)):
cuda_device = int(str(device).split(":")[-1])
elif str(device) == "cpu":
cuda_device = -1
else:
@@ -832,7 +828,12 @@ def __str__(self):
class CharacterEmbeddings(TokenEmbeddings):
"""Character embeddings of words, as proposed in Lample et al., 2016."""

def __init__(self, path_to_char_dict: str = None, char_embedding_dim: int = 25, hidden_size_char: int = 25):
def __init__(
self,
path_to_char_dict: str = None,
char_embedding_dim: int = 25,
hidden_size_char: int = 25,
):
"""Uses the default character dictionary if none provided."""

super().__init__()
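The embeddings.py changes above are cleanups rather than behavior changes: a redundant embedding_length property definition is removed, the quote style in the device-parsing code is normalized, and the CharacterEmbeddings constructor is reflowed without touching its parameters or defaults. A usage sketch under those assumptions (GloVe is downloaded on first use, and the exact vector sizes are illustrative):

from flair.data import Sentence
from flair.embeddings import CharacterEmbeddings, StackedEmbeddings, WordEmbeddings

# Character embeddings with the same defaults as the reformatted signature above.
char_embeddings = CharacterEmbeddings(char_embedding_dim=25, hidden_size_char=25)

# Typically combined with word-level embeddings in a stack.
stacked = StackedEmbeddings([WordEmbeddings("glove"), char_embeddings])

sentence = Sentence("Berlin is a city .")
stacked.embed(sentence)
print(sentence.tokens[0].get_embedding().shape)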
12 changes: 6 additions & 6 deletions flair/models/sequence_tagger_model.py
@@ -758,9 +758,9 @@ def _fetch_model(model_name) -> str:

model_map["pos"] = "/".join(
[
aws_resource_path,
"POS-ontonotes--h256-l1-b32-%2Bmix-forward%2Bmix-backward--v0.2",
"en-pos-ontonotes-v0.2.pt",
aws_resource_path_v04,
"POS-ontonotes--h256-l1-b32-p3-0.5-%2Bglove%2Bnews-forward%2Bnews-backward-normal-locked0.5-word0.05--v0.4_0",
"en-pos-ontonotes-v0.4.pt",
]
)

@@ -804,9 +804,9 @@ def _fetch_model(model_name) -> str:

model_map["chunk"] = "/".join(
[
aws_resource_path,
"NP-conll2000--h256-l1-b32-%2Bnews-forward%2Bnews-backward--v0.2",
"en-chunk-conll2000-v0.2.pt",
aws_resource_path_v04,
"NP-conll2000--h256-l1-b32-p3-0.5-%2Bnews-forward%2Bnews-backward-normal-locked0.5-word0.05--v0.4_0",
"en-chunk-conll2000-v0.4.pt",
]
)

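The two hunks above repoint the "pos" and "chunk" shortcuts in the pre-trained model map from the old v0.2 checkpoints to the v0.4 ones hosted under aws_resource_path_v04. Loading is unchanged from the user's side; a sketch (the checkpoint is downloaded on first use):

from flair.data import Sentence
from flair.models import SequenceTagger

# "pos" now resolves to en-pos-ontonotes-v0.4.pt via the updated model map.
tagger = SequenceTagger.load("pos")

sentence = Sentence("Berlin is a city .")
tagger.predict(sentence)
print(sentence.to_tagged_string())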
58 changes: 34 additions & 24 deletions flair/trainers/trainer.py
@@ -63,6 +63,7 @@ def train(
shuffle: bool = True,
param_selection_mode: bool = False,
num_workers: int = 8,
sampler=None,
**kwargs,
) -> dict:

@@ -86,7 +87,7 @@
log.info(f' - patience: "{patience}"')
log.info(f' - anneal_factor: "{anneal_factor}"')
log.info(f' - max_epochs: "{max_epochs}"')
log.info(f' - shuffle: "{train_with_dev}"')
log.info(f' - shuffle: "{shuffle}"')
log.info(f' - train_with_dev: "{train_with_dev}"')
log_line(log)
log.info(f'Model training base path: "{base_path}"')
Expand All @@ -104,30 +105,8 @@ def train(

# prepare loss logging file and set up header
loss_txt = init_output_file(base_path, "loss.tsv")
with open(loss_txt, "a") as f:
f.write(f"EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS")

dummy_result, _ = self.model.evaluate(
[Sentence("d", labels=["0.1"])],
eval_mini_batch_size,
embeddings_in_memory,
)
if log_train:
f.write(
"\tTRAIN_" + "\tTRAIN_".join(dummy_result.log_header.split("\t"))
)
if log_dev:
f.write(
"\tDEV_LOSS\tDEV_"
+ "\tDEV_".join(dummy_result.log_header.split("\t"))
)
if log_test:
f.write(
"\tTEST_LOSS\tTEST_"
+ "\tTEST_".join(dummy_result.log_header.split("\t"))
)

weight_extractor = WeightExtractor(base_path)
weight_extractor = WeightExtractor(base_path)

optimizer = self.optimizer(self.model.parameters(), lr=learning_rate, **kwargs)
if self.optimizer_state is not None:
@@ -161,6 +140,9 @@ def train(
if train_with_dev:
train_data = ConcatDataset([self.corpus.train, self.corpus.dev])

if sampler is not None:
sampler = sampler(train_data)

dev_score_history = []
dev_loss_history = []
train_loss_history = []
@@ -199,6 +181,7 @@
batch_size=mini_batch_size,
shuffle=shuffle,
num_workers=num_workers,
sampler=sampler,
)

self.model.train()
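The sampler-related hunks in this file add an optional sampler argument to train(): it is expected to be a sampler class rather than an instance, it is constructed with the training dataset ("sampler = sampler(train_data)"), and the resulting object is handed to the DataLoader above. A hypothetical example (ShuffledSampler is illustrative, not part of flair); note that PyTorch's DataLoader treats shuffle=True and an explicit sampler as mutually exclusive, so shuffling would be turned off when one is supplied:

import random
from torch.utils.data.sampler import Sampler

# Hypothetical sampler for illustration: train() receives the class itself and
# instantiates it with the training dataset, as shown in the hunk above.
class ShuffledSampler(Sampler):
    def __init__(self, data_source):
        self.data_source = data_source

    def __iter__(self):
        indices = list(range(len(self.data_source)))
        random.shuffle(indices)
        return iter(indices)

    def __len__(self):
        return len(self.data_source)

# Usage sketch, with trainer/tagger/corpus assumed from the surrounding setup:
# trainer = ModelTrainer(tagger, corpus)
# trainer.train("resources/taggers/example", shuffle=False, sampler=ShuffledSampler)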
@@ -313,6 +296,33 @@

# output log file
with open(loss_txt, "a") as f:

# make headers on first epoch
if epoch == 0:
f.write(
f"EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS"
)

if log_train:
f.write(
"\tTRAIN_"
+ "\tTRAIN_".join(
train_eval_result.log_header.split("\t")
)
)
if log_dev:
f.write(
"\tDEV_LOSS\tDEV_"
+ "\tDEV_".join(dev_eval_result.log_header.split("\t"))
)
if log_test:
f.write(
"\tTEST_LOSS\tTEST_"
+ "\tTEST_".join(
test_eval_result.log_header.split("\t")
)
)

f.write(
f"\n{epoch}\t{datetime.datetime.now():%H:%M:%S}\t{bad_epochs}\t{learning_rate:.4f}\t{train_loss}"
)
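The final hunk moves the loss.tsv header writing out of training setup and into the epoch loop: the header is written once, on the first epoch, and its evaluation columns are built from the real train/dev/test results instead of a throwaway evaluate() call on a dummy sentence. A minimal, self-contained sketch of that pattern (file name and values are illustrative):

import datetime

loss_txt = "loss.tsv"
for epoch in range(3):
    train_loss = 0.5 / (epoch + 1)  # stand-in for the real epoch loss
    with open(loss_txt, "a") as f:
        # write the column header lazily, on the first epoch only
        if epoch == 0:
            f.write("EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS")
        f.write(
            f"\n{epoch}\t{datetime.datetime.now():%H:%M:%S}\t0\t{0.1:.4f}\t{train_loss}"
        )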