Prototypical networks in Flair #2627

Merged: 26 commits, Feb 9, 2022

Commits:
627db34  Refactor decoder to DefaultClassifier (alanakbik, Jan 27, 2022)
75ded15  Slightly changed definition of _init_model_with_state_dict and _get_s… (plonerma, Jan 31, 2022)
caa80f7  Added possibility to use custom decoder in DefaultClassifier (plonerma, Jan 31, 2022)
0342e88  Fixed variable name (plonerma, Feb 1, 2022)
04eb35c  Added prototypical decoder (plonerma, Feb 1, 2022)
a291500  Improved patch for train function (plonerma, Feb 1, 2022)
fd45342  Fixed wrong var name (plonerma, Feb 4, 2022)
937b558  Removed unneded params for super (class not defined) (plonerma, Feb 4, 2022)
aeaca36  Merge branch 'custom_decoder_refactor' into prototype_decoder_refactor (plonerma, Feb 4, 2022)
74a8c2b  Adapted models _get_state_dict and _init_model_with_state_dict method… (plonerma, Feb 4, 2022)
248d19e  Removed wrong char (plonerma, Feb 8, 2022)
1bc0b93  Reformat (alanakbik, Feb 9, 2022)
f586e2a  Merge branch 'master' into prototype_decoder_refactor (alanakbik, Feb 9, 2022)
e4b70b9  Fix memory issues (alanakbik, Feb 9, 2022)
f9efd39  Change logits variable name (alanakbik, Feb 9, 2022)
afc4e31  Merge branch 'custom_decoder_refactor' into prototype_decoder_refactor (plonerma, Feb 9, 2022)
46e53f0  Merge branch 'prototype_decoder_refactor' of github.com:flairNLP/flai… (plonerma, Feb 9, 2022)
0dc57d4  Reformat (alanakbik, Feb 9, 2022)
6f18860  Fix mypy (alanakbik, Feb 9, 2022)
6655b7f  Fix errors in unit tests (alanakbik, Feb 9, 2022)
5139e10  Fix flake8 errors (alanakbik, Feb 9, 2022)
e74e382  Add kwargs back in (alanakbik, Feb 9, 2022)
790b1dd  Black formatting (alanakbik, Feb 9, 2022)
6fdbcea  Fix forward pass for empty batches (alanakbik, Feb 9, 2022)
1f1cd77  Add length check to decoder (alanakbik, Feb 9, 2022)
b462fdf  Add length check to predict (alanakbik, Feb 9, 2022)
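
The thread through these commits: classification models are refactored onto a shared DefaultClassifier base that owns the final decoder layer, and that decoder becomes swappable, which is what makes a prototypical decoder possible. The sketch below illustrates the idea of such a decoder; it is a simplified assumption for illustration, not the class added by this PR (the name PrototypicalDecoderSketch and its parameters are hypothetical):

```python
import torch


class PrototypicalDecoderSketch(torch.nn.Module):
    """Toy prototypical decoder: logits are negated distances to class prototypes."""

    def __init__(self, num_classes: int, embedding_size: int):
        super().__init__()
        # one learnable prototype vector per class label
        self.prototypes = torch.nn.Parameter(torch.randn(num_classes, embedding_size))

    def forward(self, embedded: torch.Tensor) -> torch.Tensor:
        # shape (batch, num_classes): the closer an embedding is to a
        # class prototype, the higher that class's logit
        return -torch.cdist(embedded, self.prototypes)
```

Because the base class builds its default linear decoder from a final_embedding_size argument (visible in the diffs below), any module that maps embeddings to per-class scores could be passed in its place.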
13 changes: 6 additions & 7 deletions flair/models/dependency_parser_model.py
@@ -372,7 +372,7 @@ def _obtain_labels_(
 
     def _get_state_dict(self):
         model_state = {
-            "state_dict": self.state_dict(),
+            **super()._get_state_dict(),
             "token_embeddings": self.token_embeddings,
             "use_rnn": self.use_rnn,
             "lstm_hidden_size": self.lstm_hidden_size,
@@ -385,10 +385,10 @@ def _get_state_dict(self):
         }
         return model_state
 
-    @staticmethod
-    def _init_model_with_state_dict(state):
-
-        model = DependencyParser(
+    @classmethod
+    def _init_model_with_state_dict(cls, state, **kwargs):
+        return super()._init_model_with_state_dict(
+            state,
             token_embeddings=state["token_embeddings"],
             relations_dictionary=state["relations_dictionary"],
             use_rnn=state["use_rnn"],
@@ -398,9 +398,8 @@ def _init_model_with_state_dict(state):
             lstm_layers=state["lstm_layers"],
             mlp_dropout=state["mlp_dropout"],
             lstm_dropout=state["lstm_dropout"],
+            **kwargs,
         )
-        model.load_state_dict(state["state_dict"])
-        return model
 
     @property
     def label_type(self):
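
The same two-sided change repeats in every file below: _get_state_dict spreads the base class's dict instead of serializing the weights itself, and _init_model_with_state_dict becomes a classmethod that delegates construction upward. A plausible sketch of the base-class contract being delegated to, inferred from the diffs rather than copied from Flair's source:

```python
import torch


class ModelBaseSketch(torch.nn.Module):
    """Assumed shape of the flair.nn.Model serialization hooks after this PR."""

    def _get_state_dict(self):
        # the base contributes the raw weights; subclasses spread this dict
        # and add their own constructor arguments on top
        return {"state_dict": self.state_dict()}

    @classmethod
    def _init_model_with_state_dict(cls, state, **kwargs):
        # subclasses forward their saved constructor arguments via kwargs;
        # the base rebuilds the model and restores the weights once
        model = cls(**kwargs)
        model.load_state_dict(state["state_dict"])
        return model
```

This removes the model.load_state_dict(...) / return model boilerplate from every subclass, and it is why @staticmethod becomes @classmethod: the shared implementation needs cls to construct the correct subclass.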
14 changes: 7 additions & 7 deletions flair/models/diagnosis/distance_prediction_model.py
@@ -146,7 +146,7 @@ def forward(self, sentence: Sentence):
 
     def _get_state_dict(self):
         model_state = {
-            "state_dict": self.state_dict(),
+            **super()._get_state_dict(),
             "word_embeddings": self.word_embeddings,
             "max_distance": self.max_distance,
             "beta": self.beta,
@@ -156,23 +156,23 @@ def _get_state_dict(self):
         }
         return model_state
 
-    @staticmethod
-    def _init_model_with_state_dict(state):
+    @classmethod
+    def _init_model_with_state_dict(cls, state, **kwargs):
 
         beta = 1.0 if "beta" not in state.keys() else state["beta"]
         weight = 1 if "loss_max_weight" not in state.keys() else state["loss_max_weight"]
 
-        model = DistancePredictor(
+        return super()._init_model_with_state_dict(
+            state,
             word_embeddings=state["word_embeddings"],
             max_distance=state["max_distance"],
             beta=beta,
             loss_max_weight=weight,
             regression=state["regression"],
             regr_loss_step=state["regr_loss_step"],
+            **kwargs,
         )
 
-        model.load_state_dict(state["state_dict"])
-        return model
-
     # So far only one sentence allowed
     # If list of sentences is handed the function works with the first sentence of the list
     def forward_loss(self, data_points: Union[List[Sentence], Sentence]) -> torch.Tensor:
53 changes: 20 additions & 33 deletions flair/models/entity_linker_model.py
@@ -3,7 +3,6 @@
 from typing import List, Optional, Union
 
 import torch
-import torch.nn as nn
 
 import flair.embeddings
 import flair.nn
@@ -40,7 +39,13 @@ def __init__(
         :param label_type: name of the label you use.
         """
 
-        super(EntityLinker, self).__init__(label_dictionary, **classifierargs)
+        super(EntityLinker, self).__init__(
+            label_dictionary=label_dictionary,
+            final_embedding_size=word_embeddings.embedding_length * 2
+            if pooling_operation == "first&last"
+            else word_embeddings.embedding_length,
+            **classifierargs,
+        )
 
         self.word_embeddings = word_embeddings
         self.pooling_operation = pooling_operation
@@ -55,16 +60,6 @@ def __init__(
         if dropout > 0.0:
             self.dropout = torch.nn.Dropout(dropout)
 
-        # if we concatenate the embeddings we need double input size in our linear layer
-        if self.pooling_operation == "first&last":
-            self.decoder = nn.Linear(2 * self.word_embeddings.embedding_length, len(self.label_dictionary)).to(
-                flair.device
-            )
-        else:
-            self.decoder = nn.Linear(self.word_embeddings.embedding_length, len(self.label_dictionary)).to(flair.device)
-
-        nn.init.xavier_uniform_(self.decoder.weight)
-
         cases = {
             "average": self.emb_mean,
             "first": self.emb_first,
@@ -110,13 +105,10 @@ def forward_pass(
         span_labels = []
         sentences_to_spans = []
         empty_label_candidates = []
+        embedded_entity_pairs = None
 
-        # if the entire batch has no sentence with candidates, return empty
-        if len(filtered_sentences) == 0:
-            scores = None
-
-        # otherwise, embed sentence and send through prediction head
-        else:
+        # embed sentences and send through prediction head
+        if len(filtered_sentences) > 0:
             # embed all tokens
             self.word_embeddings.embed(filtered_sentences)
 
@@ -152,23 +144,19 @@
                     empty_label_candidates.append(candidate)
 
             if len(embedding_list) > 0:
-                embedding_tensor = torch.cat(embedding_list, 0).to(flair.device)
+                embedded_entity_pairs = torch.cat(embedding_list, 0)
 
                 if self.use_dropout:
-                    embedding_tensor = self.dropout(embedding_tensor)
-
-                scores = self.decoder(embedding_tensor)
-            else:
-                scores = None
+                    embedded_entity_pairs = self.dropout(embedded_entity_pairs)
 
         if return_label_candidates:
-            return scores, span_labels, sentences_to_spans, empty_label_candidates
+            return embedded_entity_pairs, span_labels, sentences_to_spans, empty_label_candidates
 
-        return scores, span_labels
+        return embedded_entity_pairs, span_labels
 
     def _get_state_dict(self):
         model_state = {
-            "state_dict": self.state_dict(),
+            **super()._get_state_dict(),
             "word_embeddings": self.word_embeddings,
             "label_type": self.label_type,
             "label_dictionary": self.label_dictionary,
@@ -177,19 +165,18 @@ def _get_state_dict(self):
         }
         return model_state
 
-    @staticmethod
-    def _init_model_with_state_dict(state):
-        model = EntityLinker(
+    @classmethod
+    def _init_model_with_state_dict(cls, state, **kwargs):
+        return super()._init_model_with_state_dict(
+            state,
             word_embeddings=state["word_embeddings"],
             label_dictionary=state["label_dictionary"],
             label_type=state["label_type"],
             pooling_operation=state["pooling_operation"],
             loss_weights=state["loss_weights"] if "loss_weights" in state else {"<unk>": 0.3},
+            **kwargs,
         )
-
-        model.load_state_dict(state["state_dict"])
-        return model
 
     @property
     def label_type(self):
         return self._label_type
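
The EntityLinker diff also shows the other half of the refactor: forward_pass no longer applies a decoder itself but returns the pooled embeddings (or None for an empty batch), and the base class turns them into logits. A minimal sketch of that caller side, with hypothetical names; not verbatim Flair code:

```python
import torch


class DefaultClassifierSketch(torch.nn.Module):
    """Assumed shape of the base class that consumes forward_pass embeddings."""

    def __init__(self, final_embedding_size: int, num_labels: int, decoder=None):
        super().__init__()
        # the base class owns the decoder now; callers may supply a custom
        # one (e.g. a prototypical decoder) instead of the default Linear
        self.decoder = decoder or torch.nn.Linear(final_embedding_size, num_labels)
        self.loss_function = torch.nn.CrossEntropyLoss()

    def _calculate_loss(self, embedded, targets: torch.Tensor) -> torch.Tensor:
        # guard against empty batches, as the "length check" commits do
        if embedded is None or len(embedded) == 0:
            return torch.tensor(0.0, requires_grad=True)
        return self.loss_function(self.decoder(embedded), targets)
```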
11 changes: 6 additions & 5 deletions flair/models/lemmatizer_model.py
@@ -641,7 +641,7 @@ def predict(
 
     def _get_state_dict(self):
         model_state = {
-            "state_dict": self.state_dict(),
+            **super()._get_state_dict(),
             "embeddings": self.encoder_embeddings,
             "rnn_input_size": self.rnn_input_size,
             "rnn_hidden_size": self.rnn_hidden_size,
@@ -660,8 +660,10 @@ def _get_state_dict(self):
 
         return model_state
 
-    def _init_model_with_state_dict(state):
-        model = Lemmatizer(
+    @classmethod
+    def _init_model_with_state_dict(cls, state, **kwargs):
+        return super()._init_model_with_state_dict(
+            state,
             embeddings=state["embeddings"],
             encode_characters=state["encode_characters"],
             rnn_input_size=state["rnn_input_size"],
@@ -676,9 +678,8 @@ def _init_model_with_state_dict(state):
             start_symbol_for_encoding=state["start_symbol"],
             end_symbol_for_encoding=state["end_symbol"],
             bidirectional_encoding=state["bidirectional_encoding"],
+            **kwargs,
         )
-        model.load_state_dict(state["state_dict"])
-        return model
 
     def _print_predictions(self, batch, gold_label_type):
         lines = []
37 changes: 13 additions & 24 deletions flair/models/pairwise_classification_model.py
@@ -33,28 +33,20 @@ def __init__(
         :param loss_weights: Dictionary of weights for labels for the loss function
                              (if any label's weight is unspecified it will default to 1.0)
         """
-        super().__init__(**classifierargs)
+        super().__init__(
+            **classifierargs,
+            final_embedding_size=2 * document_embeddings.embedding_length
+            if embed_separately
+            else document_embeddings.embedding_length,
+        )
 
         self.document_embeddings: flair.embeddings.DocumentEmbeddings = document_embeddings
 
         self._label_type = label_type
 
         self.embed_separately = embed_separately
 
-        # if embed_separately == True the linear layer needs twice the length of the embeddings as input size
-        # since we concatenate the embeddings of the two DataPoints in the DataPairs
-        if self.embed_separately:
-            self.decoder = torch.nn.Linear(
-                2 * self.document_embeddings.embedding_length,
-                len(self.label_dictionary),
-            ).to(flair.device)
-
-            torch.nn.init.xavier_uniform_(self.decoder.weight)
-
-        else:
-            # representation for both sentences
-            self.decoder = torch.nn.Linear(self.document_embeddings.embedding_length, len(self.label_dictionary))
-
+        if not self.embed_separately:
             # set separator to concatenate two sentences
             self.sep = " "
             if isinstance(
@@ -66,8 +58,6 @@ def __init__(
             else:
                 self.sep = " [SEP] "
 
-        torch.nn.init.xavier_uniform_(self.decoder.weight)
-
         # auto-spawn on GPU if available
         self.to(flair.device)
 
@@ -136,7 +126,7 @@ def forward_pass(
 
     def _get_state_dict(self):
         model_state = {
-            "state_dict": self.state_dict(),
+            **super()._get_state_dict(),
             "document_embeddings": self.document_embeddings,
             "label_dictionary": self.label_dictionary,
             "label_type": self.label_type,
@@ -147,10 +137,10 @@ def _get_state_dict(self):
         }
         return model_state
 
-    @staticmethod
-    def _init_model_with_state_dict(state):
-
-        model = TextPairClassifier(
+    @classmethod
+    def _init_model_with_state_dict(cls, state, **kwargs):
+        return super()._init_model_with_state_dict(
+            state,
             document_embeddings=state["document_embeddings"],
             label_dictionary=state["label_dictionary"],
             label_type=state["label_type"],
@@ -160,6 +150,5 @@ def _init_model_with_state_dict(state):
             else state["multi_label_threshold"],
             loss_weights=state["weight_dict"],
             embed_separately=state["embed_separately"],
+            **kwargs,
         )
-        model.load_state_dict(state["state_dict"])
-        return model
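
With those hooks in place, saving and loading any of these models runs through the shared code path. A usage sketch, assuming a trained classifier instance (Model.save and Model.load are Flair's standard entry points; the file name is illustrative):

```python
from flair.models import TextPairClassifier

# save() serializes the dict built by _get_state_dict(); load() routes
# through _init_model_with_state_dict() on the way back
classifier.save("pair-classifier.pt")
loaded = TextPairClassifier.load("pair-classifier.pt")
```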