From 5f5279dd324547d9cf86cb9859eb877ca4cd76c0 Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Wed, 7 Sep 2022 15:23:00 -0700
Subject: [PATCH 1/7] Split LSTM model into state and output models

---
 src/prog_models/data_models/lstm_model.py | 47 ++++++++++++++++-------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/src/prog_models/data_models/lstm_model.py b/src/prog_models/data_models/lstm_model.py
index ee45ab6a7..ae504e838 100644
--- a/src/prog_models/data_models/lstm_model.py
+++ b/src/prog_models/data_models/lstm_model.py
@@ -42,11 +42,11 @@ class LSTMStateTransitionModel(DataModel):
         'measurement_noise': 0,  # Default 0 noise
     }
 
-    def __init__(self, model, **kwargs):
+    def __init__(self, state_model, output_model, **kwargs):
         # Setup inputs, outputs, states 
-        self.outputs = kwargs.get('output_keys', [f'z{i}' for i in range(model.output.shape[1])])
+        self.outputs = kwargs.get('output_keys', [f'z{i}' for i in range(output_model.output.shape[1])])
 
-        input_shape = model.input.shape
+        input_shape = state_model.input.shape
         input_keys = kwargs.get('input_keys', [f'u{i}' for i in range(input_shape[2]-len(self.outputs))])
         self.inputs = input_keys.copy()
         # Outputs from the last step are part of input
@@ -57,14 +57,16 @@ def __init__(self, model, **kwargs):
         for j in range(input_shape[1]-1, -1, -1):
             self.states.extend([f'{input_i}_t-{j}' for input_i in input_keys])
             self.states.extend([f'{output_i}_t-{j+1}' for output_i in self.outputs])
+        self.states.extend([f'_model_output{i}' for i in range(state_model.output.shape[1])])
 
         kwargs['window'] = input_shape[1]
-        kwargs['model'] = model  # Putting it in the parameters dictionary simplifies pickling
+        kwargs['state_model'] = state_model  
+        kwargs['output_model'] = output_model
+        # Putting it in the parameters dictionary simplifies pickling
 
         super().__init__(**kwargs)
 
         # Save Model
-        self.model = model
         self.history = kwargs.get('history', None)
 
     def __getstate__(self):
@@ -96,8 +98,18 @@ def next_state(self, x, u, _):
         # Rotate new input into state
         input_data = u.matrix
             
-        states = x.matrix[len(input_data):]
-        return self.StateContainer(np.vstack((states, input_data)))
+        states = x.matrix[len(input_data):-self.parameters['state_model'].output_shape[1]]
+        states = np.vstack((states, input_data))
+
+        if states[0] is None:
+            return self.StateContainer(states)
+        else:
+            # Enough data has been received to calculate output
+            # Format input into np array with shape (1, window, num_inputs)
+            m_input = states.reshape(1, self.parameters['window'], len(self.inputs))
+            m_input = np.array(m_input, dtype=np.float)
+            internal_states = self.parameters['state_model'](m_input).numpy().T
+        return self.StateContainer(np.vstack((states, internal_states)))
 
     def output(self, x):
         if x.matrix[0,0] is None:
@@ -105,11 +117,9 @@ def output(self, x):
             return self.OutputContainer(np.array([[None] for _ in self.outputs]))
 
         # Enough data has been received to calculate output
-        # Format input into np array with shape (1, window, num_inputs)
-        m_input = x.matrix[:self.parameters['window']*len(self.inputs)].reshape(1, self.parameters['window'], len(self.inputs))
-
-        # Pass into model to calculate output       
-        m_output = self.model(m_input)
+        # Pass internal states into model to calculate output
+        internal_states = x.matrix[-self.parameters['state_model'].output_shape[1]:].T
+        m_output = self.parameters['output_model'](internal_states)
 
         if 'normalization' in self.parameters:
             m_output *= self.parameters['normalization'][1]
@@ -358,14 +368,23 @@ def from_data(cls, inputs, outputs, event_states = None, thresh_met = None, **kw
             # Dropout prevents overfitting
             x = layers.Dropout(params['dropout'])(x)
 
-        x = layers.Dense(z_all.shape[1] if z_all.ndim == 2 else 1)(x)
+        x = layers.Dense(z_all.shape[1] if z_all.ndim == 2 else 1, name='output')(x)
         model = keras.Model(inputs, x)
         model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
         
         # Train model
         history = model.fit(u_all, z_all, epochs=params['epochs'], callbacks = callbacks, validation_split = params['validation_split'])
 
-        return cls(keras.models.load_model("best_model.keras"), history = history, **params)
+        model = keras.models.load_model("best_model.keras")
+
+        # Split model into separate models
+        n_state_layers = params['layers'] + 1 + (params['dropout'] > 0) + (params['normalize'])
+        output_layer_input = layers.Input(model.layers[n_state_layers-1].output.shape[1:])
+        output_layer = model.get_layer('output')(output_layer_input)
+        state_model = keras.Model(model.input, model.layers[n_state_layers-1].output)
+        output_model = keras.Model(output_layer_input, output_layer)
+
+        return cls(state_model, output_model, history = history, **params)
         
     def simulate_to_threshold(self, future_loading_eqn, first_output = None, threshold_keys = None, **kwargs):
         t = kwargs.get('t0', 0)

From eb689ccb22545ef1a7c35a226cf7391d3fdff09f Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Wed, 7 Sep 2022 16:22:38 -0700
Subject: [PATCH 2/7] Fix custom model support by making state_model optional

---
 src/prog_models/data_models/lstm_model.py | 34 +++++++++++++++--------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/prog_models/data_models/lstm_model.py b/src/prog_models/data_models/lstm_model.py
index ae504e838..94853f32b 100644
--- a/src/prog_models/data_models/lstm_model.py
+++ b/src/prog_models/data_models/lstm_model.py
@@ -23,7 +23,8 @@ class LSTMStateTransitionModel(DataModel):
     Most users will use the `LSTMStateTransitionModel.from_data` method to create a model, but the model can be created by passing in a model directly into the constructor. The LSTM model in this method maps from [u_t-n+1, z_t-n, ..., u_t, z_t-1] to z_t. Past :term:`input` are stored in the :term:`model` internal :term:`state`. Actual calculation of :term:`output` is performed when :py:func`LSTMStateTransitionModel.output` is called. When using in simulation that may not be until the simulation results are accessed.
 
     Args:
-        model (keras.Model): Keras model to use for state transition
+        output_model (keras.Model): If a state model is present, mapps from the state_model outputs to model outputs. Otherwise, maps from model inputs to model outputs
+        state_model (keras.Model): Keras model to use for state transition, optional
 
     Keyword Args:
         input_keys (list[str]): List of input keys
@@ -42,12 +43,15 @@ class LSTMStateTransitionModel(DataModel):
         'measurement_noise': 0,  # Default 0 noise
     }
 
-    def __init__(self, state_model, output_model, **kwargs):
-        # Setup inputs, outputs, states 
-        self.outputs = kwargs.get('output_keys', [f'z{i}' for i in range(output_model.output.shape[1])])
+    def __init__(self, output_model, state_model = None, **kwargs):
+        n_outputs = output_model.output.shape[1]
+        n_internal = 0 if state_model is None else state_model.output.shape[1]
+        input_shape = output_model.input.shape if state_model is None else state_model.input.shape
+        n_inputs = input_shape[-1]-n_outputs
 
-        input_shape = state_model.input.shape
-        input_keys = kwargs.get('input_keys', [f'u{i}' for i in range(input_shape[2]-len(self.outputs))])
+        # Setup inputs, outputs, states 
+        self.outputs = kwargs.get('output_keys', [f'z{i}' for i in range(n_outputs)])
+        input_keys = kwargs.get('input_keys', [f'u{i}' for i in range(n_inputs)])
         self.inputs = input_keys.copy()
         # Outputs from the last step are part of input
         self.inputs.extend([f'{z_key}_t-1' for z_key in self.outputs])
@@ -57,7 +61,7 @@ def __init__(self, state_model, output_model, **kwargs):
         for j in range(input_shape[1]-1, -1, -1):
             self.states.extend([f'{input_i}_t-{j}' for input_i in input_keys])
             self.states.extend([f'{output_i}_t-{j+1}' for output_i in self.outputs])
-        self.states.extend([f'_model_output{i}' for i in range(state_model.output.shape[1])])
+        self.states.extend([f'_model_output{i}' for i in range(n_internal)])
 
         kwargs['window'] = input_shape[1]
         kwargs['state_model'] = state_model  
@@ -97,12 +101,16 @@ def initialize(self, u=None, z=None):
     def next_state(self, x, u, _):
         # Rotate new input into state
         input_data = u.matrix
+
+        if self.parameters['state_model'] is None:
+            states = x.matrix[len(input_data):]
+            return self.StateContainer(np.vstack((states, input_data)))
             
         states = x.matrix[len(input_data):-self.parameters['state_model'].output_shape[1]]
         states = np.vstack((states, input_data))
 
-        if states[0] is None:
-            return self.StateContainer(states)
+        if states[0,0] is None:
+            return self.StateContainer(np.vstack((states, x.matrix[-self.parameters['state_model'].output_shape[1]:])))
         else:
             # Enough data has been received to calculate output
             # Format input into np array with shape (1, window, num_inputs)
@@ -118,7 +126,11 @@ def output(self, x):
 
         # Enough data has been received to calculate output
         # Pass internal states into model to calculate output
-        internal_states = x.matrix[-self.parameters['state_model'].output_shape[1]:].T
+        if self.parameters['state_model'] is None:
+            m_input = x.matrix.reshape(1, self.parameters['window'], len(self.inputs))
+            internal_states = np.array(m_input, dtype=np.float)
+        else:
+            internal_states = x.matrix[-self.parameters['state_model'].output_shape[1]:].T
         m_output = self.parameters['output_model'](internal_states)
 
         if 'normalization' in self.parameters:
@@ -384,7 +396,7 @@ def from_data(cls, inputs, outputs, event_states = None, thresh_met = None, **kw
         state_model = keras.Model(model.input, model.layers[n_state_layers-1].output)
         output_model = keras.Model(output_layer_input, output_layer)
 
-        return cls(state_model, output_model, history = history, **params)
+        return cls(output_model, state_model, history = history, **params)
         
     def simulate_to_threshold(self, future_loading_eqn, first_output = None, threshold_keys = None, **kwargs):
         t = kwargs.get('t0', 0)

From d0ee0f5fc5e1fcd30c604141175715960c1ba464 Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Thu, 8 Sep 2022 07:07:47 -0700
Subject: [PATCH 3/7] Fix summary method

---
 src/prog_models/data_models/lstm_model.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/prog_models/data_models/lstm_model.py b/src/prog_models/data_models/lstm_model.py
index 94853f32b..a09825f79 100644
--- a/src/prog_models/data_models/lstm_model.py
+++ b/src/prog_models/data_models/lstm_model.py
@@ -144,7 +144,13 @@ def summary(self, file= sys.stdout, expand_nested=False, show_trainable=False):
         print("Inputs: ", self.inputs, file = file)
         print("Outputs: ", self.outputs, file = file)
         print("Window_size: ", self.parameters['window'], file = file)
-        self.model.summary(print_fn= file.write, expand_nested = expand_nested, show_trainable = show_trainable)
+        if 'state_model' in self.parameters:
+            print('\nState Model: ')
+            self.parameters['state_model'].summary(print_fn= file.write, expand_nested = expand_nested, show_trainable = show_trainable)
+        
+        print('\nOutput Model: ')
+        self.parameters['output_model'].summary(print_fn= file.write, expand_nested = expand_nested, show_trainable = show_trainable)
+        
         
     @staticmethod
     def pre_process_data(inputs, outputs, window, **kwargs):

From 2445511fd917c74a37044a11374a5457e3c92d5a Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Thu, 8 Sep 2022 07:53:13 -0700
Subject: [PATCH 4/7] Fix failing tests

---
 src/prog_models/data_models/lstm_model.py | 4 ++--
 tests/test_data_model.py                  | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/prog_models/data_models/lstm_model.py b/src/prog_models/data_models/lstm_model.py
index a09825f79..80591a4f6 100644
--- a/src/prog_models/data_models/lstm_model.py
+++ b/src/prog_models/data_models/lstm_model.py
@@ -145,10 +145,10 @@ def summary(self, file= sys.stdout, expand_nested=False, show_trainable=False):
         print("Outputs: ", self.outputs, file = file)
         print("Window_size: ", self.parameters['window'], file = file)
         if 'state_model' in self.parameters:
-            print('\nState Model: ')
+            print('\nState Model: ', file = file)
             self.parameters['state_model'].summary(print_fn= file.write, expand_nested = expand_nested, show_trainable = show_trainable)
         
-        print('\nOutput Model: ')
+        print('\nOutput Model: ', file = file)
         self.parameters['output_model'].summary(print_fn= file.write, expand_nested = expand_nested, show_trainable = show_trainable)
         
         
diff --git a/tests/test_data_model.py b/tests/test_data_model.py
index c08289be4..0ed64f30c 100644
--- a/tests/test_data_model.py
+++ b/tests/test_data_model.py
@@ -86,10 +86,12 @@ def test_lstm_simple(self):
         m = self._test_simple_case(LSTMStateTransitionModel, window=5, epochs=20, max_error=3)
         self.assertListEqual(m.inputs, ['x_t-1'])
         # Use set below so there's no issue with ordering
-        self.assertSetEqual(set(m.states), set(['x_t-1', 'x_t-2', 'x_t-3', 'x_t-4', 'x_t-5']))
+        keys = ['x_t-1', 'x_t-2', 'x_t-3', 'x_t-4', 'x_t-5']
+        keys.extend([f'_model_output{i}' for i in range(16)])
+        self.assertSetEqual(set(m.states), set(keys))
 
         # Create from model
-        LSTMStateTransitionModel(m.model, output_keys = ['x'])
+        LSTMStateTransitionModel(m.parameters['output_model'], m.parameters['state_model'], output_keys = ['x'])
         try:
         # Test pickling model m
             with self.assertWarns(RuntimeWarning):

From df01a6f11a55cd06018d9f828b2bdcb480e57614 Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Fri, 9 Sep 2022 12:41:02 -0700
Subject: [PATCH 5/7] Fix state_model check

---
 src/prog_models/data_models/lstm_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/prog_models/data_models/lstm_model.py b/src/prog_models/data_models/lstm_model.py
index 80591a4f6..b1307ae41 100644
--- a/src/prog_models/data_models/lstm_model.py
+++ b/src/prog_models/data_models/lstm_model.py
@@ -144,7 +144,7 @@ def summary(self, file= sys.stdout, expand_nested=False, show_trainable=False):
         print("Inputs: ", self.inputs, file = file)
         print("Outputs: ", self.outputs, file = file)
         print("Window_size: ", self.parameters['window'], file = file)
-        if 'state_model' in self.parameters:
+        if self.parameters['state_model'] is not None:
             print('\nState Model: ', file = file)
             self.parameters['state_model'].summary(print_fn= file.write, expand_nested = expand_nested, show_trainable = show_trainable)
         
@@ -226,7 +226,7 @@ def pre_process_data(inputs, outputs, window, **kwargs):
                     n_outputs = len(z[0])
                     z_i = [[z[i][k] for k in range(n_outputs)] for i in range(window+1, len(z))]
                 else:
-                    raise TypeError(f"Unsupported input type: {type(z)} for internal element (data[0][i]")  
+                    raise TypeError(f"Unsupported input type: {type(z)} for internal element (output[i])")  
 
                 # Also add to input (past outputs are part of input)
                 if len(u_i) == 0:

From e48d6fa48d00d95d0fa0209ec3420a58da03a18e Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Fri, 9 Sep 2022 12:41:52 -0700
Subject: [PATCH 6/7] Change number of epochs in example

---
 examples/lstm_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/lstm_model.py b/examples/lstm_model.py
index ecc532095..5c834d722 100644
--- a/examples/lstm_model.py
+++ b/examples/lstm_model.py
@@ -171,7 +171,7 @@ def future_loading3(t, x = None):
         inputs = input_data,
         outputs = output_data,
         window=12, 
-        epochs=3, 
+        epochs=5, 
         units=64,  # Additional units given the increased complexity of the system
         input_keys = ['i', 'dt'],
         output_keys = ['t', 'v']) 

From e5522118d2fcbb2524406d6dcbe9505d20d48509 Mon Sep 17 00:00:00 2001
From: Christopher Teubert <christopher.a.teubert@nasa.gov>
Date: Fri, 9 Sep 2022 12:43:07 -0700
Subject: [PATCH 7/7] Update docs

---
 src/prog_models/data_models/lstm_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/prog_models/data_models/lstm_model.py b/src/prog_models/data_models/lstm_model.py
index b1307ae41..2879a5ef2 100644
--- a/src/prog_models/data_models/lstm_model.py
+++ b/src/prog_models/data_models/lstm_model.py
@@ -23,8 +23,8 @@ class LSTMStateTransitionModel(DataModel):
     Most users will use the `LSTMStateTransitionModel.from_data` method to create a model, but the model can be created by passing in a model directly into the constructor. The LSTM model in this method maps from [u_t-n+1, z_t-n, ..., u_t, z_t-1] to z_t. Past :term:`input` are stored in the :term:`model` internal :term:`state`. Actual calculation of :term:`output` is performed when :py:func`LSTMStateTransitionModel.output` is called. When using in simulation that may not be until the simulation results are accessed.
 
     Args:
-        output_model (keras.Model): If a state model is present, mapps from the state_model outputs to model outputs. Otherwise, maps from model inputs to model outputs
-        state_model (keras.Model): Keras model to use for state transition, optional
+        output_model (keras.Model): If a state model is present, maps from the state_model outputs to model :term:`output`. Otherwise, maps from model inputs to model :term:`output`
+        state_model (keras.Model, optional): Keras model to use for state transition
 
     Keyword Args:
         input_keys (list[str]): List of input keys