Feature space visualization tool + side by side gifs visual #29

Merged
merged 9 commits into from
Nov 30, 2024
13 changes: 13 additions & 0 deletions README.md
@@ -67,10 +67,23 @@ You can visualize the learned decision boundary of your model as such:

Which produces the following GIF:


<p align="center">
<img src="https://github.com/gallettilance/kviz/blob/master/examples/circle_relu_model.gif?raw=true"/>
</p>

To also view the learned decision boundary of your model in the hidden-layer feature space, set the `view_feature_space` flag to `True`:
**(note: this is only supported for neural networks with a single hidden layer)**
```python
viz = Visualizer(model)
viz.fit(X, Y, snap_freq=20, duration=300, view_feature_space=True, batch_size=4, epochs=1000, verbose=0)
```
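For reference, a model compatible with this flag (a single hidden layer of two units, so the feature space is two-dimensional) could be defined as follows. This mirrors the test added in this PR; the layer widths and activations are only illustrative:

```python
from tensorflow import keras
from tensorflow.keras import layers

# A network with exactly one hidden layer, as required by view_feature_space
model = keras.models.Sequential()
model.add(layers.Dense(2, input_dim=2, activation='relu'))  # hidden layer (2 units -> 2-D feature space)
model.add(layers.Dense(1, activation='sigmoid'))            # output layer
model.compile(loss="binary_crossentropy")
```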

The `fit` call above produces the following two GIFs side by side:
<p align="center">
<img src="https://github.com/gallettilance/kviz/blob/master/examples/feature_space.gif?raw=true"/>
</p>
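The right-hand GIF plots where each input lands after being transformed by the hidden layer, together with the output layer's decision boundary in that space. If you want to inspect those hidden activations directly, a minimal sketch (not part of kviz's API; it assumes the `model` and training data `X` from above) is:

```python
import tensorflow.keras as keras

# Sub-model that outputs the hidden layer's activations
hidden = keras.Model(inputs=model.inputs, outputs=model.layers[0].output)

# Each 2-D input is mapped to a point in the 2-D hidden feature space
hidden_features = hidden.predict(X)
print(hidden_features.shape)  # e.g. (n_samples, 2) for a Dense(2) hidden layer
```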

We can try different activation functions, network architectures, etc. to see what works
best. For example, from looking at the GIF we can see that the neural net is trying to
learn a decision boundary that is a combination of two straight lines. Clearly this is
Binary file added examples/feature_space.gif
79 changes: 75 additions & 4 deletions kviz/visualizer.py
@@ -17,16 +17,21 @@

import numpy as np
from PIL import Image as im
import os
import shutil


import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex, LinearSegmentedColormap


from networkx import DiGraph, set_node_attributes
from networkx.drawing.nx_agraph import to_agraph

import tensorflow.keras as keras



COLORS = np.array(['purple', 'blue'])
tuples = list(zip(
map(plt.Normalize(0, 1), [0, .5, 1]),
@@ -248,10 +253,58 @@ def _snap_regression(self, X, Y, filename):
ax.set_ylim(y_min, y_max)
fig.savefig(filename + '.png')
plt.close()

return np.asarray(im.open(filename + '.png'))


def _snap_feature_space(self, X, Y, filename):
"""
Generate a snapshot of the feature space after transformation by the first hidden layer

Parameters:
X : ndarray
input data to be transformed by the model's hidden layer
Y : ndarray
target classes corresponding to input data X, used for coloring the scatter plot
filename : str
name of file to save the snapshot as a PNG image

Returns:
np.ndarray
Image array of the saved feature space snapshot
"""
hidden_features = self._int_models[0].predict(X)
h = .02
x_min, x_max = hidden_features[:, 0].min() - .1, hidden_features[:, 0].max() + .1
y_min, y_max = hidden_features[:, 1].min() - .1, hidden_features[:, 1].max() + .1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
meshData = np.c_[xx.ravel(), yy.ravel()]

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

axes[0].imshow(self._snap_decision_boundary(X, Y, filename))
axes[0].axis('off')
axes[0].set_title("Input Space")

axes[1].scatter(hidden_features[:, 0], hidden_features[:, 1],
color=COLORS[Y].tolist(), s=100, alpha=0.9)

hidden_layer_model = keras.Sequential([self.model.layers[1]])

Z = hidden_layer_model.predict(meshData)
Z = np.array([z[0] for z in Z]).reshape(xx.shape)
axes[1].contourf(xx, yy, Z, alpha=0.4, cmap=CMAP)

axes[1].set_xlim(x_min, x_max)
axes[1].set_ylim(y_min, y_max)
axes[1].set_title("Feature Space at Hidden Layer 1")

fig.savefig(filename + '_combined.png', bbox_inches='tight')
plt.close(fig)

return np.asarray(im.open(filename + '_combined.png'))


def _stack_gifs(self, imgs1, imgs2, filename, duration):
"""
Takes two lists of images and stacks each image in one list on top
@@ -321,7 +374,7 @@ def _reset(self):
self._graph = self._graph_original_copy.copy()


def fit(self, X, Y, snap_freq=10, filename='decision_boundary', duration=1000, **kwargs):
def fit(self, X, Y, snap_freq=10, filename='decision_boundary', duration=1000, view_feature_space=False, **kwargs):
"""
Make GIF from snapshots of decision boundary at given snap_freq of epochs during training

@@ -336,6 +389,8 @@ def fit(self, X, Y, snap_freq=10, filename='decision_boundary', duration=1000, *
name of file to save as GIF
duration : int
duration in ms between images in GIF
view_feature_space : bool
flag to display the decision boundary in hidden feature space
**kwargs : other params
parameter inputs to model.fit

@@ -350,16 +405,32 @@ def fit(self, X, Y, snap_freq=10, filename='decision_boundary', duration=1000, *
else:
epochs = snap_freq

for _ in range(int(epochs / snap_freq)):
temp_dir = ".snapshots"
os.makedirs(temp_dir, exist_ok=True)

for epoch in range(int(epochs / snap_freq)):
self.model.fit(X, Y, epochs=snap_freq, **kwargs)
self._int_models = self._get_int_models() # TODO: make this function more efficient
if (view_feature_space):
if (len(self.model.layers) > 3):
raise ValueError("The model must have only one hidden layer for this visualization")
images.append(im.fromarray(self._snap_feature_space(X, Y, filename)))
else:
images.append(im.fromarray(self._snap_decision_boundary(X, Y, filename)))

if self.model.loss == 'binary_crossentropy':
images.append(im.fromarray(self._snap_decision_boundary(X, Y, filename)))
if (view_feature_space):
if (len(self.model.layers) > 3):
raise ValueError("The model must have only one hidden layer for this visualization")
images.append(im.fromarray(self._snap_feature_space(X, Y, filename)))
else:
images.append(im.fromarray(self._snap_decision_boundary(X, Y, filename)))
if self.model.loss == 'mean_squared_error':
images.append(im.fromarray(self._snap_regression(X, Y, filename)))

self._convert_gif(images, filename, duration)

shutil.rmtree(temp_dir)
return self.model


19 changes: 19 additions & 0 deletions tests/test_visualizer.py
@@ -3,6 +3,8 @@
from tensorflow.keras import layers
import sklearn.datasets as datasets



from kviz.visualizer import Visualizer


@@ -71,3 +73,20 @@ def test_regression():

dg = Visualizer(model)
dg.fit(X, Y, 100, 'test_regression', 100, epochs=10000, verbose=0, batch_size=200)


def test_feature_space():
model = keras.models.Sequential()
model.add(layers.Dense(2, input_dim=2, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss="binary_crossentropy")
print("no. of layers", len(model.layers))

# Generate data that forms two concentric regions (inner cluster vs. outer ring)
t, _ = datasets.make_blobs(n_samples=200, centers=[[0, 0]], cluster_std=1, random_state=1)
X = np.array(list(filter(lambda x: x[0]**2 + x[1]**2 < 1 or x[0]**2 + x[1]**2 > 1.5, t)))
Y = np.array([1 if x[0]**2 + x[1]**2 >= 1 else 0 for x in X])

viz = Visualizer(model)
viz.fit(X, Y, snap_freq=20, filename='feature space', duration=300,
view_feature_space=True, batch_size=4, epochs=1000, verbose=0)