Skip to content

Commit

Permalink
Merge pull request #1 from tpospisi/dev
Browse files (browse the repository at this point in the history)
Merging `dev` branch: implementing a `fit` method in NNKCDE
  • Loading branch information
Mr8ND authored Jun 27, 2019
2 parents 3ec9762 + f1d51a8 commit ed2df5e
Show file tree
Hide file tree
Showing 9 changed files with 16 additions and 15 deletions.
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
README_TEXT = f.read()

setup(name="nnkcde",
version="0.2",
version="0.3",
license="MIT",
description="Fits nearest neighbor kernel conditional density estimates",
long_description = README_TEXT,
Expand Down
3 changes: 2 additions & 1 deletion python/src/nnkcde/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from .kde import kde

class NNKCDE(object):
def __init__(self, x_train, z_train, k=None):
def __init__(self, k=None):
self.k = k

def fit(self, x_train, z_train):
if len(z_train.shape) == 1:
z_train = z_train.reshape(-1, 1)
if len(x_train.shape) == 1:
Expand Down
9 changes: 3 additions & 6 deletions python/src/nnkcde/kde.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Functions for kernel density estimation."""

import numpy as np
import statsmodels.api as sm
import scipy.stats

def kde(responses, grid, bandwidth):
"""Calculates the kernel density estimate.
Expand Down Expand Up @@ -35,15 +33,14 @@ def kde(responses, grid, bandwidth):

n_grid, n_dim = grid.shape
n_obs, _ = responses.shape
density = np.zeros(n_grid)

if n_dim == 1:
kde = sm.nonparametric.KDEUnivariate(responses[:, 0])
kde.fit(bw = bandwidth, fft = False)
kde.fit(bw=bandwidth, fft=False)
return kde.evaluate(grid[:, 0])
else:
if isinstance(bandwidth, (float, int)):
bandwidth = [bandwidth] * n_dim
kde = sm.nonparametric.KDEMultivariate(responses, var_type = "c" * n_dim,
bw = bandwidth)
kde = sm.nonparametric.KDEMultivariate(responses, var_type="c" * n_dim,
bw=bandwidth)
return kde.pdf(grid)
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions python/tests/test_loss_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ def generate_data(n):
x_train, z_train = generate_data(1000)
x_test, z_test = generate_data(1000)

obj = nnkcde.NNKCDE(x_train, z_train)
obj = nnkcde.NNKCDE()
obj.fit(x_train, z_train)
n_grid = 1000
z_grid = np.linspace(-5.0, 5.0, n_grid)

# for bandwidth in (0.1, 1.0, 3.0):
for bandwidth in (1.0, 1.0):
for k in (2, 5, 10, 100):
cde = obj.predict(x_test, z_grid, k=k, bandwidth=bandwidth)
Expand Down
3 changes: 2 additions & 1 deletion python/tests/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def generate_data(n):
k = 100
bandwidth = 0.1

obj = nnkcde.NNKCDE(x_train, z_train, k=k)
obj = nnkcde.NNKCDE(k=k)
obj.fit(x_train, z_train)
n_grid = 1000
z_grid = np.linspace(0, 1, n_grid)
density = obj.predict(x_test, z_grid, bandwidth=bandwidth)
Expand Down
10 changes: 6 additions & 4 deletions python/tests/test_univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ def test_k_equals_1():
x_train = np.random.uniform(0, 1, n_train)
z_train = np.random.normal(x_train, 1, n_train)

obj = nnkcde.NNKCDE(x_train, z_train)
obj = nnkcde.NNKCDE()
obj.fit(x_train, z_train)

for bandwidth in (0.1, 0.3, 1.0, 2.0, 3.0):
x_obs = np.random.uniform(0, 1, 1)
nearest = np.argmin(np.abs(x_train - x_obs))
expected = scipy.stats.norm.pdf(z_grid, z_train[nearest], bandwidth).reshape(1, -1)
preds = obj.predict(x_obs, z_grid, k = 1, bandwidth = bandwidth)
preds = obj.predict(x_obs, z_grid, k=1, bandwidth=bandwidth)
np.testing.assert_almost_equal(expected, preds)

def test_k_equals_n():
Expand All @@ -30,10 +31,11 @@ def test_k_equals_n():
x_train = np.random.uniform(0, 1, n_train)
z_train = np.random.normal(x_train, 1, n_train)

obj = nnkcde.NNKCDE(x_train, z_train)
obj = nnkcde.NNKCDE()
obj.fit(x_train, z_train)

for bandwidth in (0.1, 0.3, 1.0, 2.0, 3.0):
x_obs = np.random.uniform(0, 1, 1)
expected = nnkcde.kde.kde(z_train, z_grid, bandwidth).reshape(1, -1)
preds = obj.predict(x_obs, z_grid, k = n_train, bandwidth = bandwidth)
preds = obj.predict(x_obs, z_grid, k=n_train, bandwidth=bandwidth)
np.testing.assert_almost_equal(expected, preds)

0 comments on commit ed2df5e

Please sign in to comment.