diff --git a/mgwr/gwr.py b/mgwr/gwr.py index b8c2252..0c39c8d 100755 --- a/mgwr/gwr.py +++ b/mgwr/gwr.py @@ -3,6 +3,7 @@ __author__ = "Taylor Oshan Tayoshan@gmail.com" import copy +import os from typing import Optional import numpy as np import numpy.linalg as la @@ -13,10 +14,10 @@ from spglm.glm import GLM, GLMResults from spglm.iwls import iwls, _compute_betas_gwr from spglm.utils import cache_readonly +from joblib import Parallel, delayed from .diagnostics import get_AIC, get_AICc, get_BIC, corr from .kernels import * from .summary import * -import multiprocessing as mp class GWR(GLM): @@ -86,6 +87,10 @@ class GWR(GLM): name_x : list of strings Names of independent variables for use in output + n_jobs : integer + The number of jobs (default 1) to run in parallel. -1 means using all processors. + + Attributes ---------- coords : array-like @@ -210,7 +215,7 @@ class GWR(GLM): def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, - spherical=False, hat_matrix=False, name_x=None): + spherical=False, hat_matrix=False, name_x=None,n_jobs=1): """ Initialize class """ @@ -234,6 +239,7 @@ def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, self.spherical = spherical self.hat_matrix = hat_matrix self.name_x = name_x + self.n_jobs = n_jobs def _build_wi(self, i, bw): @@ -285,7 +291,7 @@ def _local_fit(self, i): return influ, resid, predy, betas.reshape(-1), w, Si, tr_STS_i, CCT def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', - lite=False, pool=None): + lite=False): """ Method that fits a model with a particular estimation routine. @@ -312,7 +318,6 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', bandwidth selection (could speed up bandwidth selection for GWR) or to estimate a full GWR. Default is False. - pool : A multiprocessing Pool object to enable parallel fitting; default is None. Returns ------- @@ -335,11 +340,7 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls', else: m = self.points.shape[0] - if pool: - rslt = pool.map(self._local_fit, - range(m)) #parallel using mp.Pool - else: - rslt = map(self._local_fit, range(m)) #sequential + rslt = Parallel(n_jobs=self.n_jobs)(delayed(self._local_fit)(i) for i in range(m)) rslt_list = list(zip(*rslt)) influ = np.array(rslt_list[0]).reshape(-1, 1) @@ -1492,6 +1493,9 @@ class MGWR(GWR): name_x : list of strings Names of independent variables for use in output + n_jobs : integer + The number of jobs (default 1) to run in parallel. -1 means using all processors. + Examples -------- @@ -1521,7 +1525,7 @@ class MGWR(GWR): def __init__(self, coords, y, X, selector, sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, - spherical=False, hat_matrix=False, name_x=None): + spherical=False, hat_matrix=False, name_x=None,n_jobs=1): """ Initialize class """ @@ -1544,6 +1548,7 @@ def __init__(self, coords, y, X, selector, sigma2_v1=True, self.exog_scale = None self_fit_params = None self.name_x = name_x + self.n_jobs = n_jobs def _chunk_compute_R(self, chunk_id=0): """ @@ -1599,7 +1604,7 @@ def _chunk_compute_R(self, chunk_id=0): return ENP_j, CCT, pR return ENP_j, CCT - def fit(self, n_chunks=1, pool=None): + def fit(self, n_chunks=1): """ Compute MGWR inference by chunk to reduce memory footprint. See Li and Fotheringham, 2020, IJGIS. @@ -1610,7 +1615,6 @@ def fit(self, n_chunks=1, pool=None): n_chunks : integer, optional A number of chunks parameter to reduce memory usage. e.g. n_chunks=2 should reduce overall memory usage by 2. - pool : A multiprocessing Pool object to enable parallel fitting; default is None. Returns ------- @@ -1627,15 +1631,16 @@ def tqdm(x, total=0, desc=''): #otherwise, just passthrough the range return x - if pool: - self.n_chunks = pool._processes * n_chunks - rslt = tqdm( - pool.imap(self._chunk_compute_R, range(self.n_chunks)), - total=self.n_chunks, desc='Inference') + if self.n_jobs == -1: + max_processors = os.cpu_count() + self.n_chunks = max_processors * n_chunks else: - self.n_chunks = n_chunks - rslt = map(self._chunk_compute_R, - tqdm(range(self.n_chunks), desc='Inference')) + self.n_chunks = self.n_jobs * n_chunks + + # Using joblib for parallel processing with a tqdm progress bar + rslt = tqdm(Parallel(n_jobs=self.n_jobs)( + delayed(self._chunk_compute_R)(i) for i in range(self.n_chunks)), + total=self.n_chunks, desc='Inference') rslt_list = list(zip(*rslt)) ENP_j = np.sum(np.array(rslt_list[0]), axis=0) @@ -1666,7 +1671,7 @@ def exact_fit(self): Q = [] I = np.eye(self.n) for j1 in range(self.k): - Aj = GWR(self.coords,self.y,self.X[:,j1].reshape(-1,1),bw=self.bws[j1],hat_matrix=True,constant=False).fit().S + Aj = GWR(self.coords,self.y,self.X[:,j1].reshape(-1,1),bw=self.bws[j1],hat_matrix=True,constant=False,n_jobs=self.n_jobs).fit().S Pj = [] for j2 in range(self.k): if j1 == j2: diff --git a/mgwr/sel_bw.py b/mgwr/sel_bw.py index ac1e2db..53c0692 100755 --- a/mgwr/sel_bw.py +++ b/mgwr/sel_bw.py @@ -6,7 +6,6 @@ import spreg.user_output as USER import numpy as np -import multiprocessing as mp from scipy.spatial.distance import pdist from scipy.optimize import minimize_scalar from spglm.family import Gaussian, Poisson, Binomial @@ -59,6 +58,8 @@ class Sel_BW(object): spherical : boolean True for shperical coordinates (long-lat), False for projected coordinates (defalut). + n_jobs : integer + The number of jobs (default 1) to run in parallel. -1 means using all processors. Attributes ---------- @@ -175,7 +176,7 @@ class Sel_BW(object): def __init__(self, coords, y, X_loc, X_glob=None, family=Gaussian(), offset=None, kernel='bisquare', fixed=False, multi=False, - constant=True, spherical=False): + constant=True, spherical=False,n_jobs=1): self.coords = np.array(coords) self.y = y self.X_loc = X_loc @@ -194,6 +195,7 @@ def __init__(self, coords, y, X_loc, X_glob=None, family=Gaussian(), self._functions = [] self.constant = constant self.spherical = spherical + self.n_jobs = n_jobs self.search_params = {} def search(self, search_method='golden_section', criterion='AICc', @@ -201,7 +203,7 @@ def search(self, search_method='golden_section', criterion='AICc', max_iter=200, init_multi=None, tol_multi=1.0e-5, rss_score=False, max_iter_multi=200, multi_bw_min=[None], multi_bw_max=[None - ], bws_same_times=5, pool=None, verbose=False): + ], bws_same_times=5, verbose=False): """ Method to select one unique bandwidth for a gwr model or a bandwidth vector for a mgwr model. @@ -247,8 +249,6 @@ def search(self, search_method='golden_section', criterion='AICc', bws_same_times : If bandwidths keep the same between iterations for bws_same_times (default 5) in backfitting, then use the current set of bandwidths as final bandwidths. - pool : A multiprocessing Pool object to enbale parallel fitting; - default is None verbose : Boolean If true, bandwidth searching history is printed out; default is False. @@ -268,7 +268,6 @@ def search(self, search_method='golden_section', criterion='AICc', self.bw_min = bw_min self.bw_max = bw_max self.bws_same_times = bws_same_times - self.pool = pool self.verbose = verbose if len(multi_bw_min) == k: @@ -319,14 +318,13 @@ def search(self, search_method='golden_section', criterion='AICc', self._bw() self.sel_hist = self.bw[-1] - self.pool = None return self.bw[0] def _bw(self): gwr_func = lambda bw: getDiag[self.criterion](GWR( self.coords, self.y, self.X_loc, bw, family=self.family, kernel= self.kernel, fixed=self.fixed, constant=self.constant, offset=self. - offset, spherical=self.spherical).fit(lite=True, pool=self.pool)) + offset, spherical=self.spherical, n_jobs=self.n_jobs).fit(lite=True)) self._optimized_function = gwr_func @@ -381,20 +379,20 @@ def _mbw(self): def gwr_func(y, X, bw): return GWR(coords, y, X, bw, family=family, kernel=kernel, fixed=fixed, offset=offset, constant=False, - spherical=self.spherical, hat_matrix=False).fit( - lite=True, pool=self.pool) + spherical=self.spherical, hat_matrix=False,n_jobs=self.n_jobs).fit( + lite=True) def bw_func(y, X): selector = Sel_BW(coords, y, X, X_glob=[], family=family, kernel=kernel, fixed=fixed, offset=offset, - constant=False, spherical=self.spherical) + constant=False, spherical=self.spherical,n_jobs=self.n_jobs) return selector def sel_func(bw_func, bw_min=None, bw_max=None): return bw_func.search( search_method=search_method, criterion=criterion, bw_min=bw_min, bw_max=bw_max, interval=interval, tol=tol, - max_iter=max_iter, pool=self.pool, verbose=False) + max_iter=max_iter, verbose=False) self.bw = multi_bw(self.init_multi, y, X, n, k, family, self.tol_multi, self.max_iter_multi, self.rss_score, gwr_func, diff --git a/mgwr/tests/test_parallel.py b/mgwr/tests/test_parallel.py index 79d8aab..a485cdf 100644 --- a/mgwr/tests/test_parallel.py +++ b/mgwr/tests/test_parallel.py @@ -6,7 +6,6 @@ import libpysal as ps from libpysal import io import numpy as np -import multiprocessing as mp import unittest import pandas from types import SimpleNamespace @@ -16,7 +15,7 @@ from spglm.family import Gaussian, Poisson, Binomial -class TestGWRGaussianPool(unittest.TestCase): +class TestGWRGaussianParallel(unittest.TestCase): def setUp(self): data_path = ps.examples.get_path("GData_utm.csv") data = io.open(data_path) @@ -37,9 +36,8 @@ def setUp(self): MGWR_path = os.path.join( os.path.dirname(__file__), 'georgia_mgwr_results.csv') self.MGWR = pandas.read_csv(MGWR_path) - self.pool = mp.Pool(4) - def test_BS_NN_Pool(self): + def test_BS_NN_Parallel(self): est_Int = self.BS_NN.by_col(' est_Intercept') se_Int = self.BS_NN.by_col(' se_Intercept') t_Int = self.BS_NN.by_col(' t_Intercept') @@ -69,9 +67,9 @@ def test_BS_NN_Pool(self): spat_var_p_vals = [0., 0.0, 0.5, 0.2] model = GWR(self.coords, self.y, self.X, bw=90.000, fixed=False, - sigma2_v1=False) + sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() adj_alpha = rslt.adj_alpha alpha = 0.01017489 @@ -117,14 +115,14 @@ def test_BS_NN_Pool(self): np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00) sel = Sel_BW(self.coords, self.y, self.X) - bw = sel.search(pool=self.pool) - model = GWR(self.coords, self.y, self.X, bw) - result = model.fit(pool=self.pool) + bw = sel.search() + model = GWR(self.coords, self.y, self.X, bw,n_jobs=-1) + result = model.fit() p_vals = result.spatial_variability(sel, 10) np.testing.assert_allclose(spat_var_p_vals, p_vals, rtol=1e-04) - def test_GS_F_Pool(self): + def test_GS_F_Parallel(self): est_Int = self.GS_F.by_col(' est_Intercept') se_Int = self.GS_F.by_col(' se_Intercept') t_Int = self.GS_F.by_col(' t_Intercept') @@ -145,9 +143,9 @@ def test_GS_F_Pool(self): cooksD = np.array(self.GS_F.by_col(' CooksD')).reshape((-1, 1)) model = GWR(self.coords, self.y, self.X, bw=87308.298, - kernel='gaussian', fixed=True, sigma2_v1=False) + kernel='gaussian', fixed=True, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -177,26 +175,25 @@ def test_GS_F_Pool(self): np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04) np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00) - def test_MGWR_Pool(self): + def test_MGWR_Parallel(self): std_y = (self.y - self.y.mean()) / self.y.std() std_X = (self.mgwr_X - self.mgwr_X.mean(axis=0)) / \ self.mgwr_X.std(axis=0) - selector = Sel_BW(self.coords, std_y, std_X, multi=True, constant=True) - selector.search(multi_bw_min=[2], multi_bw_max=[159], pool=self.pool) + selector = Sel_BW(self.coords, std_y, std_X, multi=True, constant=True,n_jobs=-1) + selector.search(multi_bw_min=[2], multi_bw_max=[159]) model = MGWR(self.coords, std_y, std_X, selector=selector, - constant=True) + constant=True,n_jobs=-1) - rslt = model.fit(pool=self.pool) - rslt_2 = model.fit(n_chunks=2, - pool=self.pool) #testing for n_chunks > 1 - rslt_3 = model.fit(n_chunks=3, pool=self.pool) - rslt_20 = model.fit(n_chunks=20, pool=self.pool) + rslt = model.fit() + rslt_2 = model.fit(n_chunks=2) #testing for n_chunks > 1 + rslt_3 = model.fit(n_chunks=3) + rslt_20 = model.fit(n_chunks=20) model_hat = MGWR(self.coords, std_y, std_X, selector=selector, - constant=True, hat_matrix=True) - rslt_hat = model_hat.fit(pool=self.pool) - rslt_hat_2 = model_hat.fit(n_chunks=2, pool=self.pool) + constant=True, hat_matrix=True,n_jobs=-1) + rslt_hat = model_hat.fit() + rslt_hat_2 = model_hat.fit(n_chunks=2) np.testing.assert_allclose(rslt_hat.R, rslt_hat_2.R, atol=1e-07) np.testing.assert_allclose( @@ -275,7 +272,7 @@ def test_Prediction(self): coords_test = coords[test] model = GWR(self.coords, self.y, self.X, 93, family=Gaussian(), - fixed=False, kernel='bisquare', sigma2_v1=False) + fixed=False, kernel='bisquare', sigma2_v1=False,n_jobs=-1) results = model.predict(coords_test, X_test) params = np.array([ @@ -324,7 +321,7 @@ def test_Prediction(self): np.testing.assert_allclose(predictions, results.predictions, rtol=1e-05) - def test_BS_NN_longlat_Pool(self): + def test_BS_NN_longlat_Parallel(self): GA_longlat = os.path.join( os.path.dirname(__file__), 'ga_bs_nn_longlat_listwise.csv') self.BS_NN_longlat = io.open(GA_longlat) @@ -357,9 +354,9 @@ def test_BS_NN_longlat_Pool(self): 1)) model = GWR(coords_longlat, self.y, self.X, bw=90.000, fixed=False, - spherical=True, sigma2_v1=False) + spherical=True, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -392,7 +389,7 @@ def test_BS_NN_longlat_Pool(self): np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00) -class TestGWRPoissonPool(unittest.TestCase): +class TestGWRPoissonParallel(unittest.TestCase): def setUp(self): data_path = os.path.join( os.path.dirname(__file__), 'tokyo/Tokyomortality.csv') @@ -422,9 +419,8 @@ def setUp(self): os.path.join( os.path.dirname(__file__), 'tokyo/tokyo_BS_NN_OFF_listwise.csv')) - self.pool = mp.Pool(4) - def test_BS_F_Pool(self): + def test_BS_F_Parallel(self): est_Int = self.BS_F.by_col(' est_Intercept') se_Int = self.BS_F.by_col(' se_Intercept') t_Int = self.BS_F.by_col(' t_Intercept') @@ -445,9 +441,9 @@ def test_BS_F_Pool(self): model = GWR(self.coords, self.y, self.X, bw=26029.625, family=Poisson(), kernel='bisquare', fixed=True, - sigma2_v1=False) + sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -475,7 +471,7 @@ def test_BS_F_Pool(self): np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05) np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) - def test_BS_NN_Pool(self): + def test_BS_NN(self): est_Int = self.BS_NN.by_col(' est_Intercept') se_Int = self.BS_NN.by_col(' se_Intercept') t_Int = self.BS_NN.by_col(' t_Intercept') @@ -495,9 +491,9 @@ def test_BS_NN_Pool(self): pdev = np.array(self.BS_NN.by_col(' localpdev')).reshape((-1, 1)) model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(), - kernel='bisquare', fixed=False, sigma2_v1=False) + kernel='bisquare', fixed=False, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -527,7 +523,7 @@ def test_BS_NN_Pool(self): np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04) np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) - def test_BS_NN_Offset_Pool(self): + def test_BS_NN_Offset_Parallel(self): est_Int = self.BS_NN_OFF.by_col(' est_Intercept') se_Int = self.BS_NN_OFF.by_col(' se_Intercept') t_Int = self.BS_NN_OFF.by_col(' t_Intercept') @@ -548,9 +544,9 @@ def test_BS_NN_Offset_Pool(self): model = GWR(self.coords, self.y, self.X, bw=100, offset=self.off, family=Poisson(), kernel='bisquare', fixed=False, - sigma2_v1=False) + sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -595,7 +591,7 @@ def test_BS_NN_Offset_Pool(self): np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-03, atol=1e-02) np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-04, atol=1e-02) - def test_GS_F_Pool(self): + def test_GS_F_Parallel(self): est_Int = self.GS_F.by_col(' est_Intercept') se_Int = self.GS_F.by_col(' se_Intercept') t_Int = self.GS_F.by_col(' t_Intercept') @@ -615,9 +611,9 @@ def test_GS_F_Pool(self): pdev = np.array(self.GS_F.by_col(' localpdev')).reshape((-1, 1)) model = GWR(self.coords, self.y, self.X, bw=8764.474, family=Poisson(), - kernel='gaussian', fixed=True, sigma2_v1=False) + kernel='gaussian', fixed=True, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -644,7 +640,7 @@ def test_GS_F_Pool(self): np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04) np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) - def test_GS_NN_Pool(self): + def test_GS_NN_Parallel(self): est_Int = self.GS_NN.by_col(' est_Intercept') se_Int = self.GS_NN.by_col(' se_Intercept') t_Int = self.GS_NN.by_col(' t_Intercept') @@ -664,9 +660,9 @@ def test_GS_NN_Pool(self): pdev = np.array(self.GS_NN.by_col(' localpdev')).reshape((-1, 1)) model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(), - kernel='gaussian', fixed=False, sigma2_v1=False) + kernel='gaussian', fixed=False, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -694,7 +690,7 @@ def test_GS_NN_Pool(self): np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) -class TestGWRBinomialPool(unittest.TestCase): +class TestGWRBinomialParallel(unittest.TestCase): def setUp(self): data_path = os.path.join( os.path.dirname(__file__), 'clearwater/landslides.csv') @@ -724,9 +720,8 @@ def setUp(self): os.path.join( os.path.dirname(__file__), 'clearwater/clearwater_GS_NN_listwise.csv')) - self.pool = mp.Pool(4) - def test_BS_F_Pool(self): + def test_BS_F_Parallel(self): est_Int = self.BS_F.by_col(' est_Intercept') se_Int = self.BS_F.by_col(' se_Intercept') t_Int = self.BS_F.by_col(' t_Intercept') @@ -753,9 +748,9 @@ def test_BS_F_Pool(self): model = GWR(self.coords, self.y, self.X, bw=19642.170, family=Binomial(), kernel='bisquare', fixed=True, - sigma2_v1=False) + sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -791,7 +786,7 @@ def test_BS_F_Pool(self): # code from Jing's python version, which both yield the same #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) - def test_BS_NN_Pool(self): + def test_BS_NN_Parallel(self): est_Int = self.BS_NN.by_col(' est_Intercept') se_Int = self.BS_NN.by_col(' se_Intercept') t_Int = self.BS_NN.by_col(' t_Intercept') @@ -817,9 +812,9 @@ def test_BS_NN_Pool(self): pdev = self.BS_NN.by_col(' localpdev') model = GWR(self.coords, self.y, self.X, bw=158, family=Binomial(), - kernel='bisquare', fixed=False, sigma2_v1=False) + kernel='bisquare', fixed=False, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -858,7 +853,7 @@ def test_BS_NN_Pool(self): # code from Jing's python version, which both yield the same #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) - def test_GS_F_Pool(self): + def test_GS_F_Parallel(self): est_Int = self.GS_F.by_col(' est_Intercept') se_Int = self.GS_F.by_col(' se_Intercept') t_Int = self.GS_F.by_col(' t_Intercept') @@ -885,9 +880,9 @@ def test_GS_F_Pool(self): model = GWR(self.coords, self.y, self.X, bw=8929.061, family=Binomial(), kernel='gaussian', fixed=True, - sigma2_v1=False) + sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) @@ -923,7 +918,7 @@ def test_GS_F_Pool(self): # code from Jing's python version, which both yield the same #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05) - def test_GS_NN_Pool(self): + def test_GS_NN_Parallel(self): est_Int = self.GS_NN.by_col(' est_Intercept') se_Int = self.GS_NN.by_col(' se_Intercept') t_Int = self.GS_NN.by_col(' t_Intercept') @@ -949,9 +944,9 @@ def test_GS_NN_Pool(self): pdev = self.GS_NN.by_col(' localpdev') model = GWR(self.coords, self.y, self.X, bw=64, family=Binomial(), - kernel='gaussian', fixed=False, sigma2_v1=False) + kernel='gaussian', fixed=False, sigma2_v1=False,n_jobs=-1) - rslt = model.fit(pool=self.pool) + rslt = model.fit() AICc = get_AICc(rslt) AIC = get_AIC(rslt) diff --git a/notebooks/GWR_MGWR_Parallel_Example.ipynb b/notebooks/GWR_MGWR_Parallel_Example.ipynb index 556be7d..89caeb2 100644 --- a/notebooks/GWR_MGWR_Parallel_Example.ipynb +++ b/notebooks/GWR_MGWR_Parallel_Example.ipynb @@ -1,26 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notes:\n", + "- The parallelization has changed its interface, now it is using `joblib` library by specifying a `n_jobs` parameter in ,`GWR()` or `MGWR()` and `Sel_BW()`.\n", + "\n", + "- `mgwr` has soft dependency of `numba`, please install numba if you need better performance (`pip install numba`)." + ] + }, { "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/Ziqi/anaconda/lib/python3.5/site-packages/libpysal/io/iohandlers/__init__.py:25: UserWarning: SQLAlchemy and Geomet not installed, database I/O disabled\n", - " warnings.warn('SQLAlchemy and Geomet not installed, database I/O disabled')\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "import geopandas as gp\n", "import multiprocessing as mp\n", "import libpysal as ps\n", "import sys\n", - "sys.path.append('/Users/Ziqi/Desktop/mgwr/')\n", + "\n", + "sys.path.append('/Users/ziqili/Desktop/mgwr-2')\n", "from mgwr.gwr import GWR,MGWR\n", "from mgwr.sel_bw import Sel_BW" ] @@ -50,34 +52,16 @@ "b_coords = list(zip(u, v))" ] }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: OMP_NUM_THREADS=1\n" - ] - } - ], - "source": [ - "#This might be needed to turn off the OpenMP multi-threading\n", - "%env OMP_NUM_THREADS = 1" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### GWR No Parallel" + "### GWR No Parallel (n_jobs=1)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -85,8 +69,8 @@ "output_type": "stream", "text": [ "192.0\n", - "CPU times: user 13.9 s, sys: 116 ms, total: 14 s\n", - "Wall time: 14.2 s\n" + "CPU times: user 56.2 s, sys: 1.22 s, total: 57.4 s\n", + "Wall time: 5.38 s\n" ] } ], @@ -95,51 +79,79 @@ "gwr_selector = Sel_BW(b_coords, b_y, b_X)\n", "gwr_bw = gwr_selector.search()\n", "print(gwr_bw)\n", - "gwr_results = GWR(b_coords, b_y, b_X, gwr_bw).fit()" + "gwr_results = GWR(b_coords, b_y, b_X, gwr_bw,n_jobs=1).fit() #n_jobs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### MGWR No Parallel" + "### MGWR No Parallel (n_jobs=1)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6b17cdb5d94444769b5396723ca103e6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Backfitting: 0%| | 0/200 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from time import time\n", + "import matplotlib.pyplot as plt\n", + "\n", + "n_jobs_values = np.arange(1,13)\n", + "performance_times_avg = []\n", + "\n", + "for n_jobs in n_jobs_values:\n", + " times = []\n", + " for _ in range(5): \n", + " start_time = time()\n", + "\n", + " gwr_selector = Sel_BW(b_coords, b_y, b_X,n_jobs=n_jobs) #n_jobs\n", + " gwr_bw = gwr_selector.search()\n", + " gwr_results = GWR(b_coords, b_y, b_X, gwr_bw,n_jobs=n_jobs).fit() #n_jobs\n", + "\n", + " end_time = time()\n", + " times.append(end_time - start_time)\n", + " average_time = sum(times) / len(times)\n", + " performance_times_avg.append(average_time)\n", + "\n", + "# Plotting the results\n", + "plt.plot(n_jobs_values, performance_times_avg,marker='o')\n", + "plt.xlabel('n_jobs')\n", + "plt.ylabel('Time Taken (seconds)')\n", + "plt.title('GWR Performance with Different n_jobs Values')" ] }, { @@ -183,38 +266,1819 @@ "execution_count": 9, "metadata": {}, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "26f25e3470be4f14bfffdcc2fcdf213b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Backfitting: 0%| | 0/200 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plotting the results\n", + "plt.plot(n_jobs_values, performance_times_avg,marker='o')\n", + "plt.xlabel('n_jobs')\n", + "plt.ylabel('Time Taken (seconds)')\n", + "plt.title('MGWR Performance with Different n_jobs Values')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -228,7 +2092,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.4" + "version": "3.11.5" } }, "nbformat": 4,