Skip to content

Commit

Permalink
Merge pull request EducationalTestingService#118 from nineil/feature/…
Browse files Browse the repository at this point in the history
…add_sgd

Add SGD
  • Loading branch information
dan-blanchard committed Jun 12, 2014
2 parents 29a3a3a + 7bbe8da commit 3f25150
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
34 changes: 26 additions & 8 deletions skll/learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@
GradientBoostingRegressor, RandomForestClassifier,
RandomForestRegressor)
from sklearn.linear_model import (ElasticNet, Lasso, LinearRegression,
LogisticRegression, Ridge)
LogisticRegression, Ridge, SGDClassifier,
SGDRegressor)
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC, SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from skll.data import ExamplesTuple
from skll.metrics import _CORRELATION_METRICS, use_score_func
from skll.version import VERSION
Expand Down Expand Up @@ -74,15 +74,26 @@
'ElasticNet': [{'alpha': [0.01, 0.1, 1.0, 10.0,
100.0]}],
'SVR': [{'C': [0.01, 0.1, 1.0, 10.0, 100.0]}],
'LinearRegression': [{}]}
'LinearRegression': [{}],
'SGDClassifier': [{'alpha': [0.000001, 0.00001, 0.0001,
0.001, 0.01],
'penalty': ['l1', 'l2',
'elasticnet']}],
'SGDRegressor': [{'alpha': [0.000001, 0.00001, 0.0001,
0.001, 0.01],
'penalty': ['l1', 'l2',
'elasticnet']}]}

_REGRESSION_MODELS = frozenset(['DecisionTreeRegressor', 'ElasticNet',
'GradientBoostingRegressor', 'Lasso',
'LinearRegression', 'RandomForestRegressor',
'Ridge', 'SVR'])
'Ridge', 'SVR', 'SGDRegressor'])

_REQUIRES_DENSE = frozenset(['DecisionTreeClassifier', 'DecisionTreeRegressor',
'GradientBoostingClassifier',
'GradientBoostingRegressor', 'MultinomialNB',
'RandomForestClassifier', 'RandomForestRegressor'])

MAX_CONCURRENT_PROCESSES = int(os.getenv('SKLL_MAX_CONCURRENT_PROCESSES', '5'))


Expand Down Expand Up @@ -301,6 +312,7 @@ def init(self, constrain=True, rescale=True, **kwargs):
# Return modified class
return cls


# Rescaled regressors
@rescaled
class RescaledDecisionTreeRegressor(DecisionTreeRegressor):
Expand Down Expand Up @@ -342,6 +354,11 @@ class RescaledSVR(SVR):
pass


# Rescaled counterpart of SGDRegressor, following the same pattern as the
# other "Rescaled*" regressors in this module.  The class body is
# intentionally empty: everything beyond what SGDRegressor provides comes
# from the ``rescaled`` class decorator, which returns the modified class.
# NOTE(review): the decorator's definition is outside this hunk -- confirm
# its exact behaviour (constrain/rescale keyword arguments) there.
@rescaled
class RescaledSGDRegressor(SGDRegressor):
    # No members of its own; kept as ``pass`` (rather than a docstring) so
    # the class's ``__doc__`` is left exactly as before for the decorator.
    pass


class Learner(object):
"""
A simpler learner interface around many scikit-learn classification
Expand Down Expand Up @@ -427,13 +444,15 @@ def __init__(self, model_type, probability=False, feature_scaling='none',
elif self._model_type == 'SVR':
self._model_kwargs['cache_size'] = 1000
self._model_kwargs['kernel'] = 'linear'

elif self._model_type == 'SGDClassifier':
self._model_kwargs['loss'] = 'log'
if self._model_type in {'RandomForestClassifier', 'LinearSVC',
'LogisticRegression', 'DecisionTreeClassifier',
'GradientBoostingClassifier',
'GradientBoostingRegressor',
'DecisionTreeRegressor',
'RandomForestRegressor'}:
'RandomForestRegressor', 'SGDClassifier',
'SGDRegressor'}:
self._model_kwargs['random_state'] = 123456789

if model_kwargs:
Expand Down Expand Up @@ -523,7 +542,7 @@ def model_params(self):
for feat, idx in iteritems(self.feat_vectorizer.vocabulary_):
if coef[idx]:
res[feat] = correction * coef[idx]
#res[feat] = coef[idx]
# res[feat] = coef[idx]
elif isinstance(self._model, BaseLibLinear):
label_list = self.label_list

Expand Down Expand Up @@ -677,7 +696,6 @@ def _train_setup(self, examples):
with_mean=False,
with_std=False)


def train(self, examples, param_grid=None, grid_search_folds=5,
grid_search=True, grid_objective='f1_score_micro',
grid_jobs=None, shuffle=True):
Expand Down
2 changes: 1 addition & 1 deletion tests/configs/test_regression1.template.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ task=evaluate
[Input]
train_location=
featuresets=[["test_regression1"]]
learners = ['DecisionTreeRegressor', 'GradientBoostingRegressor', 'RandomForestRegressor', 'Ridge', 'SVR', 'RescaledDecisionTreeRegressor', 'RescaledGradientBoostingRegressor', 'RescaledRandomForestRegressor', 'RescaledRidge', 'RescaledSVR', 'Lasso', 'RescaledLasso', 'LinearRegression', 'RescaledLinearRegression', 'ElasticNet', 'RescaledElasticNet']
learners = ['DecisionTreeRegressor', 'GradientBoostingRegressor', 'RandomForestRegressor', 'Ridge', 'SVR', 'RescaledDecisionTreeRegressor', 'RescaledGradientBoostingRegressor', 'RescaledRandomForestRegressor', 'RescaledRidge', 'RescaledSVR', 'Lasso', 'RescaledLasso', 'LinearRegression', 'RescaledLinearRegression', 'ElasticNet', 'RescaledElasticNet', 'SGDRegressor', 'RescaledSGDRegressor']
suffix=.jsonlines

[Tuning]
Expand Down
2 changes: 1 addition & 1 deletion tests/configs/test_sparse.template.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ task=evaluate
train_location=
test_location=
featuresets=[["test_sparse"]]
learners=['LogisticRegression', 'LinearSVC', 'SVC', 'MultinomialNB', 'DecisionTreeClassifier', 'RandomForestClassifier', 'GradientBoostingClassifier']
learners=['LogisticRegression', 'LinearSVC', 'SVC', 'MultinomialNB', 'DecisionTreeClassifier', 'RandomForestClassifier', 'GradientBoostingClassifier', 'SGDClassifier']
suffix=.jsonlines

[Tuning]
Expand Down

0 comments on commit 3f25150

Please sign in to comment.