Skip to content

Commit

Permalink
Update pre-commit (#162)
Browse files (browse the repository at this point in the history)
* Update pre-commit configuration
* Black formatting
* Fix Flake8 warnings
* Fix mypy issues
  • Loading branch information
PGijsbers authored Aug 2, 2022
1 parent 1bed842 commit 565348d
Show file tree
Hide file tree
Showing 64 changed files with 407 additions and 350 deletions.
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
repos:
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 22.6.0
hooks:
- id: black
language_version: python3.8
language_version: python3.10
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.761
rev: v0.971
hooks:
- id: mypy
files: gama/*
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.9
files: gama/.*
- repo: https://github.com/pycqa/flake8
rev: 5.0.3
hooks:
- id: flake8
files: gama/*
files: gama/.*
12 changes: 6 additions & 6 deletions gama/GamaClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


class GamaClassifier(Gama):
""" Gama with adaptations for (multi-class) classification. """
"""Gama with adaptations for (multi-class) classification."""

def __init__(self, config=None, scoring="neg_log_loss", *args, **kwargs):
if not config:
Expand All @@ -38,7 +38,7 @@ def __init__(self, config=None, scoring="neg_log_loss", *args, **kwargs):
super().__init__(*args, **kwargs, config=config, scoring=scoring)

def _predict(self, x: pd.DataFrame):
""" Predict the target for input X.
"""Predict the target for input X.
Parameters
----------
Expand All @@ -57,7 +57,7 @@ def _predict(self, x: pd.DataFrame):
return y

def _predict_proba(self, x: pd.DataFrame):
""" Predict the class probabilities for input x.
"""Predict the class probabilities for input x.
Predict target for x, using the best found pipeline(s) during the `fit` call.
Expand All @@ -75,7 +75,7 @@ def _predict_proba(self, x: pd.DataFrame):
return self.model.predict_proba(x) # type: ignore

def predict_proba(self, x: Union[pd.DataFrame, np.ndarray]):
""" Predict the class probabilities for input x.
"""Predict the class probabilities for input x.
Predict target for x, using the best found pipeline(s) during the `fit` call.
Expand All @@ -99,7 +99,7 @@ def predict_proba_from_file(
target_column: Optional[str] = None,
encoding: Optional[str] = None,
):
""" Predict the class probabilities for input in the arff_file.
"""Predict the class probabilities for input in the arff_file.
Parameters
----------
Expand All @@ -124,7 +124,7 @@ def predict_proba_from_file(
return self._predict_proba(x)

def fit(self, x, y, *args, **kwargs):
""" Should use base class documentation. """
"""Should use base class documentation."""
y_ = y.squeeze() if isinstance(y, pd.DataFrame) else y
self._label_encoder = LabelEncoder().fit(y_)
if any([isinstance(yi, str) for yi in y_]):
Expand Down
4 changes: 2 additions & 2 deletions gama/GamaRegressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class GamaRegressor(Gama):
""" Gama with adaptations for regression. """
"""Gama with adaptations for regression."""

def __init__(self, config=None, scoring="neg_mean_squared_error", *args, **kwargs):
""" """
Expand All @@ -17,7 +17,7 @@ def __init__(self, config=None, scoring="neg_mean_squared_error", *args, **kwarg
super().__init__(*args, **kwargs, config=config, scoring=scoring)

def _predict(self, x: pd.DataFrame):
""" Predict the target for input X.
"""Predict the target for input X.
Parameters
----------
Expand Down
59 changes: 29 additions & 30 deletions gama/configuration/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from gama.genetic_programming.components import Primitive, Terminal, DATA_TERMINAL


def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict[str, List], Dict[str, Callable]]:
""" Create a pset for the given configuration dictionary.
def pset_from_config(
configuration: Dict[Union[str, object], Any]
) -> Tuple[Dict[str, List], Dict[str, Callable]]:
"""Create a pset for the given configuration dictionary.
Given a configuration dictionary specifying operators (e.g. sklearn
estimators), their hyperparameters and values for each hyperparameter,
Expand All @@ -19,12 +21,13 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
Side effect: Imports the classes of each primitive.
returns:
pset - Dict[str, List]: maps return-types to a list of Primitives and/or Terminals
parameter_check - Dict[str, Callable]:
maps Primitive name to a function which verifies the correct initialization of hyperparameters.
pset - Dict[str, List]:
maps return-types to a list of Primitives and/or Terminals
parameter_check - Dict[str, Callable]:
maps Primitive name to a check for the validity of the hp configuration
"""

pset = defaultdict(list)
pset: Dict[str, List[Union[Primitive, Terminal]]] = defaultdict(list)
parameter_checks = {}

# Make sure the str-keys are evaluated first, they describe shared hyperparameters.
Expand All @@ -36,9 +39,9 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
# Specification of shared hyperparameters
for value in values:
pset[key].append(Terminal(value=value, output=key, identifier=key))
elif isinstance(key, object):
elif isinstance(key, type):
# Specification of operator (learner, preprocessor)
hyperparameter_types = []
hyperparameter_types: List[str] = []
for name, param_values in sorted(values.items()):
# We construct a new type for each hyperparameter, so we can specify
# it as terminal type, making sure it matches with expected
Expand All @@ -56,41 +59,37 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
hyperparameter_types.append(hp_name)
for value in param_values:
pset[hp_name].append(
Terminal(value=value, output=name, identifier=hp_name,)
Terminal(
value=value,
output=name,
identifier=hp_name,
)
)

# After registering the hyperparameter types,
# we can register the operator itself.
transformer_tags = [
"DATA_PREPROCESSING",
"FEATURE_SELECTION",
"DATA_TRANSFORMATION",
]
if issubclass(key, sklearn.base.TransformerMixin) or (
hasattr(key, "metadata")
and key.metadata.query()["primitive_family"] in transformer_tags
):
if issubclass(key, sklearn.base.TransformerMixin):
pset[DATA_TERMINAL].append(
Primitive(
input=hyperparameter_types, output=DATA_TERMINAL, identifier=key
input=tuple(hyperparameter_types),
output=DATA_TERMINAL,
identifier=key,
)
)
elif issubclass(key, sklearn.base.ClassifierMixin) or (
hasattr(key, "metadata")
and key.metadata.query()["primitive_family"] == "CLASSIFICATION"
):
elif issubclass(key, sklearn.base.ClassifierMixin):
pset["prediction"].append(
Primitive(
input=hyperparameter_types, output="prediction", identifier=key
input=tuple(hyperparameter_types),
output="prediction",
identifier=key,
)
)
elif issubclass(key, sklearn.base.RegressorMixin) or (
hasattr(key, "metadata")
and key.metadata.query()["primitive_family"] == "REGRESSION"
):
elif issubclass(key, sklearn.base.RegressorMixin):
pset["prediction"].append(
Primitive(
input=hyperparameter_types, output="prediction", identifier=key
input=tuple(hyperparameter_types),
output="prediction",
identifier=key,
)
)
else:
Expand All @@ -108,7 +107,7 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict


def merge_configurations(c1: Dict, c2: Dict) -> Dict:
""" Takes two configurations and merges them together. """
"""Takes two configurations and merges them together."""
# Should refactor out 6 indentation levels
merged: Dict[Any, Any] = defaultdict(lambda: None, c1)
for algorithm, hparams2 in c2.items():
Expand Down
2 changes: 1 addition & 1 deletion gama/dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def build_app():


def create_generic_layout():
""" Creates the generic layout of tabs and their content pages. """
"""Creates the generic layout of tabs and their content pages."""
tab_banner_style = {
"border-top-left-radius": "3px",
"background-color": "#f9f9f9",
Expand Down
2 changes: 1 addition & 1 deletion gama/dashboard/components/cli_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def enqueue_output(out, queue_: queue.Queue):


class CLIWindow:
""" A Component for Dash App which simulates a console window """
"""A Component for Dash App which simulates a console window"""

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion gama/dashboard/components/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def markdown_header(text: str, level: int = 4, with_horizontal_rule: bool = True


class CollapsableSection:
""" A Form with a ButtonHeader which when presses collapses/expands the Form. """
"""A Form with a ButtonHeader which when presses collapses/expands the Form."""

def __init__(
self, header: str, controls: List[dbc.FormGroup], start_open: bool = True
Expand Down
3 changes: 2 additions & 1 deletion gama/dashboard/pages/analysispage.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ def build_page(self, app, controller):
)

self._content = html.Div(
id=self.id, children=[visualization_container, report_select_container],
id=self.id,
children=[visualization_container, report_select_container],
)

app.callback(
Expand Down
4 changes: 2 additions & 2 deletions gama/dashboard/pages/base_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

class BasePage(abc.ABC):
def __init__(self, name: str, alignment: int, starts_hidden: bool = False):
""" Defines the basic behavior of a page.
"""Defines the basic behavior of a page.
Parameters
----------
Expand Down Expand Up @@ -31,5 +31,5 @@ def content(self):

@abc.abstractmethod
def build_page(self, app, controller):
""" Populate the `content` field with html, register any callbacks. """
"""Populate the `content` field with html, register any callbacks."""
raise NotImplementedError
4 changes: 2 additions & 2 deletions gama/dashboard/pages/homepage.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def build_page(self, app, controller):
self._register_callbacks(app)

def _build_content(self, app, controller) -> html.Div:
""" Build all the components of the page. """
"""Build all the components of the page."""
configuration = build_configuration_menu(app, controller)
configuration.style["width"] = "35%"
configuration.style["float"] = "left"
Expand Down Expand Up @@ -163,7 +163,7 @@ def text_input(label_text: str, default_text: str, id_: str):
def dropdown(
label_text: str, id_: str, options: Dict[str, str], value: Optional[str] = None
):
""" options formatted as {LABEL_KEY: LABEL_TEXT, ...} """
"""options formatted as {LABEL_KEY: LABEL_TEXT, ...}"""
return dbc.FormGroup(
[
dbc.Label(label_text, html_for=id_, width=6),
Expand Down
5 changes: 4 additions & 1 deletion gama/dashboard/pages/runningpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ def plot_area(self):
id="evaluation-graph",
figure={
"data": [],
"layout": dict(hovermode="closest", transition={"duration": 500},),
"layout": dict(
hovermode="closest",
transition={"duration": 500},
),
},
)
return html.Div(
Expand Down
14 changes: 7 additions & 7 deletions gama/dashboard/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
# elif aggregate == "aggregate":
# plots = aggregate_best_over_time(aggregate_df, first_metric_max)
layout = dict(
title=f"Best score over time",
title="Best score over time",
xaxis=dict(title="time (s)"),
yaxis=dict(title=f"max {first_metric}"),
hovermode="closest",
Expand Down Expand Up @@ -84,7 +84,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
# go.Bar(x=size_ratio.index.values, y=size_ratio.values, name=method)
# )
layout = dict(
title=f"Ratio of pipelines by size",
title="Ratio of pipelines by size",
xaxis=dict(title="pipeline length"),
yaxis=dict(title="pipeline count"),
)
Expand All @@ -103,7 +103,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
)
)
layout = dict(
title=f"Ratio of pipelines by learner",
title="Ratio of pipelines by learner",
xaxis=dict(title="pipeline length"),
yaxis=dict(title="learner"),
)
Expand All @@ -119,7 +119,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
# ].duration.dt.total_seconds()
# plots.append(go.Histogram(x=time_s, name=method))
layout = dict(
title=f"Pipeline Evaluation Times",
title="Pipeline Evaluation Times",
xaxis=dict(title="duration (s)"),
yaxis=dict(title="count"),
)
Expand All @@ -133,7 +133,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
go.Bar(x=count_by_rung.rung, y=count_by_rung.n, name=report.name)
)
layout = dict(
title=f"#Evaluations by Rung",
title="#Evaluations by Rung",
xaxis=dict(title="rung"),
yaxis=dict(title="count"),
)
Expand All @@ -152,7 +152,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
)
)
layout = dict(
title=f"Time spent per Rung",
title="Time spent per Rung",
xaxis=dict(title="rung"),
yaxis=dict(title="time (s)"),
)
Expand Down Expand Up @@ -247,7 +247,7 @@ def aggregate_best_over_time(aggregate: pd.DataFrame, y_axis: str):


def aggregate_plot(aggregate: pd.DataFrame, x_axis: str, y_axis: str):
""" Create an aggregate plot over multiple reports.
"""Create an aggregate plot over multiple reports.
Aggregates the mean and std of `y_axis` by `x_axis`.
Expand Down
8 changes: 4 additions & 4 deletions gama/data_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def series_looks_categorical(series) -> bool:


def infer_categoricals_inplace(df: pd.DataFrame) -> None:
""" Use simple heuristics to convert columns guessed to be categorical. """
"""Use simple heuristics to convert columns guessed to be categorical."""
for column in df:
if series_looks_categorical(df[column]):
df[column] = df[column].astype("category")
Expand All @@ -36,7 +36,7 @@ def numpy_to_dataframe(x: np.ndarray) -> pd.DataFrame:
def format_y(
y: Union[pd.DataFrame, pd.Series, np.ndarray], y_type: Type = pd.Series
) -> Union[pd.DataFrame, pd.Series]:
""" Transforms a target vector or indicator matrix to a single series (or 1d df) """
"""Transforms a target vector or indicator matrix to a single series (or 1d df)"""
if not isinstance(y, (np.ndarray, pd.Series, pd.DataFrame)):
raise TypeError("y must be np.ndarray, pd.Series or pd.DataFrame.")
if y_type not in [pd.Series, pd.DataFrame]:
Expand All @@ -63,7 +63,7 @@ def format_y(
def remove_unlabeled_rows(
x: pd.DataFrame, y: Union[pd.Series, pd.DataFrame]
) -> Tuple[pd.DataFrame, Union[pd.Series, pd.DataFrame]]:
""" Removes all rows from x and y where y is nan. """
"""Removes all rows from x and y where y is nan."""
if isinstance(y, pd.DataFrame):
unlabeled = y.iloc[:, 0].isnull()
else:
Expand All @@ -84,7 +84,7 @@ def format_x_y(
y_type: Type = pd.Series,
remove_unlabeled: bool = True,
) -> Tuple[pd.DataFrame, Union[pd.DataFrame, pd.Series]]:
""" Take (X,y) data and convert it to a (pd.DataFrame, pd.Series) tuple.
"""Take (X,y) data and convert it to a (pd.DataFrame, pd.Series) tuple.
Parameters
----------
Expand Down
Loading

0 comments on commit 565348d

Please sign in to comment.