Update pre-commit (#162)

* Update pre-commit configuration
* Black formatting
* Fix Flake8 warnings
* Fix mypy issues
openml-labs · Aug 2, 2022 · 565348d · 565348d
1 parent 1bed842
commit 565348d
Show file tree

Hide file tree

Showing 64 changed files with 407 additions and 350 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,16 +1,16 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 19.10b0
+    rev: 22.6.0
     hooks:
       - id: black
-        language_version: python3.8
+        language_version: python3.10
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.761
+    rev: v0.971
     hooks:
       - id: mypy
-        files: gama/*
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.7.9
+        files: gama/.*
+  - repo: https://github.com/pycqa/flake8
+    rev: 5.0.3
     hooks:
       - id: flake8
-        files: gama/*
+        files: gama/.*
diff --git a/gama/GamaClassifier.py b/gama/GamaClassifier.py
@@ -13,7 +13,7 @@
 
 
 class GamaClassifier(Gama):
-    """ Gama with adaptations for (multi-class) classification. """
+    """Gama with adaptations for (multi-class) classification."""
 
     def __init__(self, config=None, scoring="neg_log_loss", *args, **kwargs):
         if not config:
@@ -38,7 +38,7 @@ def __init__(self, config=None, scoring="neg_log_loss", *args, **kwargs):
         super().__init__(*args, **kwargs, config=config, scoring=scoring)
 
     def _predict(self, x: pd.DataFrame):
-        """ Predict the target for input X.
+        """Predict the target for input X.
 
         Parameters
         ----------
@@ -57,7 +57,7 @@ def _predict(self, x: pd.DataFrame):
         return y
 
     def _predict_proba(self, x: pd.DataFrame):
-        """ Predict the class probabilities for input x.
+        """Predict the class probabilities for input x.
 
         Predict target for x, using the best found pipeline(s) during the `fit` call.
 
@@ -75,7 +75,7 @@ def _predict_proba(self, x: pd.DataFrame):
         return self.model.predict_proba(x)  # type: ignore
 
     def predict_proba(self, x: Union[pd.DataFrame, np.ndarray]):
-        """ Predict the class probabilities for input x.
+        """Predict the class probabilities for input x.
 
         Predict target for x, using the best found pipeline(s) during the `fit` call.
 
@@ -99,7 +99,7 @@ def predict_proba_from_file(
         target_column: Optional[str] = None,
         encoding: Optional[str] = None,
     ):
-        """ Predict the class probabilities for input in the arff_file.
+        """Predict the class probabilities for input in the arff_file.
 
         Parameters
         ----------
@@ -124,7 +124,7 @@ def predict_proba_from_file(
         return self._predict_proba(x)
 
     def fit(self, x, y, *args, **kwargs):
-        """ Should use base class documentation. """
+        """Should use base class documentation."""
         y_ = y.squeeze() if isinstance(y, pd.DataFrame) else y
         self._label_encoder = LabelEncoder().fit(y_)
         if any([isinstance(yi, str) for yi in y_]):

diff --git a/gama/GamaRegressor.py b/gama/GamaRegressor.py
@@ -5,7 +5,7 @@
 
 
 class GamaRegressor(Gama):
-    """ Gama with adaptations for regression. """
+    """Gama with adaptations for regression."""
 
     def __init__(self, config=None, scoring="neg_mean_squared_error", *args, **kwargs):
         """ """
@@ -17,7 +17,7 @@ def __init__(self, config=None, scoring="neg_mean_squared_error", *args, **kwarg
         super().__init__(*args, **kwargs, config=config, scoring=scoring)
 
     def _predict(self, x: pd.DataFrame):
-        """ Predict the target for input X.
+        """Predict the target for input X.
 
         Parameters
         ----------

diff --git a/gama/configuration/parser.py b/gama/configuration/parser.py
@@ -6,8 +6,10 @@
 from gama.genetic_programming.components import Primitive, Terminal, DATA_TERMINAL
 
 
-def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict[str, List], Dict[str, Callable]]:
-    """ Create a pset for the given configuration dictionary.
+def pset_from_config(
+    configuration: Dict[Union[str, object], Any]
+) -> Tuple[Dict[str, List], Dict[str, Callable]]:
+    """Create a pset for the given configuration dictionary.
 
     Given a configuration dictionary specifying operators (e.g. sklearn
     estimators), their hyperparameters and values for each hyperparameter,
@@ -19,12 +21,13 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
     Side effect: Imports the classes of each primitive.
 
     returns:
-        pset - Dict[str, List]: maps return-types to a list of Primitives and/or Terminals
-        parameter_check - Dict[str, Callable]: 
-            maps Primitive name to a function which verifies the correct initialization of hyperparameters.
+        pset - Dict[str, List]:
+            maps return-types to a list of Primitives and/or Terminals
+        parameter_check - Dict[str, Callable]:
+            maps Primitive name to a check for the validity of the hp configuration
     """
 
-    pset = defaultdict(list)
+    pset: Dict[str, List[Union[Primitive, Terminal]]] = defaultdict(list)
     parameter_checks = {}
 
     # Make sure the str-keys are evaluated first, they describe shared hyperparameters.
@@ -36,9 +39,9 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
             # Specification of shared hyperparameters
             for value in values:
                 pset[key].append(Terminal(value=value, output=key, identifier=key))
-        elif isinstance(key, object):
+        elif isinstance(key, type):
             # Specification of operator (learner, preprocessor)
-            hyperparameter_types = []
+            hyperparameter_types: List[str] = []
             for name, param_values in sorted(values.items()):
                 # We construct a new type for each hyperparameter, so we can specify
                 # it as terminal type, making sure it matches with expected
@@ -56,41 +59,37 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
                     hyperparameter_types.append(hp_name)
                     for value in param_values:
                         pset[hp_name].append(
-                            Terminal(value=value, output=name, identifier=hp_name,)
+                            Terminal(
+                                value=value,
+                                output=name,
+                                identifier=hp_name,
+                            )
                         )
 
             # After registering the hyperparameter types,
             # we can register the operator itself.
-            transformer_tags = [
-                "DATA_PREPROCESSING",
-                "FEATURE_SELECTION",
-                "DATA_TRANSFORMATION",
-            ]
-            if issubclass(key, sklearn.base.TransformerMixin) or (
-                hasattr(key, "metadata")
-                and key.metadata.query()["primitive_family"] in transformer_tags
-            ):
+            if issubclass(key, sklearn.base.TransformerMixin):
                 pset[DATA_TERMINAL].append(
                     Primitive(
-                        input=hyperparameter_types, output=DATA_TERMINAL, identifier=key
+                        input=tuple(hyperparameter_types),
+                        output=DATA_TERMINAL,
+                        identifier=key,
                     )
                 )
-            elif issubclass(key, sklearn.base.ClassifierMixin) or (
-                hasattr(key, "metadata")
-                and key.metadata.query()["primitive_family"] == "CLASSIFICATION"
-            ):
+            elif issubclass(key, sklearn.base.ClassifierMixin):
                 pset["prediction"].append(
                     Primitive(
-                        input=hyperparameter_types, output="prediction", identifier=key
+                        input=tuple(hyperparameter_types),
+                        output="prediction",
+                        identifier=key,
                     )
                 )
-            elif issubclass(key, sklearn.base.RegressorMixin) or (
-                hasattr(key, "metadata")
-                and key.metadata.query()["primitive_family"] == "REGRESSION"
-            ):
+            elif issubclass(key, sklearn.base.RegressorMixin):
                 pset["prediction"].append(
                     Primitive(
-                        input=hyperparameter_types, output="prediction", identifier=key
+                        input=tuple(hyperparameter_types),
+                        output="prediction",
+                        identifier=key,
                     )
                 )
             else:
@@ -108,7 +107,7 @@ def pset_from_config(configuration: Dict[Union[str, object], Any]) -> Tuple[Dict
 
 
 def merge_configurations(c1: Dict, c2: Dict) -> Dict:
-    """ Takes two configurations and merges them together. """
+    """Takes two configurations and merges them together."""
     # Should refactor out 6 indentation levels
     merged: Dict[Any, Any] = defaultdict(lambda: None, c1)
     for algorithm, hparams2 in c2.items():

diff --git a/gama/dashboard/app.py b/gama/dashboard/app.py
@@ -53,7 +53,7 @@ def build_app():
 
 
 def create_generic_layout():
-    """ Creates the generic layout of tabs and their content pages. """
+    """Creates the generic layout of tabs and their content pages."""
     tab_banner_style = {
         "border-top-left-radius": "3px",
         "background-color": "#f9f9f9",

diff --git a/gama/dashboard/components/cli_window.py b/gama/dashboard/components/cli_window.py
@@ -19,7 +19,7 @@ def enqueue_output(out, queue_: queue.Queue):
 
 
 class CLIWindow:
-    """ A Component for Dash App which simulates a console window """
+    """A Component for Dash App which simulates a console window"""
 
     def __init__(
         self,

diff --git a/gama/dashboard/components/headers.py b/gama/dashboard/components/headers.py
@@ -21,7 +21,7 @@ def markdown_header(text: str, level: int = 4, with_horizontal_rule: bool = True
 
 
 class CollapsableSection:
-    """ A Form with a ButtonHeader which when presses collapses/expands the Form. """
+    """A Form with a ButtonHeader which when presses collapses/expands the Form."""
 
     def __init__(
         self, header: str, controls: List[dbc.FormGroup], start_open: bool = True

diff --git a/gama/dashboard/pages/analysispage.py b/gama/dashboard/pages/analysispage.py
@@ -156,7 +156,8 @@ def build_page(self, app, controller):
         )
 
         self._content = html.Div(
-            id=self.id, children=[visualization_container, report_select_container],
+            id=self.id,
+            children=[visualization_container, report_select_container],
         )
 
         app.callback(

diff --git a/gama/dashboard/pages/base_page.py b/gama/dashboard/pages/base_page.py
@@ -3,7 +3,7 @@
 
 class BasePage(abc.ABC):
     def __init__(self, name: str, alignment: int, starts_hidden: bool = False):
-        """ Defines the basic behavior of a page.
+        """Defines the basic behavior of a page.
 
         Parameters
         ----------
@@ -31,5 +31,5 @@ def content(self):
 
     @abc.abstractmethod
     def build_page(self, app, controller):
-        """ Populate the `content` field with html, register any callbacks. """
+        """Populate the `content` field with html, register any callbacks."""
         raise NotImplementedError
diff --git a/gama/dashboard/pages/homepage.py b/gama/dashboard/pages/homepage.py
@@ -25,7 +25,7 @@ def build_page(self, app, controller):
             self._register_callbacks(app)
 
     def _build_content(self, app, controller) -> html.Div:
-        """ Build all the components of the page. """
+        """Build all the components of the page."""
         configuration = build_configuration_menu(app, controller)
         configuration.style["width"] = "35%"
         configuration.style["float"] = "left"
@@ -163,7 +163,7 @@ def text_input(label_text: str, default_text: str, id_: str):
 def dropdown(
     label_text: str, id_: str, options: Dict[str, str], value: Optional[str] = None
 ):
-    """ options formatted as {LABEL_KEY: LABEL_TEXT, ...} """
+    """options formatted as {LABEL_KEY: LABEL_TEXT, ...}"""
     return dbc.FormGroup(
         [
             dbc.Label(label_text, html_for=id_, width=6),

diff --git a/gama/dashboard/pages/runningpage.py b/gama/dashboard/pages/runningpage.py
@@ -191,7 +191,10 @@ def plot_area(self):
             id="evaluation-graph",
             figure={
                 "data": [],
-                "layout": dict(hovermode="closest", transition={"duration": 500},),
+                "layout": dict(
+                    hovermode="closest",
+                    transition={"duration": 500},
+                ),
             },
         )
         return html.Div(

diff --git a/gama/dashboard/plotting.py b/gama/dashboard/plotting.py
@@ -38,7 +38,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
         # elif aggregate == "aggregate":
         #     plots = aggregate_best_over_time(aggregate_df, first_metric_max)
         layout = dict(
-            title=f"Best score over time",
+            title="Best score over time",
             xaxis=dict(title="time (s)"),
             yaxis=dict(title=f"max {first_metric}"),
             hovermode="closest",
@@ -84,7 +84,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
         #            go.Bar(x=size_ratio.index.values, y=size_ratio.values, name=method)
         #        )
         layout = dict(
-            title=f"Ratio of pipelines by size",
+            title="Ratio of pipelines by size",
             xaxis=dict(title="pipeline length"),
             yaxis=dict(title="pipeline count"),
         )
@@ -103,7 +103,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
                 )
             )
         layout = dict(
-            title=f"Ratio of pipelines by learner",
+            title="Ratio of pipelines by learner",
             xaxis=dict(title="pipeline length"),
             yaxis=dict(title="learner"),
         )
@@ -119,7 +119,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
         #         ].duration.dt.total_seconds()
         #         plots.append(go.Histogram(x=time_s, name=method))
         layout = dict(
-            title=f"Pipeline Evaluation Times",
+            title="Pipeline Evaluation Times",
             xaxis=dict(title="duration (s)"),
             yaxis=dict(title="count"),
         )
@@ -133,7 +133,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
                     go.Bar(x=count_by_rung.rung, y=count_by_rung.n, name=report.name)
                 )
         layout = dict(
-            title=f"#Evaluations by Rung",
+            title="#Evaluations by Rung",
             xaxis=dict(title="rung"),
             yaxis=dict(title="count"),
         )
@@ -152,7 +152,7 @@ def plot_preset_graph(reports: List[GamaReport], preset: Optional[str]):
                     )
                 )
         layout = dict(
-            title=f"Time spent per Rung",
+            title="Time spent per Rung",
             xaxis=dict(title="rung"),
             yaxis=dict(title="time (s)"),
         )
@@ -247,7 +247,7 @@ def aggregate_best_over_time(aggregate: pd.DataFrame, y_axis: str):
 
 
 def aggregate_plot(aggregate: pd.DataFrame, x_axis: str, y_axis: str):
-    """ Create an aggregate plot over multiple reports.
+    """Create an aggregate plot over multiple reports.
 
      Aggregates the mean and std of `y_axis` by `x_axis`.
 

diff --git a/gama/data_formatting.py b/gama/data_formatting.py
@@ -20,7 +20,7 @@ def series_looks_categorical(series) -> bool:
 
 
 def infer_categoricals_inplace(df: pd.DataFrame) -> None:
-    """ Use simple heuristics to convert columns guessed to be categorical. """
+    """Use simple heuristics to convert columns guessed to be categorical."""
     for column in df:
         if series_looks_categorical(df[column]):
             df[column] = df[column].astype("category")
@@ -36,7 +36,7 @@ def numpy_to_dataframe(x: np.ndarray) -> pd.DataFrame:
 def format_y(
     y: Union[pd.DataFrame, pd.Series, np.ndarray], y_type: Type = pd.Series
 ) -> Union[pd.DataFrame, pd.Series]:
-    """ Transforms a target vector or indicator matrix to a single series (or 1d df) """
+    """Transforms a target vector or indicator matrix to a single series (or 1d df)"""
     if not isinstance(y, (np.ndarray, pd.Series, pd.DataFrame)):
         raise TypeError("y must be np.ndarray, pd.Series or pd.DataFrame.")
     if y_type not in [pd.Series, pd.DataFrame]:
@@ -63,7 +63,7 @@ def format_y(
 def remove_unlabeled_rows(
     x: pd.DataFrame, y: Union[pd.Series, pd.DataFrame]
 ) -> Tuple[pd.DataFrame, Union[pd.Series, pd.DataFrame]]:
-    """ Removes all rows from x and y where y is nan. """
+    """Removes all rows from x and y where y is nan."""
     if isinstance(y, pd.DataFrame):
         unlabeled = y.iloc[:, 0].isnull()
     else:
@@ -84,7 +84,7 @@ def format_x_y(
     y_type: Type = pd.Series,
     remove_unlabeled: bool = True,
 ) -> Tuple[pd.DataFrame, Union[pd.DataFrame, pd.Series]]:
-    """ Take (X,y) data and convert it to a (pd.DataFrame, pd.Series) tuple.
+    """Take (X,y) data and convert it to a (pd.DataFrame, pd.Series) tuple.
 
     Parameters
     ----------