Commit
[pre-commit.ci] pre-commit autoupdate (#789)
pre-commit-ci[bot] authored Jan 13, 2025
1 parent 306da82 commit f6ce66e
Showing 11 changed files with 120 additions and 110 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -14,13 +14,13 @@ repos:
- id: check-added-large-files
exclude: pixi.lock
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.30.0
rev: 0.31.0
hooks:
- id: check-github-workflows
args: ["--verbose"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.1
hooks:
- id: ruff
args: ["--fix", "--output-format=full"]
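The hook bumps above are mechanical, but the ruff update from v0.8.6 to v0.9.1 is what drives every Python change below: the 0.9 formatter moves the parentheses from the `assert` condition to the assert message. A minimal sketch of the style change, with an illustrative value rather than code from the repository:

```python
cluster = "state"  # hypothetical value, only so the snippet runs

# Old ruff (<= 0.8.x) layout: the condition is parenthesized and the
# message trails after the closing parenthesis.
assert (
    isinstance(cluster, str) or cluster is None
), "cluster must be a string or None."

# New ruff (0.9.x) layout: the condition stays on the `assert` line and
# the message is wrapped in parentheses instead.
assert isinstance(cluster, str) or cluster is None, (
    "cluster must be a string or None."
)
```

Both layouts are semantically identical, which is why the commit touches many files while changing no behavior.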
42 changes: 21 additions & 21 deletions pyfixest/estimation/feols_.py
@@ -1341,13 +1341,13 @@ def ccv(
fit.ccv(treatment="D", pk=0.05, qk=0.5, n_splits=8, seed=123).head()
```
"""
assert (
self._supports_cluster_causal_variance
), "The model does not support the causal cluster variance estimator."
assert self._supports_cluster_causal_variance, (
"The model does not support the causal cluster variance estimator."
)
assert isinstance(treatment, str), "treatment must be a string."
assert (
isinstance(cluster, str) or cluster is None
), "cluster must be a string or None."
assert isinstance(cluster, str) or cluster is None, (
"cluster must be a string or None."
)
assert isinstance(seed, int) or seed is None, "seed must be an integer or None."
assert isinstance(n_splits, int), "n_splits must be an integer."
assert isinstance(pk, (int, float)) and 0 <= pk <= 1
@@ -1398,9 +1398,9 @@ def ccv(
data = self._data
Y = self._Y.flatten()
W = data[treatment].to_numpy()
assert np.all(
np.isin(W, [0, 1])
), "Treatment variable must be binary with values 0 and 1"
assert np.all(np.isin(W, [0, 1])), (
"Treatment variable must be binary with values 0 and 1"
)
X = self._X
cluster_vec = data[cluster].to_numpy()
unique_clusters = np.unique(cluster_vec)
@@ -1950,8 +1950,8 @@ def tidy(
"Std. Error": _se,
"t value": _tstat,
"Pr(>|t|)": _pvalue,
f"{lb*100:.1f}%": _conf_int[0],
f"{ub*100:.1f}%": _conf_int[1],
f"{lb * 100:.1f}%": _conf_int[0],
f"{ub * 100:.1f}%": _conf_int[1],
}
)

@@ -2117,8 +2117,8 @@ def confint(

df = pd.DataFrame(
{
f"{alpha / 2*100:.1f}%": lb,
f"{(1-alpha / 2)*100:.1f}%": ub,
f"{alpha / 2 * 100:.1f}%": lb,
f"{(1 - alpha / 2) * 100:.1f}%": ub,
}
)
# df = pd.DataFrame({f"{alpha / 2}%": lb, f"{1-alpha / 2}%": ub})
@@ -2349,8 +2349,8 @@ def ritest(
)

alpha = 1 - level
ci_lower_name = str(f"{alpha/2*100:.1f}% (Pr(>|t|))")
ci_upper_name = str(f"{(1-alpha/2)*100:.1f}% (Pr(>|t|))")
ci_lower_name = str(f"{alpha / 2 * 100:.1f}% (Pr(>|t|))")
ci_upper_name = str(f"{(1 - alpha / 2) * 100:.1f}% (Pr(>|t|))")
res[ci_lower_name] = ci_pvalue[0]
res[ci_upper_name] = ci_pvalue[1]

@@ -2648,17 +2648,17 @@ def _check_vcov_input(vcov: Union[str, dict[str, str]], data: pd.DataFrame):
"CRV1",
"CRV3",
], "vcov dict key must be CRV1 or CRV3"
assert isinstance(
next(iter(vcov.values())), str
), "vcov dict value must be a string"
assert isinstance(next(iter(vcov.values())), str), (
"vcov dict value must be a string"
)
deparse_vcov = next(iter(vcov.values())).split("+")
assert len(deparse_vcov) <= 2, "not more than twoway clustering is supported"

if isinstance(vcov, list):
assert all(isinstance(v, str) for v in vcov), "vcov list must contain strings"
assert all(
v in data.columns for v in vcov
), "vcov list must contain columns in the data"
assert all(v in data.columns for v in vcov), (
"vcov list must contain columns in the data"
)
if isinstance(vcov, str):
assert vcov in [
"iid",
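The other recurring change in the hunks above is inside f-strings: the ruff 0.9 formatter also normalizes spacing in f-string replacement fields, so expressions such as `lb*100` become `lb * 100`. A small sketch of the effect, using hypothetical values for illustration only:

```python
alpha = 0.05  # hypothetical significance level
lb, ub = alpha / 2, 1 - alpha / 2

# Before (ruff <= 0.8.x left f-string expressions untouched):
old_label = f"{lb*100:.1f}%"

# After (ruff 0.9.x adds spaces around operators inside the braces):
new_label = f"{lb * 100:.1f}%"

# The rendered strings are identical; only the source formatting changes.
assert old_label == new_label == "2.5%"
```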
76 changes: 39 additions & 37 deletions pyfixest/report/summarize.py
@@ -142,17 +142,17 @@ def etable(
"""
if signif_code is None:
signif_code = [0.001, 0.01, 0.05]
assert (
isinstance(signif_code, list) and len(signif_code) == 3
), "signif_code must be a list of length 3"
assert isinstance(signif_code, list) and len(signif_code) == 3, (
"signif_code must be a list of length 3"
)
if signif_code:
assert all(
[0 < i < 1 for i in signif_code]
), "All values of signif_code must be between 0 and 1"
assert all([0 < i < 1 for i in signif_code]), (
"All values of signif_code must be between 0 and 1"
)
if signif_code:
assert (
signif_code[0] < signif_code[1] < signif_code[2]
), "signif_code must be in increasing order"
assert signif_code[0] < signif_code[1] < signif_code[2], (
"signif_code must be in increasing order"
)

models = _post_processing_input_checks(models)

@@ -166,12 +166,12 @@ def etable(
if custom_stats:
assert isinstance(custom_stats, dict), "custom_stats must be a dict"
for key in custom_stats:
assert isinstance(
custom_stats[key], list
), "custom_stats values must be a list"
assert len(custom_stats[key]) == len(
models
), f"custom_stats {key} must have the same number as models"
assert isinstance(custom_stats[key], list), (
"custom_stats values must be a list"
)
assert len(custom_stats[key]) == len(models), (
f"custom_stats {key} must have the same number as models"
)

assert type in [
"df",
@@ -182,9 +182,9 @@ def etable(
], "type must be either 'df', 'md', 'html', 'gt' or 'tex'"

if model_heads is not None:
assert len(model_heads) == len(
models
), "model_heads must have the same length as models"
assert len(model_heads) == len(models), (
"model_heads must have the same length as models"
)

# Check if head_order is allowed string & remove h when no model_heads provided
assert head_order in [
@@ -203,9 +203,9 @@ def etable(
for stat, values in custom_model_stats.items():
assert isinstance(stat, str), "custom_model_stats keys must be strings"
assert isinstance(values, list), "custom_model_stats values must lists"
assert len(values) == len(
models
), "lists in custom_model_stats values must have the same length as models"
assert len(values) == len(models), (
"lists in custom_model_stats values must have the same length as models"
)

dep_var_list = []
nobs_list = []
@@ -336,9 +336,11 @@ def etable(
_number_formatter, **kwargs
)
elif element in custom_stats:
assert (
len(custom_stats[element][i]) == len(model_tidy_df["Estimate"])
), f"custom_stats {element} has unequal length to the number of coefficients in model_tidy_df {i}"
assert len(custom_stats[element][i]) == len(
model_tidy_df["Estimate"]
), (
f"custom_stats {element} has unequal length to the number of coefficients in model_tidy_df {i}"
)
model_tidy_df[coef_fmt_title] += pd.Series(
custom_stats[element][i]
).apply(_number_formatter, **kwargs)
@@ -357,7 +359,7 @@
model_tidy_df,
id_vars=["Coefficient"],
var_name="Metric",
value_name=f"est{i+1}",
value_name=f"est{i + 1}",
)
model_tidy_df = model_tidy_df.drop("Metric", axis=1).set_index("Coefficient")
etable_list.append(model_tidy_df)
@@ -833,9 +835,9 @@ def make_table(
A table in the specified format.
"""
assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame."
assert (
not isinstance(df.index, pd.MultiIndex) or df.index.nlevels <= 2
), "Row index can have at most two levels."
assert not isinstance(df.index, pd.MultiIndex) or df.index.nlevels <= 2, (
"Row index can have at most two levels."
)
assert type in ["gt", "tex"], "type must be either 'gt' or 'tex'."
assert rgroup_sep in [
"tb",
@@ -1250,16 +1252,16 @@ def dtable(
if stats_labels is None:
stats_labels = {}
assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame."
assert all(
pd.api.types.is_numeric_dtype(df[var]) for var in vars
), "Variables must be numerical."
assert all(pd.api.types.is_numeric_dtype(df[var]) for var in vars), (
"Variables must be numerical."
)
assert type in ["gt", "tex", "df"], "type must be either 'gt' or 'tex' or 'df'."
assert (
byrow is None or byrow in df.columns
), "byrow must be a column in the DataFrame."
assert bycol is None or all(
col in df.columns for col in bycol
), "bycol must be a list of columns in the DataFrame."
assert byrow is None or byrow in df.columns, (
"byrow must be a column in the DataFrame."
)
assert bycol is None or all(col in df.columns for col in bycol), (
"bycol must be a list of columns in the DataFrame."
)

# Default stats labels dictionary
stats_dict = {
16 changes: 10 additions & 6 deletions pyfixest/report/visualize.py
@@ -411,7 +411,7 @@ def _coefplot_lets_plot(
width=0.05,
position=position_dodge(0.5),
)
+ ylab(rf"Estimate and {round((1-alpha)*100, 1)}% Confidence Interval")
+ ylab(rf"Estimate and {round((1 - alpha) * 100, 1)}% Confidence Interval")
)

if flip_coord:
@@ -544,7 +544,9 @@ def _coefplot_matplotlib(
ax.axvline(x=yintercept, color="black", linestyle="--")
if xintercept is not None:
ax.axhline(y=xintercept, color="black", linestyle="--")
ax.set_xlabel(rf"Estimate and {round((1-alpha)*100, 1)}% Confidence Interval")
ax.set_xlabel(
rf"Estimate and {round((1 - alpha) * 100, 1)}% Confidence Interval"
)
ax.set_ylabel("Coefficient")
ax.set_yticks(range(len(unique_coefficients)))
ax.set_yticklabels(unique_coefficients)
@@ -553,7 +555,9 @@
ax.axhline(y=yintercept, color="black", linestyle="--")
if xintercept is not None:
ax.axvline(x=xintercept, color="black", linestyle="--")
ax.set_ylabel(rf"Estimate and {round((1-alpha)*100, 1)}% Confidence Interval")
ax.set_ylabel(
rf"Estimate and {round((1 - alpha) * 100, 1)}% Confidence Interval"
)
ax.set_xlabel("Coefficient")
ax.set_xticks(range(len(unique_coefficients)))
ax.set_xticklabels(unique_coefficients)
@@ -596,10 +600,10 @@ def _get_model_df(
A tidy model frame.
"""
df_model = fxst.tidy(alpha=alpha).reset_index() # Coefficient -> simple column
df_model["fml"] = f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}%"
df_model["fml"] = f"{fxst._model_name_plot}: {(1 - alpha) * 100:.1f}%"

if joint in ["both", True]:
lb, ub = f"{alpha / 2*100:.1f}%", f"{(1 - alpha / 2)*100:.1f}%"
lb, ub = f"{alpha / 2 * 100:.1f}%", f"{(1 - alpha / 2) * 100:.1f}%"
df_joint = fxst.confint(joint=True, alpha=alpha, seed=seed)
df_joint.reset_index(inplace=True)
df_joint = df_joint.rename(columns={"index": "Coefficient"})
Expand All @@ -609,7 +613,7 @@ def _get_model_df(
.merge(df_joint, on="Coefficient", how="left")
)
df_joint_full["fml"] = (
f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}% joint CIs"
f"{fxst._model_name_plot}: {(1 - alpha) * 100:.1f}% joint CIs"
)
if joint == "both":
df_model = pd.concat([df_model, df_joint_full], axis=0)
4 changes: 3 additions & 1 deletion tests/test_confint.py
@@ -71,7 +71,9 @@ def test_against_doubleml():
np.c_[y, X], columns=["y"] + ["X_" + str(x) for x in range(n_vars)]
)
m = feols(
f"y ~ -1 + {'+'.join(['X_'+str(x) for x in range(n_vars)])}", df, vcov="hetero"
f"y ~ -1 + {'+'.join(['X_' + str(x) for x in range(n_vars)])}",
df,
vcov="hetero",
)
pyfixest_res = m.confint(keep="X_.$", reps=10_000, joint=True)

18 changes: 9 additions & 9 deletions tests/test_decomposition.py
@@ -140,16 +140,16 @@ def decompose_and_compare(
lower_diff = filtered_df.xs(g)["CI Lower"] - ci[g][0]
upper_diff = filtered_df.xs(g)["CI Upper"] - ci[g][1]

assert np.all(
np.abs(coef_diff) < 1e-6
), f"Failed for {g} with values {filtered_df.xs(g).Coefficient} and {contribution_dict[g]}"
assert np.all(np.abs(coef_diff) < 1e-6), (
f"Failed for {g} with values {filtered_df.xs(g).Coefficient} and {contribution_dict[g]}"
)
if False:
assert np.all(
np.abs(lower_diff) < 1e-4
), f"Failed for {g} with values {filtered_df.xs(g)['CI Lower']} and {ci[g][0]}"
assert np.all(
np.abs(upper_diff) < 1e-4
), f"Failed for {g} with values {filtered_df.xs(g)['CI Upper']} and {ci[g][1]}"
assert np.all(np.abs(lower_diff) < 1e-4), (
f"Failed for {g} with values {filtered_df.xs(g)['CI Lower']} and {ci[g][0]}"
)
assert np.all(np.abs(upper_diff) < 1e-4), (
f"Failed for {g} with values {filtered_df.xs(g)['CI Upper']} and {ci[g][1]}"
)

# Agg 1: Heteroskedastic SE
decompose_and_compare(
36 changes: 18 additions & 18 deletions tests/test_event_study.py
@@ -25,15 +25,15 @@ def test_event_study_twfe(data):

twfe_feols = pf.feols("dep_var ~ treat | state + year", data=data)

assert np.allclose(
twfe.coef().values, twfe_feols.coef().values
), "TWFE coefficients are not the same."
assert np.allclose(
twfe.se().values, twfe_feols.se().values
), "TWFE standard errors are not the same."
assert np.allclose(
twfe.pvalue().values, twfe_feols.pvalue().values
), "TWFE p-values are not the same."
assert np.allclose(twfe.coef().values, twfe_feols.coef().values), (
"TWFE coefficients are not the same."
)
assert np.allclose(twfe.se().values, twfe_feols.se().values), (
"TWFE standard errors are not the same."
)
assert np.allclose(twfe.pvalue().values, twfe_feols.pvalue().values), (
"TWFE p-values are not the same."
)

# TODO - minor difference, likely due to how z statistic is
# calculated
@@ -63,15 +63,15 @@ def test_event_study_did2s(data):
cluster="state",
)

assert np.allclose(
event_study_did2s.coef().values, fit_did2s.coef().values
), "DID2S coefficients are not the same."
assert np.allclose(
event_study_did2s.se().values, fit_did2s.se().values
), "DID2S standard errors are not the same."
assert np.allclose(
event_study_did2s.pvalue().values, fit_did2s.pvalue().values
), "DID2S p-values are not the same."
assert np.allclose(event_study_did2s.coef().values, fit_did2s.coef().values), (
"DID2S coefficients are not the same."
)
assert np.allclose(event_study_did2s.se().values, fit_did2s.se().values), (
"DID2S standard errors are not the same."
)
assert np.allclose(event_study_did2s.pvalue().values, fit_did2s.pvalue().values), (
"DID2S p-values are not the same."
)
assert np.allclose(
event_study_did2s.confint().values, fit_did2s.confint().values
), "DID2S confidence intervals are not the same."
6 changes: 3 additions & 3 deletions tests/test_feols_compressed.py
@@ -162,9 +162,9 @@ def test_different_seed():
fit4 = pf.feols("Y ~ f1", data=data, use_compression=True, seed=125, reps=1000)

assert np.allclose(fit3.coef().xs("f1"), fit4.coef().xs("f1")), "Error in seed"
assert np.all(
np.abs(fit3.se().xs("f1") / fit4.se().xs("f1")) < RTOL_BOOT
), "Error in se"
assert np.all(np.abs(fit3.se().xs("f1") / fit4.se().xs("f1")) < RTOL_BOOT), (
"Error in se"
)
assert np.all(
np.abs(fit3.pvalue().xs("f1") / fit4.pvalue().xs("f1")) < RTOL_BOOT
), "Error in pvalue"