Commit
[pre-commit.ci] pre-commit autoupdate (#789)
pre-commit-ci[bot] authored Jan 13, 2025
1 parent 306da82 commit f6ce66e
Showing 11 changed files with 120 additions and 110 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -14,13 +14,13 @@ repos:
- id: check-added-large-files
exclude: pixi.lock
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.30.0
rev: 0.31.0
hooks:
- id: check-github-workflows
args: ["--verbose"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.1
hooks:
- id: ruff
args: ["--fix", "--output-format=full"]
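The hook bumps above are mechanical, but the ruff update from v0.8.6 to v0.9.1 is what drives every Python change below: the 0.9 formatter moves the parentheses from the `assert` condition to the assert message. A minimal sketch of the style change, with an illustrative value rather than code from the repository:

```python
cluster = "state"  # hypothetical value, only so the snippet runs

# Old ruff (<= 0.8.x) layout: the condition is parenthesized and the
# message trails after the closing parenthesis.
assert (
    isinstance(cluster, str) or cluster is None
), "cluster must be a string or None."

# New ruff (0.9.x) layout: the condition stays on the `assert` line and
# the message is wrapped in parentheses instead.
assert isinstance(cluster, str) or cluster is None, (
    "cluster must be a string or None."
)
```

Both layouts are semantically identical, which is why the commit touches many files while changing no behavior.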
42 changes: 21 additions & 21 deletions pyfixest/estimation/feols_.py
@@ -1341,13 +1341,13 @@ def ccv(
fit.ccv(treatment="D", pk=0.05, qk=0.5, n_splits=8, seed=123).head()
```
"""
assert (
self._supports_cluster_causal_variance
), "The model does not support the causal cluster variance estimator."
assert self._supports_cluster_causal_variance, (
"The model does not support the causal cluster variance estimator."
)
assert isinstance(treatment, str), "treatment must be a string."
assert (
isinstance(cluster, str) or cluster is None
), "cluster must be a string or None."
assert isinstance(cluster, str) or cluster is None, (
"cluster must be a string or None."
)
assert isinstance(seed, int) or seed is None, "seed must be an integer or None."
assert isinstance(n_splits, int), "n_splits must be an integer."
assert isinstance(pk, (int, float)) and 0 <= pk <= 1
@@ -1398,9 +1398,9 @@ def ccv(
data = self._data
Y = self._Y.flatten()
W = data[treatment].to_numpy()
assert np.all(
np.isin(W, [0, 1])
), "Treatment variable must be binary with values 0 and 1"
assert np.all(np.isin(W, [0, 1])), (
"Treatment variable must be binary with values 0 and 1"
)
X = self._X
cluster_vec = data[cluster].to_numpy()
unique_clusters = np.unique(cluster_vec)
@@ -1950,8 +1950,8 @@ def tidy(
"Std. Error": _se,
"t value": _tstat,
"Pr(>|t|)": _pvalue,
f"{lb*100:.1f}%": _conf_int[0],
f"{ub*100:.1f}%": _conf_int[1],
f"{lb * 100:.1f}%": _conf_int[0],
f"{ub * 100:.1f}%": _conf_int[1],
}
)

@@ -2117,8 +2117,8 @@ def confint(

df = pd.DataFrame(
{
f"{alpha / 2*100:.1f}%": lb,
f"{(1-alpha / 2)*100:.1f}%": ub,
f"{alpha / 2 * 100:.1f}%": lb,
f"{(1 - alpha / 2) * 100:.1f}%": ub,
}
)
# df = pd.DataFrame({f"{alpha / 2}%": lb, f"{1-alpha / 2}%": ub})
@@ -2349,8 +2349,8 @@ def ritest(
)

alpha = 1 - level
ci_lower_name = str(f"{alpha/2*100:.1f}% (Pr(>|t|))")
ci_upper_name = str(f"{(1-alpha/2)*100:.1f}% (Pr(>|t|))")
ci_lower_name = str(f"{alpha / 2 * 100:.1f}% (Pr(>|t|))")
ci_upper_name = str(f"{(1 - alpha / 2) * 100:.1f}% (Pr(>|t|))")
res[ci_lower_name] = ci_pvalue[0]
res[ci_upper_name] = ci_pvalue[1]

@@ -2648,17 +2648,17 @@ def _check_vcov_input(vcov: Union[str, dict[str, str]], data: pd.DataFrame):
"CRV1",
"CRV3",
], "vcov dict key must be CRV1 or CRV3"
assert isinstance(
next(iter(vcov.values())), str
), "vcov dict value must be a string"
assert isinstance(next(iter(vcov.values())), str), (
"vcov dict value must be a string"
)
deparse_vcov = next(iter(vcov.values())).split("+")
assert len(deparse_vcov) <= 2, "not more than twoway clustering is supported"

if isinstance(vcov, list):
assert all(isinstance(v, str) for v in vcov), "vcov list must contain strings"
assert all(
v in data.columns for v in vcov
), "vcov list must contain columns in the data"
assert all(v in data.columns for v in vcov), (
"vcov list must contain columns in the data"
)
if isinstance(vcov, str):
assert vcov in [
"iid",
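The other recurring change in the hunks above is inside f-strings: the ruff 0.9 formatter also normalizes spacing in f-string replacement fields, so expressions such as `lb*100` become `lb * 100`. A small sketch of the effect, using hypothetical values for illustration only:

```python
alpha = 0.05  # hypothetical significance level
lb, ub = alpha / 2, 1 - alpha / 2

# Before (ruff <= 0.8.x left f-string expressions untouched):
old_label = f"{lb*100:.1f}%"

# After (ruff 0.9.x adds spaces around operators inside the braces):
new_label = f"{lb * 100:.1f}%"

# The rendered strings are identical; only the source formatting changes.
assert old_label == new_label == "2.5%"
```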
76 changes: 39 additions & 37 deletions pyfixest/report/summarize.py
@@ -142,17 +142,17 @@ def etable(
"""
if signif_code is None:
signif_code = [0.001, 0.01, 0.05]
assert (
isinstance(signif_code, list) and len(signif_code) == 3
), "signif_code must be a list of length 3"
assert isinstance(signif_code, list) and len(signif_code) == 3, (
"signif_code must be a list of length 3"
)
if signif_code:
assert all(
[0 < i < 1 for i in signif_code]
), "All values of signif_code must be between 0 and 1"
assert all([0 < i < 1 for i in signif_code]), (
"All values of signif_code must be between 0 and 1"
)
if signif_code:
assert (
signif_code[0] < signif_code[1] < signif_code[2]
), "signif_code must be in increasing order"
assert signif_code[0] < signif_code[1] < signif_code[2], (
"signif_code must be in increasing order"
)

models = _post_processing_input_checks(models)

@@ -166,12 +166,12 @@ def etable(
if custom_stats:
assert isinstance(custom_stats, dict), "custom_stats must be a dict"
for key in custom_stats:
assert isinstance(
custom_stats[key], list
), "custom_stats values must be a list"
assert len(custom_stats[key]) == len(
models
), f"custom_stats {key} must have the same number as models"
assert isinstance(custom_stats[key], list), (
"custom_stats values must be a list"
)
assert len(custom_stats[key]) == len(models), (
f"custom_stats {key} must have the same number as models"
)

assert type in [
"df",
@@ -182,9 +182,9 @@ def etable(
], "type must be either 'df', 'md', 'html', 'gt' or 'tex'"

if model_heads is not None:
assert len(model_heads) == len(
models
), "model_heads must have the same length as models"
assert len(model_heads) == len(models), (
"model_heads must have the same length as models"
)

# Check if head_order is allowed string & remove h when no model_heads provided
assert head_order in [
@@ -203,9 +203,9 @@ def etable(
for stat, values in custom_model_stats.items():
assert isinstance(stat, str), "custom_model_stats keys must be strings"
assert isinstance(values, list), "custom_model_stats values must lists"
assert len(values) == len(
models
), "lists in custom_model_stats values must have the same length as models"
assert len(values) == len(models), (
"lists in custom_model_stats values must have the same length as models"
)

dep_var_list = []
nobs_list = []
@@ -336,9 +336,11 @@ def etable(
_number_formatter, **kwargs
)
elif element in custom_stats:
assert (
len(custom_stats[element][i]) == len(model_tidy_df["Estimate"])
), f"custom_stats {element} has unequal length to the number of coefficients in model_tidy_df {i}"
assert len(custom_stats[element][i]) == len(
model_tidy_df["Estimate"]
), (
f"custom_stats {element} has unequal length to the number of coefficients in model_tidy_df {i}"
)
model_tidy_df[coef_fmt_title] += pd.Series(
custom_stats[element][i]
).apply(_number_formatter, **kwargs)
@@ -357,7 +359,7 @@
model_tidy_df,
id_vars=["Coefficient"],
var_name="Metric",
value_name=f"est{i+1}",
value_name=f"est{i + 1}",
)
model_tidy_df = model_tidy_df.drop("Metric", axis=1).set_index("Coefficient")
etable_list.append(model_tidy_df)
@@ -833,9 +835,9 @@ def make_table(
A table in the specified format.
"""
assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame."
assert (
not isinstance(df.index, pd.MultiIndex) or df.index.nlevels <= 2
), "Row index can have at most two levels."
assert not isinstance(df.index, pd.MultiIndex) or df.index.nlevels <= 2, (
"Row index can have at most two levels."
)
assert type in ["gt", "tex"], "type must be either 'gt' or 'tex'."
assert rgroup_sep in [
"tb",
@@ -1250,16 +1252,16 @@ def dtable(
if stats_labels is None:
stats_labels = {}
assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame."
assert all(
pd.api.types.is_numeric_dtype(df[var]) for var in vars
), "Variables must be numerical."
assert all(pd.api.types.is_numeric_dtype(df[var]) for var in vars), (
"Variables must be numerical."
)
assert type in ["gt", "tex", "df"], "type must be either 'gt' or 'tex' or 'df'."
assert (
byrow is None or byrow in df.columns
), "byrow must be a column in the DataFrame."
assert bycol is None or all(
col in df.columns for col in bycol
), "bycol must be a list of columns in the DataFrame."
assert byrow is None or byrow in df.columns, (
"byrow must be a column in the DataFrame."
)
assert bycol is None or all(col in df.columns for col in bycol), (
"bycol must be a list of columns in the DataFrame."
)

# Default stats labels dictionary
stats_dict = {
16 changes: 10 additions & 6 deletions pyfixest/report/visualize.py
@@ -411,7 +411,7 @@ def _coefplot_lets_plot(
width=0.05,
position=position_dodge(0.5),
)
+ ylab(rf"Estimate and {round((1-alpha)*100, 1)}% Confidence Interval")
+ ylab(rf"Estimate and {round((1 - alpha) * 100, 1)}% Confidence Interval")
)

if flip_coord:
@@ -544,7 +544,9 @@ def _coefplot_matplotlib(
ax.axvline(x=yintercept, color="black", linestyle="--")
if xintercept is not None:
ax.axhline(y=xintercept, color="black", linestyle="--")
ax.set_xlabel(rf"Estimate and {round((1-alpha)*100, 1)}% Confidence Interval")
ax.set_xlabel(
rf"Estimate and {round((1 - alpha) * 100, 1)}% Confidence Interval"
)
ax.set_ylabel("Coefficient")
ax.set_yticks(range(len(unique_coefficients)))
ax.set_yticklabels(unique_coefficients)
@@ -553,7 +555,9 @@
ax.axhline(y=yintercept, color="black", linestyle="--")
if xintercept is not None:
ax.axvline(x=xintercept, color="black", linestyle="--")
ax.set_ylabel(rf"Estimate and {round((1-alpha)*100, 1)}% Confidence Interval")
ax.set_ylabel(
rf"Estimate and {round((1 - alpha) * 100, 1)}% Confidence Interval"
)
ax.set_xlabel("Coefficient")
ax.set_xticks(range(len(unique_coefficients)))
ax.set_xticklabels(unique_coefficients)
@@ -596,10 +600,10 @@ def _get_model_df(
A tidy model frame.
"""
df_model = fxst.tidy(alpha=alpha).reset_index() # Coefficient -> simple column
df_model["fml"] = f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}%"
df_model["fml"] = f"{fxst._model_name_plot}: {(1 - alpha) * 100:.1f}%"

if joint in ["both", True]:
lb, ub = f"{alpha / 2*100:.1f}%", f"{(1 - alpha / 2)*100:.1f}%"
lb, ub = f"{alpha / 2 * 100:.1f}%", f"{(1 - alpha / 2) * 100:.1f}%"
df_joint = fxst.confint(joint=True, alpha=alpha, seed=seed)
df_joint.reset_index(inplace=True)
df_joint = df_joint.rename(columns={"index": "Coefficient"})
Expand All @@ -609,7 +613,7 @@ def _get_model_df(
.merge(df_joint, on="Coefficient", how="left")
)
df_joint_full["fml"] = (
f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}% joint CIs"
f"{fxst._model_name_plot}: {(1 - alpha) * 100:.1f}% joint CIs"
)
if joint == "both":
df_model = pd.concat([df_model, df_joint_full], axis=0)
4 changes: 3 additions & 1 deletion tests/test_confint.py
@@ -71,7 +71,9 @@ def test_against_doubleml():
np.c_[y, X], columns=["y"] + ["X_" + str(x) for x in range(n_vars)]
)
m = feols(
f"y ~ -1 + {'+'.join(['X_'+str(x) for x in range(n_vars)])}", df, vcov="hetero"
f"y ~ -1 + {'+'.join(['X_' + str(x) for x in range(n_vars)])}",
df,
vcov="hetero",
)
pyfixest_res = m.confint(keep="X_.$", reps=10_000, joint=True)

18 changes: 9 additions & 9 deletions tests/test_decomposition.py
@@ -140,16 +140,16 @@ def decompose_and_compare(
lower_diff = filtered_df.xs(g)["CI Lower"] - ci[g][0]
upper_diff = filtered_df.xs(g)["CI Upper"] - ci[g][1]

assert np.all(
np.abs(coef_diff) < 1e-6
), f"Failed for {g} with values {filtered_df.xs(g).Coefficient} and {contribution_dict[g]}"
assert np.all(np.abs(coef_diff) < 1e-6), (
f"Failed for {g} with values {filtered_df.xs(g).Coefficient} and {contribution_dict[g]}"
)
if False:
assert np.all(
np.abs(lower_diff) < 1e-4
), f"Failed for {g} with values {filtered_df.xs(g)['CI Lower']} and {ci[g][0]}"
assert np.all(
np.abs(upper_diff) < 1e-4
), f"Failed for {g} with values {filtered_df.xs(g)['CI Upper']} and {ci[g][1]}"
assert np.all(np.abs(lower_diff) < 1e-4), (
f"Failed for {g} with values {filtered_df.xs(g)['CI Lower']} and {ci[g][0]}"
)
assert np.all(np.abs(upper_diff) < 1e-4), (
f"Failed for {g} with values {filtered_df.xs(g)['CI Upper']} and {ci[g][1]}"
)

# Agg 1: Heteroskedastic SE
decompose_and_compare(
36 changes: 18 additions & 18 deletions tests/test_event_study.py
@@ -25,15 +25,15 @@ def test_event_study_twfe(data):

twfe_feols = pf.feols("dep_var ~ treat | state + year", data=data)

assert np.allclose(
twfe.coef().values, twfe_feols.coef().values
), "TWFE coefficients are not the same."
assert np.allclose(
twfe.se().values, twfe_feols.se().values
), "TWFE standard errors are not the same."
assert np.allclose(
twfe.pvalue().values, twfe_feols.pvalue().values
), "TWFE p-values are not the same."
assert np.allclose(twfe.coef().values, twfe_feols.coef().values), (
"TWFE coefficients are not the same."
)
assert np.allclose(twfe.se().values, twfe_feols.se().values), (
"TWFE standard errors are not the same."
)
assert np.allclose(twfe.pvalue().values, twfe_feols.pvalue().values), (
"TWFE p-values are not the same."
)

# TODO - minor difference, likely due to how z statistic is
# calculated
@@ -63,15 +63,15 @@ def test_event_study_did2s(data):
cluster="state",
)

assert np.allclose(
event_study_did2s.coef().values, fit_did2s.coef().values
), "DID2S coefficients are not the same."
assert np.allclose(
event_study_did2s.se().values, fit_did2s.se().values
), "DID2S standard errors are not the same."
assert np.allclose(
event_study_did2s.pvalue().values, fit_did2s.pvalue().values
), "DID2S p-values are not the same."
assert np.allclose(event_study_did2s.coef().values, fit_did2s.coef().values), (
"DID2S coefficients are not the same."
)
assert np.allclose(event_study_did2s.se().values, fit_did2s.se().values), (
"DID2S standard errors are not the same."
)
assert np.allclose(event_study_did2s.pvalue().values, fit_did2s.pvalue().values), (
"DID2S p-values are not the same."
)
assert np.allclose(
event_study_did2s.confint().values, fit_did2s.confint().values
), "DID2S confidence intervals are not the same."
6 changes: 3 additions & 3 deletions tests/test_feols_compressed.py
@@ -162,9 +162,9 @@ def test_different_seed():
fit4 = pf.feols("Y ~ f1", data=data, use_compression=True, seed=125, reps=1000)

assert np.allclose(fit3.coef().xs("f1"), fit4.coef().xs("f1")), "Error in seed"
assert np.all(
np.abs(fit3.se().xs("f1") / fit4.se().xs("f1")) < RTOL_BOOT
), "Error in se"
assert np.all(np.abs(fit3.se().xs("f1") / fit4.se().xs("f1")) < RTOL_BOOT), (
"Error in se"
)
assert np.all(
np.abs(fit3.pvalue().xs("f1") / fit4.pvalue().xs("f1")) < RTOL_BOOT
), "Error in pvalue"