From ca05552b952e6c46094dcf0710abbcbda2a2afc9 Mon Sep 17 00:00:00 2001 From: Eric Sommer Date: Mon, 24 Feb 2020 22:06:49 +0100 Subject: [PATCH 1/6] more warnings --- gettsim/checks.py | 31 +++++++++++++++++-- gettsim/tax_transfer.py | 6 ++-- .../tests/test_calculation_tax_transfer.py | 2 +- .../tests/test_data/test_dfs_tax_transfer.csv | 30 +++++++++--------- 4 files changed, 47 insertions(+), 22 deletions(-) diff --git a/gettsim/checks.py b/gettsim/checks.py index c5c8ccbc4..192ff6b3d 100644 --- a/gettsim/checks.py +++ b/gettsim/checks.py @@ -1,3 +1,28 @@ -def check_boolean(df, variable_list): - for variable in variable_list: - assert df[variable].dtype == bool +def check_data(df): + bool_variables = ["child", "east", "female"] + for variable in bool_variables: + try: + assert df[variable].dtype == bool + except TypeError: + print(f"{variable} is not of type boolean.") + + positive_vars = ["mietstufe", "wohnfl", "miete"] + for var in positive_vars: + try: + print(var) + print(df[var].min()) + assert df[var].min() > 0 + except ValueError: + print(f"{var} must be strictly positive.") + print(df[var].describe()) + + try: + assert df.notna().all() + except ValueError: + print("") + + try: + assert (df.groupby("hid")["head"].sum() == 1).all() + except ValueError: + print("There must be exactly one household head per household.") + print(df["hid"].first()) diff --git a/gettsim/tax_transfer.py b/gettsim/tax_transfer.py index 8bea46486..8be12bd50 100644 --- a/gettsim/tax_transfer.py +++ b/gettsim/tax_transfer.py @@ -4,7 +4,7 @@ from gettsim.benefits.kiz import kiz from gettsim.benefits.unterhaltsvorschuss import uhv from gettsim.benefits.wohngeld import wg -from gettsim.checks import check_boolean +from gettsim.checks import check_data from gettsim.incomes import disposable_income from gettsim.incomes import gross_income from gettsim.pensions import pensions @@ -50,8 +50,8 @@ def tax_transfer( The 'sub' functions may take an argument 'ref', which might be used for small reforms that e.g. only differ in parameters or slightly change the calculation. """ - bool_variables = ["child", "east"] - check_boolean(df, bool_variables) + + check_data(df) # if hyporun is False: # df = uprate(df, datayear, settings['taxyear'], settings['MAIN_PATH']) diff --git a/gettsim/tests/test_calculation_tax_transfer.py b/gettsim/tests/test_calculation_tax_transfer.py index 03831ca06..f0d5b276e 100644 --- a/gettsim/tests/test_calculation_tax_transfer.py +++ b/gettsim/tests/test_calculation_tax_transfer.py @@ -5,7 +5,7 @@ from gettsim.tax_transfer import calculate_tax_and_transfers -YEARS = [2002, 2010, 2018, 2019] +YEARS = [2002, 2010, 2012, 2013, 2014, 2018, 2019] @pytest.fixture(scope="module") diff --git a/gettsim/tests/test_data/test_dfs_tax_transfer.csv b/gettsim/tests/test_data/test_dfs_tax_transfer.csv index b29aed1d9..fcfe808de 100644 --- a/gettsim/tests/test_data/test_dfs_tax_transfer.csv +++ b/gettsim/tests/test_data/test_dfs_tax_transfer.csv @@ -1,15 +1,15 @@ -hid,tu_id,pid,head_tu,head,adult_num,child0_18_num,hh_wealth,m_wage,east,age,selfemployed,haskids,m_self,m_pensions,pkv,m_wage_l1,months_ue,months_ue_l1,months_ue_l2,w_hours,child_num_tu,adult_num_tu,byear,exper,EP,child,pensioner,m_childcare,m_imputedrent,m_kapinc,m_vermiet,renteneintritt,zveranl,ineducation,handcap_degree,alleinerz,miete,wohnfl,eigentum,heizkost,cnstyr,mietstufe,m_transfers,year -1,1,1,True,True,4,0,5500,300,False,25,False,False,0,0,False,1000,1,0,0,0,0,4,1958,7,6.2776,False,False,0,0,200,0,2061,False,False,0,False,400,50,False,80,3,3,50,2018 -1,1,2,False,False,4,0,5500,600,False,25,False,False,0,0,False,2000,1,0,0,0,0,4,1957,7,6.2776,False,False,0,0,200,1,2062,False,False,0,False,400,50,False,80,3,3,50,2018 -1,2,3,False,False,4,0,5500,900,False,25,False,False,0,0,False,3000,12,5,0,0,1,4,1982,7,6.2776,False,False,0,0,200,2,2063,False,False,0,False,400,50,False,80,3,3,50,2018 -1,2,4,False,False,4,0,5500,1200,True,25,False,True,0,0,False,4000,1,0,0,20,1,4,1970,7,6.2776,False,False,0,0,200,3,2064,False,False,0,False,400,50,False,80,3,3,50,2018 -2,3,5,True,True,4,0,5500,1500,True,25,False,True,0,0,False,7000,1,0,0,0,1,4,1952,7,6.2776,False,False,0,0,200,4,2065,False,False,0,False,400,50,False,80,3,3,50,2018 -2,3,6,False,False,4,0,5500,5000,True,25,False,True,0,0,False,2500,1,0,0,0,1,4,1985,7,6.2776,False,False,0,0,201,5,2066,False,False,0,False,400,50,False,80,3,3,50,2018 -2,4,7,False,False,4,0,5500,300,True,20,False,False,0,0,False,2300,1,0,0,0,0,4,1982,2,1.7936,False,False,0,0,202,6,2067,False,False,0,False,400,50,False,80,3,3,50,2010 -2,4,8,False,False,4,0,5500,600,True,20,False,False,0,0,False,1000,1,0,0,0,0,4,1971,2,1.7936,False,False,0,0,203,7,2068,False,False,0,False,400,50,False,80,3,3,50,2010 -3,5,9,True,True,1,0,5500,900,True,20,False,False,0,0,False,2000,1,0,0,0,0,1,1968,2,1.7936,False,False,0,0,204,8,2069,False,False,0,False,400,50,False,80,3,3,50,2010 -4,6,10,True,True,1,0,5500,1200,False,20,False,True,0,0,False,3000,12,5,0,0,1,1,1964,2,1.7936,False,False,0,0,205,9,2070,False,False,0,False,400,50,False,80,3,3,50,2010 -5,7,11,True,True,1,0,5500,1500,False,20,False,True,0,0,False,4000,1,0,0,20,1,1,1967,2,1.7936,False,False,0,0,206,10,2071,False,False,0,False,400,50,False,80,3,3,50,2010 -6,8,12,True,True,1,0,5500,8000,False,20,False,True,0,0,False,7000,1,0,0,0,1,1,1951,2,1.7936,False,False,0,0,207,11,2072,False,False,0,False,400,50,False,80,3,3,50,2010 -7,9,13,True,True,1,0,5500,700,False,20,False,False,0,0,False,2500,1,0,0,0,1,1,1960,2,1.7936,False,False,0,0,208,12,2073,False,False,0,False,400,50,False,80,3,3,50,2002 -8,10,14,True,True,1,0,5500,0,False,40,True,True,2500,0,False,2300,1,0,0,0,0,1,1959,22,19.7296,False,False,0,0,0,13,2074,False,False,0,False,400,50,False,80,3,3,50,2019 +hid,tu_id,pid,head_tu,female,head,adult_num,child0_18_num,hh_wealth,m_wage,east,age,selfemployed,haskids,m_self,m_pensions,pkv,m_wage_l1,months_ue,months_ue_l1,months_ue_l2,w_hours,child_num_tu,adult_num_tu,byear,exper,EP,child,pensioner,m_childcare,m_imputedrent,m_kapinc,m_vermiet,renteneintritt,zveranl,ineducation,handcap_degree,alleinerz,miete,wohnfl,eigentum,heizkost,cnstyr,mietstufe,m_transfers,year +1,1,1,True,True,True,4,0,5500,300,False,25,False,False,0,0,False,1000,1,0,0,0,0,4,1958,7,6.2776,False,False,0,0,200,0,2061,False,False,0,False,400,50,False,80,3,3,50,2018 +1,1,2,False,True,False,4,0,5500,600,False,25,False,False,0,0,False,2000,1,0,0,0,0,4,1957,7,6.2776,False,False,0,0,200,1,2062,False,False,0,False,400,50,False,80,3,3,50,2018 +1,2,3,False,True,False,4,0,5500,900,False,25,False,False,0,0,False,3000,12,5,0,0,1,4,1982,7,6.2776,False,False,0,0,200,2,2063,False,False,0,False,400,50,False,80,3,3,50,2018 +1,2,4,False,True,False,4,0,5500,1200,True,25,False,True,0,0,False,4000,1,0,0,20,1,4,1970,7,6.2776,False,False,0,0,200,3,2064,False,False,0,False,400,50,False,80,3,3,50,2018 +2,3,5,True,True,True,4,0,5500,1500,True,25,False,True,0,0,False,7000,1,0,0,0,1,4,1952,7,6.2776,False,False,0,0,200,4,2065,False,False,0,False,400,50,False,80,3,3,50,2018 +2,3,6,False,True,False,4,0,5500,5000,True,25,False,True,0,0,False,2500,1,0,0,0,1,4,1985,7,6.2776,False,False,0,0,201,5,2066,False,False,0,False,400,50,False,80,3,3,50,2018 +2,4,7,False,True,False,4,0,5500,300,True,20,False,False,0,0,False,2300,1,0,0,0,0,4,1982,2,1.7936,False,False,0,0,202,6,2067,False,False,0,False,400,50,False,80,3,3,50,2010 +2,4,8,False,True,False,4,0,5500,600,True,20,False,False,0,0,False,1000,1,0,0,0,0,4,1971,2,1.7936,False,False,0,0,203,7,2068,False,False,0,False,400,50,False,80,3,3,50,2010 +3,5,9,True,True,True,1,0,5500,900,True,20,False,False,0,0,False,2000,1,0,0,0,0,1,1968,2,1.7936,False,False,0,0,204,8,2069,False,False,0,False,400,50,False,80,3,3,50,2010 +4,6,10,True,True,True,1,0,5500,1200,False,20,False,True,0,0,False,3000,12,5,0,0,1,1,1964,2,1.7936,False,False,0,0,205,9,2070,False,False,0,False,400,50,False,80,3,3,50,2010 +5,7,11,True,True,True,1,0,5500,1500,False,20,False,True,0,0,False,4000,1,0,0,20,1,1,1967,2,1.7936,False,False,0,0,206,10,2071,False,False,0,False,400,50,False,80,3,3,50,2010 +6,8,12,True,True,True,1,0,5500,8000,False,20,False,True,0,0,False,7000,1,0,0,0,1,1,1951,2,1.7936,False,False,0,0,207,11,2072,False,False,0,False,400,50,False,80,3,3,50,2010 +7,9,13,True,True,True,1,0,5500,700,False,20,False,False,0,0,False,2500,1,0,0,0,1,1,1960,2,1.7936,False,False,0,0,208,12,2073,False,False,0,False,400,50,False,80,3,3,50,2002 +8,10,14,True,True,True,1,0,5500,0,False,40,True,True,2500,0,False,2300,1,0,0,0,0,1,1959,22,19.7296,False,False,0,0,0,13,2074,False,False,0,False,400,50,False,80,3,3,50,2019 From 916df7eb22e05c0f377d4666cdffc31804a5d403 Mon Sep 17 00:00:00 2001 From: Eric Sommer Date: Mon, 24 Feb 2020 22:07:32 +0100 Subject: [PATCH 2/6] test corrected --- gettsim/tests/test_data/test_dfs_tax_transfer.csv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gettsim/tests/test_data/test_dfs_tax_transfer.csv b/gettsim/tests/test_data/test_dfs_tax_transfer.csv index fcfe808de..a289425b0 100644 --- a/gettsim/tests/test_data/test_dfs_tax_transfer.csv +++ b/gettsim/tests/test_data/test_dfs_tax_transfer.csv @@ -3,13 +3,13 @@ hid,tu_id,pid,head_tu,female,head,adult_num,child0_18_num,hh_wealth,m_wage,east, 1,1,2,False,True,False,4,0,5500,600,False,25,False,False,0,0,False,2000,1,0,0,0,0,4,1957,7,6.2776,False,False,0,0,200,1,2062,False,False,0,False,400,50,False,80,3,3,50,2018 1,2,3,False,True,False,4,0,5500,900,False,25,False,False,0,0,False,3000,12,5,0,0,1,4,1982,7,6.2776,False,False,0,0,200,2,2063,False,False,0,False,400,50,False,80,3,3,50,2018 1,2,4,False,True,False,4,0,5500,1200,True,25,False,True,0,0,False,4000,1,0,0,20,1,4,1970,7,6.2776,False,False,0,0,200,3,2064,False,False,0,False,400,50,False,80,3,3,50,2018 -2,3,5,True,True,True,4,0,5500,1500,True,25,False,True,0,0,False,7000,1,0,0,0,1,4,1952,7,6.2776,False,False,0,0,200,4,2065,False,False,0,False,400,50,False,80,3,3,50,2018 -2,3,6,False,True,False,4,0,5500,5000,True,25,False,True,0,0,False,2500,1,0,0,0,1,4,1985,7,6.2776,False,False,0,0,201,5,2066,False,False,0,False,400,50,False,80,3,3,50,2018 +2,3,5,True,True,True,4,0,5500,1500,True,25,False,True,0,0,False,7000,1,0,0,0,1,4,1952,7,6.2776,False,False,0,0,200,4,2065,False,False,0,False,400,50,False,80,3,3,50,2010 +2,3,6,False,True,False,4,0,5500,5000,True,25,False,True,0,0,False,2500,1,0,0,0,1,4,1985,7,6.2776,False,False,0,0,201,5,2066,False,False,0,False,400,50,False,80,3,3,50,2010 2,4,7,False,True,False,4,0,5500,300,True,20,False,False,0,0,False,2300,1,0,0,0,0,4,1982,2,1.7936,False,False,0,0,202,6,2067,False,False,0,False,400,50,False,80,3,3,50,2010 2,4,8,False,True,False,4,0,5500,600,True,20,False,False,0,0,False,1000,1,0,0,0,0,4,1971,2,1.7936,False,False,0,0,203,7,2068,False,False,0,False,400,50,False,80,3,3,50,2010 -3,5,9,True,True,True,1,0,5500,900,True,20,False,False,0,0,False,2000,1,0,0,0,0,1,1968,2,1.7936,False,False,0,0,204,8,2069,False,False,0,False,400,50,False,80,3,3,50,2010 -4,6,10,True,True,True,1,0,5500,1200,False,20,False,True,0,0,False,3000,12,5,0,0,1,1,1964,2,1.7936,False,False,0,0,205,9,2070,False,False,0,False,400,50,False,80,3,3,50,2010 -5,7,11,True,True,True,1,0,5500,1500,False,20,False,True,0,0,False,4000,1,0,0,20,1,1,1967,2,1.7936,False,False,0,0,206,10,2071,False,False,0,False,400,50,False,80,3,3,50,2010 -6,8,12,True,True,True,1,0,5500,8000,False,20,False,True,0,0,False,7000,1,0,0,0,1,1,1951,2,1.7936,False,False,0,0,207,11,2072,False,False,0,False,400,50,False,80,3,3,50,2010 +3,5,9,True,True,True,1,0,5500,900,True,20,False,False,0,0,False,2000,1,0,0,0,0,1,1968,2,1.7936,False,False,0,0,204,8,2069,False,False,0,False,400,50,False,80,3,3,50,2012 +4,6,10,True,True,True,1,0,5500,1200,False,20,False,True,0,0,False,3000,12,5,0,0,1,1,1964,2,1.7936,False,False,0,0,205,9,2070,False,False,0,False,400,50,False,80,3,3,50,2012 +5,7,11,True,True,True,1,0,5500,1500,False,20,False,True,0,0,False,4000,1,0,0,20,1,1,1967,2,1.7936,False,False,0,0,206,10,2071,False,False,0,False,400,50,False,80,3,3,50,2013 +6,8,12,True,True,True,1,0,5500,8000,False,20,False,True,0,0,False,7000,1,0,0,0,1,1,1951,2,1.7936,False,False,0,0,207,11,2072,False,False,0,False,400,50,False,80,3,3,50,2014 7,9,13,True,True,True,1,0,5500,700,False,20,False,False,0,0,False,2500,1,0,0,0,1,1,1960,2,1.7936,False,False,0,0,208,12,2073,False,False,0,False,400,50,False,80,3,3,50,2002 8,10,14,True,True,True,1,0,5500,0,False,40,True,True,2500,0,False,2300,1,0,0,0,0,1,1959,22,19.7296,False,False,0,0,0,13,2074,False,False,0,False,400,50,False,80,3,3,50,2019 From be5136b84e46b66dda9347b976626e80a779a71d Mon Sep 17 00:00:00 2001 From: Eric Sommer Date: Mon, 24 Feb 2020 22:12:54 +0100 Subject: [PATCH 3/6] NA check corrected --- gettsim/checks.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gettsim/checks.py b/gettsim/checks.py index 192ff6b3d..15e9a82da 100644 --- a/gettsim/checks.py +++ b/gettsim/checks.py @@ -9,15 +9,12 @@ def check_data(df): positive_vars = ["mietstufe", "wohnfl", "miete"] for var in positive_vars: try: - print(var) - print(df[var].min()) assert df[var].min() > 0 except ValueError: print(f"{var} must be strictly positive.") - print(df[var].describe()) try: - assert df.notna().all() + assert df.notna().all().all() except ValueError: print("") From 0b902bd5260727008b878fa30ea7ab2ecca91248 Mon Sep 17 00:00:00 2001 From: Eric Sommer Date: Mon, 24 Feb 2020 22:15:10 +0100 Subject: [PATCH 4/6] message added to nan test --- gettsim/checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gettsim/checks.py b/gettsim/checks.py index 15e9a82da..b926a7075 100644 --- a/gettsim/checks.py +++ b/gettsim/checks.py @@ -16,7 +16,7 @@ def check_data(df): try: assert df.notna().all().all() except ValueError: - print("") + print("NaN value encountered in input data") try: assert (df.groupby("hid")["head"].sum() == 1).all() From 57104708e1d1337378881d1a922bdfc28cb9c4d2 Mon Sep 17 00:00:00 2001 From: Eric Sommer Date: Tue, 25 Feb 2020 20:47:30 +0100 Subject: [PATCH 5/6] more checks added, esp. on boole variables --- gettsim/checks.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/gettsim/checks.py b/gettsim/checks.py index b926a7075..e6acc79d7 100644 --- a/gettsim/checks.py +++ b/gettsim/checks.py @@ -1,5 +1,15 @@ def check_data(df): - bool_variables = ["child", "east", "female"] + bool_variables = [ + "child", + "east", + "female", + "head", + "haskids", + "pkv", + "ineducation", + "eigentum", + "pensioner", + ] for variable in bool_variables: try: assert df[variable].dtype == bool @@ -20,6 +30,7 @@ def check_data(df): try: assert (df.groupby("hid")["head"].sum() == 1).all() + assert (df.groupby("hid_tu")["head_tu"].sum() == 1).all() except ValueError: print("There must be exactly one household head per household.") print(df["hid"].first()) From 731b695277385444b536d3980bfcfba0c5e8ffb8 Mon Sep 17 00:00:00 2001 From: Eric Sommer Date: Tue, 25 Feb 2020 20:55:03 +0100 Subject: [PATCH 6/6] adjust test --- gettsim/checks.py | 2 +- gettsim/tests/test_data/test_dfs_tax_transfer.csv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gettsim/checks.py b/gettsim/checks.py index e6acc79d7..490957761 100644 --- a/gettsim/checks.py +++ b/gettsim/checks.py @@ -30,7 +30,7 @@ def check_data(df): try: assert (df.groupby("hid")["head"].sum() == 1).all() - assert (df.groupby("hid_tu")["head_tu"].sum() == 1).all() + assert (df.groupby("tu_id")["head_tu"].sum() == 1).all() except ValueError: print("There must be exactly one household head per household.") print(df["hid"].first()) diff --git a/gettsim/tests/test_data/test_dfs_tax_transfer.csv b/gettsim/tests/test_data/test_dfs_tax_transfer.csv index a289425b0..c34bf7d8b 100644 --- a/gettsim/tests/test_data/test_dfs_tax_transfer.csv +++ b/gettsim/tests/test_data/test_dfs_tax_transfer.csv @@ -1,11 +1,11 @@ hid,tu_id,pid,head_tu,female,head,adult_num,child0_18_num,hh_wealth,m_wage,east,age,selfemployed,haskids,m_self,m_pensions,pkv,m_wage_l1,months_ue,months_ue_l1,months_ue_l2,w_hours,child_num_tu,adult_num_tu,byear,exper,EP,child,pensioner,m_childcare,m_imputedrent,m_kapinc,m_vermiet,renteneintritt,zveranl,ineducation,handcap_degree,alleinerz,miete,wohnfl,eigentum,heizkost,cnstyr,mietstufe,m_transfers,year 1,1,1,True,True,True,4,0,5500,300,False,25,False,False,0,0,False,1000,1,0,0,0,0,4,1958,7,6.2776,False,False,0,0,200,0,2061,False,False,0,False,400,50,False,80,3,3,50,2018 1,1,2,False,True,False,4,0,5500,600,False,25,False,False,0,0,False,2000,1,0,0,0,0,4,1957,7,6.2776,False,False,0,0,200,1,2062,False,False,0,False,400,50,False,80,3,3,50,2018 -1,2,3,False,True,False,4,0,5500,900,False,25,False,False,0,0,False,3000,12,5,0,0,1,4,1982,7,6.2776,False,False,0,0,200,2,2063,False,False,0,False,400,50,False,80,3,3,50,2018 +1,2,3,True,True,False,4,0,5500,900,False,25,False,False,0,0,False,3000,12,5,0,0,1,4,1982,7,6.2776,False,False,0,0,200,2,2063,False,False,0,False,400,50,False,80,3,3,50,2018 1,2,4,False,True,False,4,0,5500,1200,True,25,False,True,0,0,False,4000,1,0,0,20,1,4,1970,7,6.2776,False,False,0,0,200,3,2064,False,False,0,False,400,50,False,80,3,3,50,2018 2,3,5,True,True,True,4,0,5500,1500,True,25,False,True,0,0,False,7000,1,0,0,0,1,4,1952,7,6.2776,False,False,0,0,200,4,2065,False,False,0,False,400,50,False,80,3,3,50,2010 2,3,6,False,True,False,4,0,5500,5000,True,25,False,True,0,0,False,2500,1,0,0,0,1,4,1985,7,6.2776,False,False,0,0,201,5,2066,False,False,0,False,400,50,False,80,3,3,50,2010 -2,4,7,False,True,False,4,0,5500,300,True,20,False,False,0,0,False,2300,1,0,0,0,0,4,1982,2,1.7936,False,False,0,0,202,6,2067,False,False,0,False,400,50,False,80,3,3,50,2010 +2,4,7,True,True,False,4,0,5500,300,True,20,False,False,0,0,False,2300,1,0,0,0,0,4,1982,2,1.7936,False,False,0,0,202,6,2067,False,False,0,False,400,50,False,80,3,3,50,2010 2,4,8,False,True,False,4,0,5500,600,True,20,False,False,0,0,False,1000,1,0,0,0,0,4,1971,2,1.7936,False,False,0,0,203,7,2068,False,False,0,False,400,50,False,80,3,3,50,2010 3,5,9,True,True,True,1,0,5500,900,True,20,False,False,0,0,False,2000,1,0,0,0,0,1,1968,2,1.7936,False,False,0,0,204,8,2069,False,False,0,False,400,50,False,80,3,3,50,2012 4,6,10,True,True,True,1,0,5500,1200,False,20,False,True,0,0,False,3000,12,5,0,0,1,1,1964,2,1.7936,False,False,0,0,205,9,2070,False,False,0,False,400,50,False,80,3,3,50,2012