Skip to content

Commit

Permalink
Merge pull request Breakthrough-Energy#21 from mbwaite/daniel/puma_data_agg
Browse files Browse the repository at this point in the history

refactor: simplify calculation of puma fuel fractions, and correct a calculation typo
  • Loading branch information
danielolsen authored Oct 26, 2021
2 parents 4e63689 + e1d5986 commit 4dc9fc7
Show file tree
Hide file tree
Showing 8 changed files with 2,448 additions and 2,473 deletions.
62 changes: 38 additions & 24 deletions prereise/gather/demanddata/bldg_electrification/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,29 +144,43 @@
target_year = 2010

# Regions to differentiate fuel usage: NorthEast, MidWest, South, West. Defined by RECS/CBECS surveys
us_northeast = ["MA", "CT", "ME", "NH", "RI", "VT", "NJ", "NY", "PA"]
us_midwest = ["IL", "MI", "WI", "IN", "OH", "MO", "IA", "MN", "ND", "SD", "KS", "NE"]
us_south = [
"VA",
"GA",
"FL",
"DC",
"DE",
"MD",
"WV",
"NC",
"SC",
"TN",
"AL",
"KY",
"MS",
"TX",
"AR",
"LA",
"OK",
]
us_west = ["CO", "ID", "MT", "UT", "WY", "AZ", "NM", "NV", "CA", "AK", "HI", "OR", "WA"]

regions = [us_northeast, us_midwest, us_south, us_west]
regions = {
"northeast": ["CT", "MA", "ME", "NH", "NJ", "NY", "PA", "RI", "VT"],
"midwest": ["IA", "IL", "IN", "KS", "MI", "MN", "MO", "ND", "NE", "OH", "SD", "WI"],
"south": [
"AL",
"AR",
"DC",
"DE",
"FL",
"GA",
"KY",
"LA",
"MD",
"MS",
"NC",
"OK",
"SC",
"TN",
"TX",
"VA",
"WV",
],
"west": [
"AK",
"AZ",
"CA",
"CO",
"HI",
"ID",
"MT",
"NM",
"NV",
"OR",
"UT",
"WA",
"WY",
],
}

fuel = ["natgas", "fok", "othergas", "elec"]
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,natgas,elec,othergas,fok,
NE,0.251,0.210,0.035,0,
MW,0.205,0.218,0.012,0,
SO,0.185,0.202,0.014,0,
WE,0.196,0.168,0.013,0,
northeast,0.251,0.210,0.035,0,
midwest,0.205,0.218,0.012,0,
south,0.185,0.202,0.014,0,
west,0.196,0.168,0.013,0,
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,natgas,elec,othergas,fok,district,
NE,0.399,0.352,0.020,0.082,0.083,
MW,0.490,0.365,0.019,0.007,0.037,
SO,0.319,0.495,0.015,0.005,0.040,
WE,0.448,0.399,0.014,0.003,0.029,
northeast,0.399,0.352,0.020,0.082,0.083,
midwest,0.490,0.365,0.019,0.007,0.037,
south,0.319,0.495,0.015,0.005,0.040,
west,0.448,0.399,0.014,0.003,0.029,
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,natgas,elec,othergas,fok,,
NE,0.541,0.258,0.035,0.160,,
MW,0.641,0.304,0.048,0.003,,
SO,0.317,0.652,0.024,0.003,,
WE,0.660,0.285,0.048,0.004,,
northeast,0.541,0.258,0.035,0.160,,
midwest,0.641,0.304,0.048,0.003,,
south,0.317,0.652,0.024,0.003,,
west,0.660,0.285,0.048,0.004,,
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,natgas,elec,othergas,fok,,
NE,0.481,0.461,0.057,0,,
MW,0.369,0.587,0.044,0,,
SO,0.208,0.742,0.050,0,,
WE,0.430,0.526,0.044,0,,
northeast,0.481,0.461,0.057,0,,
midwest,0.369,0.587,0.044,0,,
south,0.208,0.742,0.050,0,,
west,0.430,0.526,0.044,0,,
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,natgas,elec,othergas,fok,
NE,0.530,0.131,0.024,0.156,
MW,0.685,0.156,0.037,0.012,
SO,0.355,0.433,0.025,0.009,
WE,0.507,0.312,0.018,0.002,
northeast,0.530,0.131,0.024,0.156,
midwest,0.685,0.156,0.037,0.012,
south,0.355,0.433,0.025,0.009,
west,0.507,0.312,0.018,0.002,
4,674 changes: 2,337 additions & 2,337 deletions prereise/gather/demanddata/bldg_electrification/data/puma_data.csv

Large diffs are not rendered by default.

145 changes: 53 additions & 92 deletions prereise/gather/demanddata/bldg_electrification/puma_data_agg.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@


def aggregate_puma_df(
puma_fuel_2010, tract_puma_mapping, tract_gbs_area, tract_degday_normals, tract_pop
puma_states, tract_puma_mapping, tract_gbs_area, tract_degday_normals, tract_pop
):
"""Scale census tract data up to puma areas.
:param pandas.DataFrame puma_fuel_2010: household fuel type by puma.
:param pandas.DataFrame puma_states: mapping of puma to state.
:param pandas.DataFrame tract_puma_mapping: tract to puma mapping.
:param pandas.DataFrame tract_gbs_area: General Building Stock area for residential, commercial, industrial areas by tract
:param pandas.DataFrame tract_degday_normals: heating and cooling degree day normals by tract
Expand All @@ -22,7 +22,7 @@ def aggregate_puma_df(
fractions.
"""
# Set up puma_df data frame
puma_df = puma_fuel_2010["state"].to_frame()
puma_df = puma_states.to_frame()

# Combine tract-level data into single data frame with only census tracts with building area data
tract_data = pd.concat(
Expand Down Expand Up @@ -110,111 +110,72 @@ def aggregate_puma_df(
puma_df[puma_df["state"] == state]["com_area_gbs_m2"] * com_scalar
)

# Calculate res fractions of fuel usage based off puma_fuel_2010 household data
puma_df["frac_sh_res_natgas"] = (
puma_fuel_2010["hh_utilgas"] / puma_fuel_2010["hh_total"]
)
puma_df["frac_sh_res_fok"] = puma_fuel_2010["hh_fok"] / puma_fuel_2010["hh_total"]
puma_df["frac_sh_res_othergas"] = (
puma_fuel_2010["hh_othergas"] / puma_fuel_2010["hh_total"]
)
puma_df["frac_sh_res_coal"] = puma_fuel_2010["hh_coal"] / puma_fuel_2010["hh_total"]
puma_df["frac_sh_res_wood"] = puma_fuel_2010["hh_wood"] / puma_fuel_2010["hh_total"]
puma_df["frac_sh_res_solar"] = (
puma_fuel_2010["hh_solar"] / puma_fuel_2010["hh_total"]
)
puma_df["frac_sh_res_elec"] = puma_fuel_2010["hh_elec"] / puma_fuel_2010["hh_total"]
puma_df["frac_sh_res_other"] = (
puma_fuel_2010["hh_other"] / puma_fuel_2010["hh_total"]
)
puma_df["frac_sh_res_none"] = puma_fuel_2010["hh_none"] / puma_fuel_2010["hh_total"]

return puma_df


def scale_fuel_fractions(puma_df, regions, fuel):
def scale_fuel_fractions(hh_fuels, puma_df, year=2010):
"""Calculate per-puma fuel fractions, scaled to match regional target values.
:param pandas.DataFrame hh_fuels: household fuel type by puma.
:param pandas.DataFrame puma_df: output of :func:`aggregate_puma_df`.
:param list of lists regions: state regions used to scale fuel fractions.
:param list fuel: types of fuel.
:param int/str year: year to use within label when creating columns.
:return: (*pandas.DataFrame*) -- fractions of natural gas, fuel oil and kerosene,
propane, and electricity used for space heating, hot water, cooking, and other
in residential and commercial buildings.
"""
# Calculate res fractions of fuel usage based on hh_fuels household data
puma_df["frac_sh_res_natgas"] = hh_fuels["hh_utilgas"] / hh_fuels["hh_total"]
for f in ["fok", "othergas", "coal", "wood", "solar", "elec", "other", "none"]:
puma_df[f"frac_sh_res_{f}"] = hh_fuels[f"hh_{f}"] / hh_fuels["hh_total"]

region_map = {state: r for r, states in const.regions.items() for state in states}
puma_region_groups = puma_df.groupby(puma_df["state"].map(region_map))
for c in const.classes:
if c == "res":
uselist = ["dhw", "other"]
else:
uselist = ["sh", "dhw", "cook"]
for u in uselist:
frac_area = pd.DataFrame(columns=fuel)

# Compute frac_area for each fuel type in each region
for i in regions:
fuellist = []
for j in fuel:
region_df = puma_df[puma_df["state"].isin(i)].reset_index()
fuellist.append(
sum(
region_df[f"frac_sh_res_{j}"]
* region_df[f"{c}_area_2010_m2"]
)
/ sum(region_df[f"{c}_area_2010_m2"])
# Compute area fraction for each fuel type (column) in each region (index)
area_fractions = puma_region_groups.apply(
lambda x: pd.Series(
{
f: (
(x[f"frac_sh_res_{f}"] * x[f"{c}_area_2010_m2"]).sum()
/ x[f"{c}_area_2010_m2"].sum()
)
df_i = len(frac_area)
frac_area.loc[df_i] = fuellist

# Values calculated externally
frac_scale = pd.read_csv(os.path.join(data_dir, f"frac_target_{u}_{c}.csv"))

downscalar = frac_scale / frac_area

upscalar = (frac_scale - frac_area) / (1 - frac_area)

# Scale frac_hh_fuel to frac_com_fuel
for f in fuel:
scalar = 1
fraccom = []
for i in range(len(puma_df)):
for j in range(len(regions)):
if puma_df["state"][i] in regions[j]:
region_index = j
if downscalar[f][region_index] <= 1:
scalar = downscalar[f][region_index]
fraccom.append(puma_df[f"frac_sh_res_{f}"][i] * scalar)
for f in const.fuel
}
)
)
# Scale per-PUMA values to match target regional values (calculated externally)
uselist = ["dhw", "other"] if c == "res" else ["sh", "dhw", "cook"]
for u in uselist:
area_fraction_targets = pd.read_csv(
os.path.join(data_dir, f"frac_target_{u}_{c}.csv"),
index_col=0,
)
down_scale = area_fraction_targets / area_fractions
up_scale = (area_fraction_targets - area_fractions) / (1 - area_fractions)
for r in const.regions:
for f in const.fuel:
pre_scaling = puma_region_groups.get_group(r)[f"frac_sh_res_{f}"]
if down_scale.loc[r, f] <= 1:
scaled = pre_scaling * down_scale.loc[r, f]
else:
scalar = upscalar[f][region_index]
fraccom.append(
(1 - puma_df[f"frac_sh_res_{f}"][i]) * scalar
+ puma_df[f"frac_sh_res_{f}"][i]
)
puma_df[f"frac_{u}_{c}_{f}"] = fraccom
scaled = pre_scaling + up_scale.loc[r, f] * (1 - pre_scaling)
puma_df.loc[pre_scaling.index, f"frac_{f}_{u}_{c}_{year}"] = scaled

# Sum coal, wood, solar and other fractions for frac_com_other
puma_df["frac_sh_com_other"] = puma_df[
[
"frac_sh_res_coal",
"frac_sh_res_wood",
"frac_sh_res_solar",
"frac_sh_res_other",
]
].sum(axis=1)
named_sh_com_fuels = {"elec", "fok", "natgas", "othergas"}
named_sh_com_cols = [f"frac_{f}_sh_com_{year}" for f in named_sh_com_fuels]
puma_df[f"frac_other_sh_com_{year}"] = 1 - puma_df[named_sh_com_cols].sum(axis=1)

# Copy residential space heating columns to match new column naming convention
puma_df = puma_df.assign(
**{f"frac_{f}_sh_res_{year}": puma_df[f"frac_sh_res_{f}"] for f in const.fuel}
)
fossil_fuels = {"natgas", "othergas", "fok"}
for c in const.classes:
if c == "res":
uselist = ["sh", "dhw", "other"]
else:
uselist = ["sh", "dhw", "cook"]
uselist = ["sh", "dhw", "other"] if c == "res" else ["sh", "dhw", "cook"]
for u in uselist:
puma_df[f"frac_ff_{u}_{c}_2010"] = puma_df[
[
f"frac_{u}_{c}_natgas",
f"frac_{u}_{c}_othergas",
f"frac_{u}_{c}_fok",
]
].sum(axis=1)
puma_df[f"frac_elec_{u}_{c}_2010"] = puma_df[f"frac_{u}_{c}_elec"]
fossil_cols = [f"frac_{f}_{u}_{c}_{year}" for f in fossil_fuels]
puma_df[f"frac_ff_{u}_{c}_{year}"] = puma_df[fossil_cols].sum(axis=1)
return puma_df


Expand Down Expand Up @@ -261,14 +222,14 @@ def puma_timezone_join(timezones, pumas):
tract_pop = pd.read_csv(os.path.join(data_dir, "tract_pop.csv"), index_col="tract")

puma_data_unscaled = aggregate_puma_df(
puma_fuel_2010,
puma_fuel_2010["state"],
tract_puma_mapping,
tract_gbs_area,
tract_degday_normals,
tract_pop,
)

puma_data = scale_fuel_fractions(puma_data_unscaled, const.regions, const.fuel)
puma_data = scale_fuel_fractions(puma_fuel_2010, puma_data_unscaled)

# Add time zone information
puma_timezones = pd.read_csv(
Expand Down

0 comments on commit 4dc9fc7

Please sign in to comment.