Use .csv instead of .dat files for non-indexed parameters
This commit updates Switch to use two-row .csv files instead of .dat files for
non-indexed parameters. The first row holds a list of parameter names and the
second row holds their values. The upgrade script for 2.0.5 also converts .dat
files into .csv files.

This means non-indexed inputs can be viewed and edited with the same tools
as all other inputs, and also means users don't need to learn the AMPL/Pyomo
.dat data description language.
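
For example, financials.dat (one of the files converted below) used to look
like the first block, and its replacement financials.csv holds the same values
in two rows, as in the second block (values here are hypothetical):

    old financials.dat:
        param base_financial_year := 2015;
        param interest_rate := 0.06;
        param discount_rate := 0.03;

    new financials.csv:
        base_financial_year,interest_rate,discount_rate
        2015,0.06,0.03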

The following files have been converted to .csv:
    financials.dat, trans_params.dat, spillage_penalty.dat,
    spinning_reserve_params.dat, lost_load_cost.dat, hydrogen.dat

However, Switch still uses gen_multiple_fuels.dat, which defines indexed sets
(future work will convert this to .csv too).
mfripp committed Aug 11, 2019
1 parent 9c3671e commit 244a9c9
Showing 11 changed files with 189 additions and 70 deletions.
10 changes: 6 additions & 4 deletions switch_model/balancing/operating_reserves/spinning_reserves.py
@@ -520,8 +520,9 @@ def load_inputs(m, switch_data, inputs_dir):
     generation_projects_info.csv
         GENERATION_PROJECTS, ... gen_can_provide_spinning_reserves
-    spinning_reserve_params.dat may override the default value of
-    contingency_safety_factor. Note that is is a .dat file, not a .csv file.
+    spinning_reserve_params.csv may override the default value of
+    contingency_safety_factor. Note that this only contains one
+    header row and one data row.
     """
     switch_data.load_aug(
         filename=os.path.join(inputs_dir, 'generation_projects_info.csv'),
@@ -530,6 +531,7 @@ def load_inputs(m, switch_data, inputs_dir):
         param=(m.gen_can_provide_spinning_reserves)
     )
     switch_data.load_aug(
-        filename=os.path.join(inputs_dir, 'spinning_reserve_params.dat'),
-        optional=True,
+        filename=os.path.join(inputs_dir, 'spinning_reserve_params.csv'),
+        optional=True, auto_select=True,
         param=(m.contingency_safety_factor,)
     )
@@ -608,8 +608,9 @@ def load_inputs(m, switch_data, inputs_dir):
     generation_projects_reserve_capability.csv
         GENERATION_PROJECTS, RESERVE_TYPES, [gen_reserve_type_max_share]
-    spinning_reserve_params.dat may override the default value of
-    contingency_safety_factor. Note that this is a .dat file, not a .csv file.
+    spinning_reserve_params.csv may override the default value of
+    contingency_safety_factor. Note that this only contains one header row
+    and one data row.
     """
     path=os.path.join(inputs_dir, 'generation_projects_reserve_capability.csv')
     switch_data.load_aug(
@@ -627,6 +628,7 @@ def load_inputs(m, switch_data, inputs_dir):
             [(g, "spinning") for g in gen_projects]

     switch_data.load_aug(
-        filename=os.path.join(inputs_dir, 'spinning_reserve_params.dat'),
-        optional=True,
+        filename=os.path.join(inputs_dir, 'spinning_reserve_params.csv'),
+        optional=True, auto_select=True,
         param=(m.contingency_safety_factor,)
     )
14 changes: 8 additions & 6 deletions switch_model/balancing/unserved_load.py
@@ -49,14 +49,16 @@ def load_inputs(mod, switch_data, inputs_dir):
     """
     The cost penalty of unserved load in units of $/MWh is the only parameter
     that can be inputted. The following file is not mandatory, because the
-    parameter defaults to a value of 500 $/MWh.
+    parameter defaults to a value of 500 $/MWh. This file contains one header
+    row and one data row.
     optional input files:
-        lost_load_cost.dat
+        lost_load_cost.csv
            unserved_load_penalty
     """

-    lost_load_path = os.path.join(inputs_dir, 'lost_load_cost.dat')
-    if os.path.isfile(lost_load_path):
-        switch_data.load(filename=lost_load_path)
+    switch_data.load_aug(
+        filename=os.path.join(inputs_dir, 'lost_load_cost.csv'),
+        optional=True, auto_select=True,
+        param=(mod.unserved_load_penalty,)
+    )
14 changes: 10 additions & 4 deletions switch_model/financials.py
@@ -221,7 +221,7 @@ def define_components(mod):
     mod.base_financial_year = Param(within=NonNegativeReals)
     mod.interest_rate = Param(within=NonNegativeReals)
     mod.discount_rate = Param(
-        within=NonNegativeReals, default=mod.interest_rate)
+        within=NonNegativeReals, default=lambda m: value(m.interest_rate))
     mod.min_data_check('base_financial_year', 'interest_rate')
     mod.bring_annual_costs_to_base_year = Param(
         mod.PERIODS,
@@ -307,11 +307,17 @@ def calc_sys_costs_per_period(m, p):

 def load_inputs(mod, switch_data, inputs_dir):
     """
-    Import base financial data from a .dat file. The inputs_dir should
-    contain the file financials.dat that gives parameter values for
+    Import base financial data from a .csv file. The inputs_dir should
+    contain the file financials.csv that gives parameter values for
     base_financial_year, interest_rate and optionally discount_rate.
+    The names of parameters go on the first row and the values go on
+    the second.
     """
-    switch_data.load(filename=os.path.join(inputs_dir, 'financials.dat'))
+    switch_data.load_aug(
+        filename=os.path.join(inputs_dir, 'financials.csv'),
+        optional=False, auto_select=True,
+        param=(mod.base_financial_year, mod.interest_rate, mod.discount_rate)
+    )

 def post_solve(instance, outdir):
     m = instance
8 changes: 5 additions & 3 deletions switch_model/generators/extensions/hydro_system.py
@@ -508,6 +508,8 @@ def load_inputs(mod, switch_data, inputs_dir):
         auto_select=True,
         index=mod.HYDRO_GENS,
         param=(mod.hydro_efficiency, mod.hydraulic_location))
-    spillage_penalty_path = os.path.join(inputs_dir, 'spillage_penalty.dat')
-    if os.path.isfile(spillage_penalty_path):
-        switch_data.load(filename=spillage_penalty_path)
+    switch_data.load_aug(
+        filename=os.path.join(inputs_dir, 'spillage_penalty.csv'),
+        optional=True, auto_select=True,
+        param=(mod.spillage_penalty,)
+    )
27 changes: 25 additions & 2 deletions switch_model/hawaii/hydrogen.py
@@ -260,8 +260,31 @@ def define_hydrogen_components(m):

 def load_inputs(m, switch_data, inputs_dir):
     """
-    Import hydrogen data from a .dat file.
+    Import hydrogen data from a .csv file.
     TODO: change this to allow multiple storage technologies.
     """
     if not m.options.no_hydrogen:
-        switch_data.load(filename=os.path.join(inputs_dir, 'hydrogen.dat'))
+        switch_data.load_aug(
+            filename=os.path.join(inputs_dir, 'hydrogen.csv'),
+            optional=False, auto_select=True,
+            param=(
+                m.hydrogen_electrolyzer_capital_cost_per_mw,
+                m.hydrogen_electrolyzer_fixed_cost_per_mw_year,
+                m.hydrogen_electrolyzer_kg_per_mwh,
+                m.hydrogen_electrolyzer_life_years,
+                m.hydrogen_electrolyzer_variable_cost_per_kg,
+                m.hydrogen_fuel_cell_capital_cost_per_mw,
+                m.hydrogen_fuel_cell_fixed_cost_per_mw_year,
+                m.hydrogen_fuel_cell_life_years,
+                m.hydrogen_fuel_cell_mwh_per_kg,
+                m.hydrogen_fuel_cell_variable_cost_per_mwh,
+                m.hydrogen_liquifier_capital_cost_per_kg_per_hour,
+                m.hydrogen_liquifier_fixed_cost_per_kg_hour_year,
+                m.hydrogen_liquifier_life_years,
+                m.hydrogen_liquifier_mwh_per_kg,
+                m.hydrogen_liquifier_variable_cost_per_kg,
+                m.liquid_hydrogen_tank_capital_cost_per_kg,
+                m.liquid_hydrogen_tank_life_years,
+                m.liquid_hydrogen_tank_minimum_size_kg,
+            )
+        )
68 changes: 47 additions & 21 deletions switch_model/hawaii/scenario_data.py
@@ -205,8 +205,8 @@ def write_tables(**args):
     # financials

     # this just uses a dat file, not a table (and the values are not in a database for now)
-    write_dat_file(
-        'financials.dat',
+    write_simple_csv(
+        'financials.csv',
         ['base_financial_year', 'interest_rate', 'discount_rate'],
         args
     )
@@ -265,7 +265,7 @@ def write_tables(**args):
     #########################
     # rps targets

-    write_tab_file(
+    write_csv_file(
         'rps_targets.csv',
         headers=('year', 'rps_target'),
         data=[(y, args['rps_targets'][y]) for y in sorted(args['rps_targets'].keys())],
@@ -714,14 +714,14 @@ def write_tables(**args):
     bat_cost = 'battery_capital_cost_per_mwh_capacity_by_year'
     non_cost_bat_vars = sorted([k for k in args if k.startswith('battery_') and k not in [bat_years, bat_cost]])
     if non_cost_bat_vars:
-        write_dat_file(
-            'batteries.dat',
+        write_simple_csv(
+            'batteries.csv',
             non_cost_bat_vars,
             args
         )
     if bat_years in args and bat_cost in args:
         # annual costs were provided -- write those to a tab file
-        write_tab_file(
+        write_csv_file(
             'battery_capital_cost.csv',
             headers=[bat_years, bat_cost],
             data=list(zip(args[bat_years], args[bat_cost])),
@@ -848,24 +848,24 @@ def write_tables(**args):
     # TODO: put these data in a database with hydro_scen_id's and pull them from there

     if "pumped_hydro_headers" in args:
-        write_tab_file(
+        write_csv_file(
             'pumped_hydro.csv',
             headers=args["pumped_hydro_headers"],
             data=args["pumped_hydro_projects"],
             arguments=args
         )

-    # write_dat_file(
-    #     'pumped_hydro.dat',
+    # write_simple_csv(
+    #     'pumped_hydro.csv',
     #     [k for k in args if k.startswith('pumped_hydro_')],
     #     args
     # )

     #########################
     # hydrogen
     # TODO: put these data in a database and write a .csv file instead
-    write_dat_file(
-        'hydrogen.dat',
+    write_simple_csv(
+        'hydrogen.csv',
         sorted([k for k in args if k.startswith('hydrogen_') or k.startswith('liquid_hydrogen_')]),
         args
     )
@@ -949,21 +949,30 @@ def db_cursor():
         raise
     return con.cursor()

-def write_dat_file(output_file, args_to_write, arguments):
-    """ write a simple .dat file with the arguments specified in args_to_write,
-    drawn from the arguments dictionary"""
+def write_simple_csv(output_file, args_to_write, arguments):
+    """ write a simple .csv file with the arguments specified in args_to_write,
+    drawn from the arguments dictionary. This includes one row with all the
+    parameter names and a second row with their values.
+    (previously write_dat_file())"""

-    if any(arg in arguments for arg in args_to_write):
+    start=time.time()
+
+    # collect data for the two rows (if any)
+    headers = []
+    values = []
+    for name in args_to_write:
+        if name in arguments:
+            headers.append(name)
+            values.append(str(arguments[name]))
+
+    if headers:
         output_file = make_file_path(output_file, arguments)
         print("Writing {file} ...".format(file=output_file), end=' ')
         sys.stdout.flush()  # display the part line to the user
-        start=time.time()

         with open(output_file, 'w') as f:
-            f.writelines([
-                'param ' + name + ' := ' + str(arguments[name]) + ';\n'
-                for name in args_to_write if name in arguments
-            ])
+            f.write(','.join(headers) + '\n')
+            f.write(','.join(values) + '\n')

         print("time taken: {dur:.2f}s".format(dur=time.time()-start))

@@ -985,7 +994,7 @@ def write_table(output_file, query, arguments):

     print("time taken: {dur:.2f}s".format(dur=time.time()-start))

-def write_tab_file(output_file, headers, data, arguments={}):
+def write_csv_file(output_file, headers, data, arguments={}):
     "Write a tab file using the headers and data supplied."
     output_file = make_file_path(output_file, arguments)

@@ -1000,6 +1009,23 @@ def write_tab_file(output_file, headers, data, arguments={}):

     print("time taken: {dur:.2f}s".format(dur=time.time()-start))

+def write_dat_file(output_file, args_to_write, arguments):
+    """ write a simple .dat file with the arguments specified in args_to_write,
+    drawn from the arguments dictionary"""
+
+    if any(arg in arguments for arg in args_to_write):
+        output_file = make_file_path(output_file, arguments)
+        print("Writing {file} ...".format(file=output_file), end=' ')
+        sys.stdout.flush()  # display the part line to the user
+        start=time.time()
+
+        with open(output_file, 'w') as f:
+            f.writelines([
+                'param ' + name + ' := ' + str(arguments[name]) + ';\n'
+                for name in args_to_write if name in arguments
+            ])
+
+        print("time taken: {dur:.2f}s".format(dur=time.time()-start))
+
 def write_indexed_set_dat_file(output_file, set_name, query, arguments):
     """Write a .dat file defining an indexed set, based on the query provided.
26 changes: 14 additions & 12 deletions switch_model/transmission/transport/build.py
@@ -107,7 +107,7 @@ def define_components(mod):
     transmission model transmission data. At the end of this time,
     we assume transmission lines will be rebuilt at the same cost.
-    trans_fixed_om_fraction is describes the fixed Operations and
+    trans_fixed_om_fraction describes the fixed Operations and
     Maintenance costs as a fraction of capital costs. This optional
     parameter defaults to 0.03 based on 2009 WREZ transmission model
     transmission data costs for existing transmission maintenance.
@@ -289,17 +289,14 @@ def load_inputs(mod, switch_data, inputs_dir):
     columns are missing or if cells contain a dot (.), those parameters
     will be set to default values as described in documentation.
-    Note that the next file is formatted as .dat, not as .csv. The
-    distribution_loss_rate parameter should only be inputted if the
-    local_td module is loaded in the simulation. If this parameter is
-    specified a value in trans_params.dat and local_td is not included
-    in the module list, then an error will be raised.
+    Note that in the next file, parameter names are written on the first
+    row (as usual), and the single value for each parameter is written in
+    the second row. The distribution_loss_rate parameter is read by the
+    local_td module (if used).
-    trans_params.dat
+    trans_params.csv
         trans_capital_cost_per_mw_km, trans_lifetime_yrs,
         trans_fixed_om_fraction, distribution_loss_rate
     """

     # TODO: send issue / pull request to Pyomo to allow .csv files with
@@ -324,9 +321,14 @@ def load_inputs(mod, switch_data, inputs_dir):
             mod.trans_terrain_multiplier, mod.trans_new_build_allowed
         )
     )
-    trans_params_path = os.path.join(inputs_dir, 'trans_params.dat')
-    if os.path.isfile(trans_params_path):
-        switch_data.load(filename=trans_params_path)
+    switch_data.load_aug(
+        filename=os.path.join(inputs_dir, 'trans_params.csv'),
+        optional=True, auto_select=True,
+        param=(
+            mod.trans_capital_cost_per_mw_km, mod.trans_lifetime_yrs,
+            mod.trans_fixed_om_fraction, mod.distribution_loss_rate
+        )
+    )


 def post_solve(instance, outdir):
16 changes: 16 additions & 0 deletions switch_model/upgrade/re_upgrade.py
@@ -0,0 +1,16 @@
+from __future__ import print_function
+
+import os
+from switch_model.upgrade.manager import upgrade_plugins
+upgrade_module, upgrade_from, upgrade_to = upgrade_plugins[-1]
+
+if __name__ == '__main__':
+    print(
+        "Re-running upgrade from {} to {} for all subdirectories of current directory"
+        .format(upgrade_from, upgrade_to)
+    )
+
+    for dirpath, dirnames, filenames in os.walk('.'):
+        if 'switch_inputs_version.txt' in filenames:
+            print('upgrading {}'.format(dirpath))
+            upgrade_module.upgrade_input_dir(dirpath)
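
Assuming the switch_model package is importable, this script can presumably be
run as "python -m switch_model.upgrade.re_upgrade" from the parent of one or
more inputs directories; it walks the tree and re-applies the latest upgrade to
every directory marked by a switch_inputs_version.txt file.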

8 comments on commit 244a9c9

@josiahjohnston (Contributor)

I find extra-wide tables with a single row extremely awkward to review or edit compared to long and skinny tables with key & value columns. Text editors are basically unusable for this, and spreadsheet editors are just quite awkward, with a tendency to truncate most cells in the display.
What is the motivation for using wide style instead of narrow style for this?
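
For concreteness, the two styles under discussion look like this for
hypothetical parameters a, b, c:

    wide -- one header row, one value row:
        a,b,c
        1,2,3

    narrow -- key & value columns:
        name,value
        a,1
        b,2
        c,3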

@mfripp (Member, Author) commented on 244a9c9 Aug 12, 2019 via email

@josiahjohnston (Contributor) commented on 244a9c9 Aug 12, 2019 via email

@mfripp (Member, Author) commented on 244a9c9 Aug 12, 2019 via email

@mfripp (Member, Author) commented on 244a9c9 Aug 12, 2019 via email

@josiahjohnston (Contributor)

Sounds good. I'll implement parsing support in load_aug for individual key,value csv files to replace .dat files if the optional argument singleton is set to True. Should be easy to fiddle with the code subsequently if more discussion is warranted.
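
A hypothetical call under that proposal (the singleton argument is
illustrative only, not yet implemented) might read:

    switch_data.load_aug(
        filename=os.path.join(inputs_dir, 'spinning_reserve_params.csv'),
        optional=True, singleton=True,
        param=(m.contingency_safety_factor,)
    )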

If I/we can figure out a trivial & robust way to auto-detect singleton from inspecting the parameters, we could drop the explicit singleton argument, but I'm doubtful changing that trigger from an explicit request to an implicit inference would be an improvement. From the Zen of Python (PEP 20):

Explicit is better than implicit.

@josiahjohnston (Contributor) commented on 244a9c9 Aug 12, 2019

After starting implementation, I realized it would be more explicit and clear to define a new function instead of doing more overloading of our load_aug wrapper around Pyomo's DataPortal.load function. One reason this seems better to me is it implements distinct parsing from DataPortal.load, rather than just customizing calls to DataPortal.load as our load_aug[mented] function was intended to do.

My current spec is:

def load_key_value_inputfile(switch_data, filepath, optional=False):

Where switch_data is a data portal object. Previously .dat files didn't use any of the other keyword arguments for load_aug, since DataPortal.load had such a simple API for .dat file parsing.
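
A minimal sketch of that spec might look like this (hypothetical code,
assuming the DataPortal stores scalar parameter values under the index None,
and ignoring quoting and null-value handling):

    import csv
    import os

    def load_key_value_inputfile(switch_data, filepath, optional=False):
        """Read a two-column key,value .csv file and store each row as a
        non-indexed parameter value in the data portal (sketch only)."""
        if not os.path.isfile(filepath):
            if optional:
                return
            raise IOError('Mandatory input file not found: {}'.format(filepath))
        with open(filepath) as f:
            rows = list(csv.reader(f))
        # rows[0] is the name,value header row; scalar parameters are
        # assumed to live under the index None in the DataPortal data dict
        for name, value in rows[1:]:
            switch_data.data()[name] = {None: value}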

@mfripp (Member, Author) commented on 244a9c9 Aug 12, 2019

My reason for doing this within load_aug is that it allows us to use the same behaviors as we already use without duplication of code (i.e., optional/auto_select/params arguments, autodetection of optional params, soon --input-aliases). And load_aug already has a code path (that you wrote) for spotting non-indexed parameters and loading a non-indexed (wide) csv. So it would just require a minor change to that code path (read the file and generate data values instead of reading it directly).

One point though is that we can't just use .load() to read the vertical csv because the "name" and "value" columns may conflict with other param names (or just other singleton parameter files). So we need to be careful to use exactly the same parsing rules as other csv files (e.g. support similar quoting and treatment of null values) while loading it some other way. But that's not insurmountable, and there's not really an argument for doing that in a separate function instead of in the singleton path in load_aug.

So if we are going to keep the behaviors we already have in load_aug, it seems to me to make sense to just use load_aug for these singleton files.

On the other hand, I don't like a lot of aspects of our current arguments to load_aug: I think optional_params should be similar to param, listing components rather than names (e.g., hydro_system currently has optional_params=['mod.wnode_constant_inflow', 'mod.wnode_constant_consumption'] due to this kind of confusion). Further, param should probably be called params, or maybe mandatory_params, and any particular param should probably only appear in params or optional_params, not both (again to avoid duplication and maintenance problems). And auto_select should be the default/only behavior for all calls. And really, all of this should actually be handled by tagging the param or set with its filename at creation time (along with tagging it with documentation info), and any column should be optional if and only if a default is defined. Then we could automate the load_aug calls and the user wouldn't have to learn or memorize any arguments. This would minimize duplication and enhance readability and maintainability. It would also support use of automatic tools to generate documentation, including cross-references between component names, the module where they're defined and the table where the values are read from.

So, with that in mind, it may not matter much how we handle reading the csv files for non-indexed parameters for now. But since we don't know how soon that transition will come, we might as well keep it logical, which probably means reusing load_aug (or at least most of its behavior).
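
As a rough sketch, the singleton path described here might read the file like
this (hypothetical helper; real code would need to apply the same quoting and
null-value rules as other .csv inputs):

    import csv

    def _read_singleton_csv(filename, params):
        # read the one-header-row / one-data-row file directly, so its
        # column names can never collide with other parameter names the
        # way a generic name/value load could
        with open(filename) as f:
            reader = csv.reader(f)
            headers = next(reader)
            values = next(reader)
        row = dict(zip(headers, values))
        # keep only the requested parameters, skipping '.' placeholders
        return {p.name: row[p.name] for p in params
                if p.name in row and row[p.name] != '.'}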
