Breakthrough-Energy · rouille · Mar 26, 2021 · Mar 24, 2021
diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py
@@ -8,25 +8,23 @@
 
 
 def sjoin_nearest(left_df, right_df, search_dist=0.06):
-    """
-    Perform a spatial join between two input layers.
-    If a geometry in left_df falls outside (all) geometries in right_df, the data from
-        nearest Polygon will be used as a result.
-    To make queries faster, change "search_dist."
+    """Perform a spatial join between two input layers.
+
     :param geopandas.GeoDataFrame left_df: A dataframe of Points.
-    :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons
-    :param float/int search_dist: parameter (specified in map units) is used to limit
-        the search area for geometries around source points. Smaller -> faster runtime.
-    :return: (*geopandas.GeoDataFrame*) -- A dataframe of Points mapped to each polygon
-        in right_df.
+    :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons.
+    :param float/int search_dist: radius (in map units) around point to detect polygons.
+    :return: (*geopandas.GeoDataFrame*) -- data frame of Points mapped to each Polygon.
+
+    .. note:: data from nearest Polygon/Multipolygon will be used as a result if a
+        Point falls outside all available Polygon/Multipolygons.
     """
 
     def _find_nearest(series, polygons, search_dist):
-        """Given a row with a bus id and a Point, find the closest polygon.
+        """Find the closest polygon.
 
         :param pandas.Series series: point to map.
         :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select from.
-        :param float search_dist: radius around point to detect polygons in.
+        :param float search_dist: radius around point to detect polygons.
         """
         geom = series[left_df.geometry.name]
         # Get geometries within search distance
@@ -83,17 +81,16 @@ def _find_nearest(series, polygons, search_dist):
 
 
 def points_to_polys(df, name, shpfile, search_dist=0.04):
-    """Given a dataframe which includes 'lat' and 'lon' columns, and a shapefile of
-        Polygons/Multipolygon regions, map df.index to closest regions.
-
-    :param pandas.DataFrame df: includes an index, and 'lat' and 'lon' columns.
-    :param str name: what to name the id (bus, plant, substation, etc)
-    :param str shpfile: name of shapefile containing a collection Polygon/Multipolygon
-        shapes with region IDs.
-    :param float/int search_dist: distance to search from point for nearest polygon.
-    :raises ValueError: if some points are dropped because too far away from polys.
-    :return: (*geopandas.GeoDataFrame*) --
-        columns: index id, (point) geometry, [region, other properties of region]
+    """Map node to closest region.
+
+    :param pandas.DataFrame df: data frame with node id as index and *'lat'* and
+        *'lon'* as columns.
+    :param str name: name of node, e.g., bus, plant, substation, etc.
+    :param str shpfile: shapefile enclosing Polygon/Multipolygon with region id.
+    :param float/int search_dist: radius around point to detect polygons.
+    :raises ValueError: if some points are dropped because too far away from polygons.
+    :return: (*geopandas.GeoDataFrame*) -- columns: id name, (point) geometry,
+        region and properties of region.
     """
     gpd = _check_import("geopandas")
     polys = gpd.read_file(shpfile)
@@ -123,22 +120,19 @@ def points_to_polys(df, name, shpfile, search_dist=0.04):
         err_msg = (
             "Some points dropped because could not be mapped to regions. "
             "Check your lat/lon values to be sure it's in the US. "
-            f"Or increase search_dist if close. Problem ids: {dropped}"
+            f"Or increase search_dist. ids dropped: {dropped}"
         )
         raise ValueError(err_msg)
 
     return pts_poly
 
 
 def bus_to_reeds_reg(df):
-    """Given a dataframe of buses, return a dataframe of bus_id's with associated
-        ReEDS regions (wind resource regions (rs) and BA regions (rb)).
-    Used to map regional generation investment cost multipliers.
-    region_map.csv is from: "/bokehpivot/in/reeds2/region_map.csv".
-    rs/rs.shp is created with :py:func:`write_poly_shapefile`.
-
-    :param pandas.DataFrame df: grid bus dataframe.
-    :return: (*pandas.DataFrame*) -- bus_id map. columns: bus_id, rs, rb
+    """Map bus to ReEDS regions.
+
+    :param pandas.DataFrame df: bus data frame.
+    :return: (*pandas.DataFrame*) -- index: bus id, columns: rs (wind resource region)
+        and rb (BA region).
     """
     pts_poly = points_to_polys(
         df, "bus", const.reeds_wind_shapefile_path, search_dist=2
@@ -156,18 +150,15 @@ def bus_to_reeds_reg(df):
 
 
 def bus_to_neem_reg(df):
-    """Given a dataframe of buses, return a dataframe of bus_id's with associated
-        NEEM region, lat, and lon of bus.
-    Used to map regional transmission investment cost multipliers.
-    Shapefile used to map is 'data/NEEM/NEEMregions.shp' which is pulled from Energy
-        Zones `Mapping tool <http://ezmt.anl.gov>`_. This map is overly detailed, so I
-        simplified the shapes using 1 km distance (Douglas-Peucker) method in QGIS.
-
-    :param pandas.DataFrame df: grid.bus instance.
-    :return: (*pandas.DataFrame*) -- bus_id map.
-        columns: bus_id, lat, lon, name_abbr (NEEM region)
-
-    Note: mapping may take a while, especially for many points.
+    """Map bus to NEEM regions.
+
+    :param pandas.DataFrame df: bus data frame.
+    :return: (*pandas.DataFrame*) -- index: bus id, columns: lat, lon, name_abbr
+        (NEEM region).
+
+    .. note:: the shapefile used for mapping is pulled from the Energy Zones `Mapping
+        tool <http://ezmt.anl.gov>`_. This map is overly detailed, so the shapes are
+        simplified using 1 km distance (Douglas-Peucker) method in QGIS.
     """
 
     pts_poly = points_to_polys(df, "bus", const.neem_shapefile_path, search_dist=1)
@@ -184,11 +175,7 @@ def bus_to_neem_reg(df):
 
 
 def write_bus_neem_map():
-    """
-    Maps the bus locations from the base USA grid to NEEM regions.
-    Writes out csv with bus numbers, associated NEEM region, and lat/lon of bus
-        (to check if consistent with bus location in _calculate_ac_inv_costs).
-    """
+    """Write bus location to NEEM region mapping to file."""
     base_grid = Grid(["USA"])
     df_pts_bus = bus_to_neem_reg(base_grid.bus)
     df_pts_bus.sort_index(inplace=True)
@@ -197,10 +184,7 @@ def write_bus_neem_map():
 
 
 def write_bus_reeds_map():
-    """
-    Maps the bus locations from the base USA grid to ReEDS regions.
-    Writes out csv with bus numbers, associated ReEDS regions, and distances.
-    """
+    """Write bus location to ReEDS region mapping to file."""
     base_grid = Grid(["USA"])
     df_pts_bus = bus_to_reeds_reg(base_grid.bus)
     df_pts_bus.sort_index(inplace=True)
@@ -209,17 +193,10 @@ def write_bus_reeds_map():
 
 
 def write_poly_shapefile():
-    """
-    Converts a ReEDS csv-format file to a shapefile. Shouldn't need to run again
-        unless new source data.
-    Right now, hard-coded read ReEDS wind resource regions (labelled rs).
-    gis_rs.csv is from ReEDS open-source: "/bokehpivot/in/gis_rs.csv"
-    hierarchy.csv is from: "/bokehpivot/in/reeds2/hierarchy.csv"
-    writes out the shapefile in "rs/rs.shp"
-
-    Note: These ReEDS wind resource region shapes are approximate. Thus, there are
-        probably some mistakes, but this is currently only used for mapping plant
-        regional multipliers, which are approximate anyway, so it should be fine.
+    """Convert ReEDS wind resource csv-format file to a shapefile.
+
+    .. note:: *gis_rs.csv* is from ReEDS open-source: */bokehpivot/in/gis_rs.csv*,
+        *hierarchy.csv* is from: */bokehpivot/in/reeds2/hierarchy.csv*.
     """
     fiona = _check_import("fiona")
     shapely_geometry = _check_import("shapely.geometry")

diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py
@@ -14,16 +14,12 @@
 
 
 def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None):
-    """Given a Scenario object, calculate the total cost of building that scenario's
-    upgrades of lines and transformers.
-    Currently uses NEEM regions to find regional multipliers.
-    Currently ignores financials, but all values are in 2010 $-year.
-    Need to test that there aren't any na values in regional multipliers
-    (some empty parts of table)
+    """Calculate cost of upgrading AC lines and/or transformers in a scenario.
+    NEEM regions are used to find regional multipliers.
 
     :param powersimdata.scenario.scenario.Scenario scenario: scenario instance.
-    :param boolean sum_results: if True, sum dataframe for each category.
-    :return: (*dict*) -- Total costs (line costs, transformer costs) (in $2010).
+    :param bool sum_results: sum data frame for each branch type.
+    :return: (*dict*) -- cost of upgrading branches in $2010.
     """
 
     base_grid = Grid(scenario.info["interconnect"].split("_"))
@@ -44,25 +40,24 @@ def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None):
 
 
 def _calculate_ac_inv_costs(grid_new, sum_results=True):
-    """Given a grid, calculate the total cost of building that grid's
-    lines and transformers.
-    This function is separate from calculate_ac_inv_costs() for testing purposes.
-    Currently counts Transformer and TransformerWinding as transformers.
-    Currently uses NEEM regions to find regional multipliers.
+    """Calculate cost of upgrading AC lines and/or transformers. NEEM regions are
+    used to find regional multipliers. Note that a transformer winding is considered
+    as a transformer.
 
     :param powersimdata.input.grid.Grid grid_new: grid instance.
-    :param boolean sum_results: if True, sum dataframe for each category.
-    :return: (*dict*) -- Total costs (line costs, transformer costs).
+    :param bool sum_results: sum data frame for each branch type.
+    :return: (*dict*) -- cost of upgrading branches in $2010.
     """
 
     def select_mw(x, cost_df):
-        """Given a single branch, determine the closest kV/MW combination and return
-        the corresponding cost $/MW-mi.
-
-        :param pandas.core.series.Series x: data for a single branch
-        :param pandas.core.frame.DataFrame cost_df: DataFrame with kV, MW, cost columns
-        :return: (*pandas.core.series.Series*) -- series of ['MW', 'costMWmi'] to be
-            assigned to given branch
+        """Determine the closest kV/MW combination for a single branch and return
+        the corresponding cost (in $/MW-mi).
+
+        :param pandas.Series x: data for a single branch.
+        :param pandas.DataFrame cost_df: data frame with *'kV'*, *'MW'*, *'costMWmi'*
+            as columns.
+        :return: (*pandas.Series*) -- series of [*'MW'*, *'costMWmi'*] to be assigned
+            to branch.
         """
 
         # select corresponding cost table of selected kV
@@ -75,9 +70,9 @@ def select_mw(x, cost_df):
     def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()):
         """Determine the regional multiplier based on kV and power (closest).
 
-        :param pandas.core.series.Series x: data for a single transformer.
-        :param pandas.core.frame.DataFrame bus_reg: data frame with bus regions
-        :param pandas.core.frame.DataFrame ac_reg_mult: data frame with regional mults.
+        :param pandas.Series x: data for a single transformer.
+        :param pandas.DataFrame bus_reg: data frame with bus regions.
+        :param pandas.DataFrame ac_reg_mult: data frame with regional multipliers.
         :param set xfmr_lookup_alerted: set of (voltage, region) tuples for which
             a message has already been printed that this lookup was not found.
         :return: (*float*) -- regional multiplier.
@@ -140,8 +135,8 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()):
     lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1)
 
     # check that all buses included in this file and lat/long values match,
-    #   otherwise re-run mapping script on mis-matching buses.
-    # these buses are missing in region file
+    # otherwise re-run mapping script on mis-matching buses. These buses are missing
+    # in region file
     bus_fix_index = bus[~bus.index.isin(bus_reg.index)].index
     bus_mask = bus[~bus.index.isin(bus_fix_index)]
     bus_mask = bus_mask.merge(bus_reg, how="left", on="bus_id")
@@ -213,12 +208,11 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()):
 
 
 def calculate_dc_inv_costs(scenario, sum_results=True):
-    """Given a Scenario object, calculate the total cost of that grid's dc line
-        investment. Currently ignores financials, but all values are in 2015 $-year.
+    """Calculate cost of upgrading HVDC lines in a scenario.
 
     :param powersimdata.scenario.scenario.Scenario scenario: scenario instance.
-    :param boolean sum_results: if True, sum Series to return float.
-    :return: (*pandas.Series/float*) -- [Summed] dc line costs.
+    :param bool sum_results: sum series to return total cost.
+    :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015.
     """
     base_grid = Grid(scenario.info["interconnect"].split("_"))
     grid = scenario.state.get_grid()
@@ -235,22 +229,20 @@ def calculate_dc_inv_costs(scenario, sum_results=True):
 
 
 def _calculate_dc_inv_costs(grid_new, sum_results=True):
-    """Given a grid, calculate the total cost of that grid's dc line investment.
-    This function is separate from calculate_dc_inv_costs() for testing purposes.
+    """Calculate cost of upgrading HVDC lines.
 
     :param powersimdata.input.grid.Grid grid_new: grid instance.
-    :param boolean sum_results: if True, sum Series to return float.
-    :return: (*pandas.Series/float*) -- [Summed] dc line costs.
+    :param bool sum_results: sum series to return total cost.
+    :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015.
     """
 
     def _calculate_single_line_cost(line, bus):
-        """Given a series representing a DC line upgrade/addition, and a dataframe of
-        bus locations, calculate this line's upgrade cost.
+        """Calculate cost of upgrading a single HVDC line.
 
-        :param pandas.Series line: DC line series featuring:
-            {"from_bus_id", "to_bus_id", "Pmax"}.
-        :param pandas.Dataframe bus: Bus data frame featuring {"lat", "lon"}.
-        :return: (*float*) -- DC line upgrade cost (in $2015).
+        :param pandas.Series line: HVDC line series featuring *'from_bus_id'*,
+            *'to_bus_id'* and *'Pmax'*.
+        :param pandas.DataFrame bus: bus data frame featuring *'lat'*, *'lon'*.
+        :return: (*float*) -- HVDC line upgrade cost in $2015.
         """
         # Calculate distance
         from_lat = bus.loc[line.from_bus_id, "lat"]
@@ -280,20 +272,19 @@ def _calculate_single_line_cost(line, bus):
 
 
 def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True):
-    """Given a Scenario object, calculate the total cost of building that scenario's
-        upgrades of generation.
-    Currently only uses one (arbutrary) sub-technology. Drops the rest of the costs.
-        Will want to fix for wind/solar (based on resource supply curves).
-    Currently uses ReEDS regions to find regional multipliers.
+    """Calculate cost of upgrading generators in a scenario. ReEDS regions are used to
+    find regional multipliers.
 
     :param powersimdata.scenario.scenario.Scenario scenario: scenario instance.
-    :param int/str year: year of builds.
-    :param str cost_case: the ATB cost case of data:
-        'Moderate': mid cost case,
-        'Conservative': generally higher costs,
-        'Advanced': generally lower costs
-    :return: (*pandas.DataFrame*) -- Total generation investment cost summed by
+    :param int/str year: building year.
+    :param str cost_case: ATB cost case of data. *'Moderate'*: mid cost case,
+        *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs.
+    :return: (*pandas.DataFrame*) -- total generation investment cost summed by
         technology.
+
+    .. todo:: it currently uses one (arbitrary) sub-technology. The rest of the costs
+        are dropped. Wind and solar will need to be fixed based on the resource supply
+        curves.
     """
 
     base_grid = Grid(scenario.info["interconnect"].split("_"))
@@ -322,38 +313,33 @@ def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True):
 
 
 def _calculate_gen_inv_costs(grid_new, year, cost_case, sum_results=True):
-    """Given a grid, calculate the total cost of building that generation investment.
-    Computes total capital cost as CAPEX_total =
-        CAPEX ($/MW) * Pmax (MW) * reg_cap_cost_mult (regional cost multiplier)
-    This function is separate from calculate_gen_inv_costs() for testing purposes.
-    Currently only uses one (arbutrary) sub-technology. Drops the rest of the costs.
-        Will want to fix for wind/solar (based on resource supply curves).
-    Currently uses ReEDS regions to find regional multipliers.
+    """Calculate cost of upgrading generators. ReEDS regions are used to find
+    regional multipliers.
 
     :param powersimdata.input.grid.Grid grid_new: grid instance.
-    :param int/str year: year of builds (used in financials).
-    :param str cost_case: the ATB cost case of data:
-        'Moderate': mid cost case
-        'Conservative': generally higher costs
-        'Advanced': generally lower costs
+    :param int/str year: year of builds.
+    :param str cost_case: ATB cost case of data. *'Moderate'*: mid cost case,
+        *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs.
     :raises ValueError: if year not 2020 - 2050, or cost case not an allowed option.
-    :raises TypeError: if year gets the wrong type, or if cost_case is not str.
-    :return: (*pandas.Series*) -- Total generation investment cost,
-        summed by technology.
+    :raises TypeError: if year not int/str or cost_case not str.
+    :return: (*pandas.Series*) -- total generation investment cost, summed by
+        technology.
+
+    .. note:: the function computes the total capital cost as:
+        CAPEX_total = CAPEX ($/MW) * Pmax (MW) * regional multiplier
     """
 
     def load_cost(year, cost_case):
-        """
-        Load in base costs from NREL's 2020 ATB for generation technologies (CAPEX).
-            Can be adapted in the future for FOM, VOM, & CAPEX.
-        This data is pulled from the ATB xlsx file Summary pages (saved as csv's).
-        Therefore, currently uses default financials, but will want to create custom
-            financial functions in the future.
+        """Load in base costs from NREL's 2020 ATB for generation technologies (CAPEX).
 
         :param int/str year: year of cost projections.
-        :param str cost_case: the ATB cost case of data
-            (see :py:func:`write_poly_shapefile` for details).
-        :return: (*pandas.DataFrame*) -- Cost by technology/subtype (in $2018).
+        :param str cost_case: ATB cost case of data.
+        :return: (*pandas.DataFrame*) -- cost by technology/subtype in $2018.
+
+        .. todo:: it can be adapted in the future for FOM, VOM, & CAPEX. This data is
+            pulled from the ATB xlsx file summary pages. Therefore, it currently uses
+            default financials, but will want to create custom financial functions in
+            the future.
         """
         cost = pd.read_csv(const.gen_inv_cost_path)
         cost = cost.dropna(axis=0, how="all")