Commit

metrics update

deronsmith committed Feb 22, 2024
1 parent b19b8f4 commit 4f45460
Showing 8 changed files with 125 additions and 57 deletions.
2 changes: 1 addition & 1 deletion .idea/cyan_waterbody.iml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default.

3 changes: 1 addition & 2 deletions Dockerfile
@@ -17,8 +17,7 @@ ARG CONDA_ENV="base"
ARG GDAL_VERSION=3.7.1

COPY environment.yml /src/environment.yml
RUN micromamba install -n $CONDA_ENV -f /src/environment.yml
RUN micromamba clean -p -t -l --trash -y
RUN micromamba install -n $CONDA_ENV -f /src/environment.yml && micromamba clean -p -t -l --trash -y
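Merging the install and cleanup into a single RUN keeps micromamba's cache removal in the same image layer as the install, so the deleted cache files no longer survive in an earlier layer and the final image is smaller.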

COPY . /src/

2 changes: 1 addition & 1 deletion environment.yml
@@ -13,7 +13,7 @@ dependencies:
- fiona=1.9.4
- libspatialindex
- flask-cors=4.0.0
- xhtml2pdf
- xhtml2pdf=0.2.6
- plotly=5.16.1
- python-kaleido
- celery=5.3.4
145 changes: 98 additions & 47 deletions flaskr/metrics.py
@@ -1,11 +1,32 @@
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from flaskr.utils import convert_dn
from flaskr.geometry import get_waterbody_fids, get_waterbody_by_fids
from flaskr.db import get_waterbody_data
import time


def get_data(objectids, start_date: datetime, end_date: datetime, wb_dict: dict = None):
    start_day = start_date.timetuple().tm_yday
    end_day = end_date.timetuple().tm_yday
    wb_data = []
    for oid in objectids:
        if wb_dict is not None:
            data = wb_dict
        else:
            data = get_waterbody_data(objectid=oid, start_year=end_date.year, start_day=end_day, end_year=start_date.year, end_day=start_day, ranges=None, non_blooms=True)
        for datestr, values in data.items():
            date_split = datestr.split(" ")
            data_date = datetime(year=int(date_split[0]), month=1, day=1) + timedelta(days=int(date_split[1]) - 1)
            results = {"OBJECTID": str(oid), "date": data_date}
            for i, v in enumerate(values):
                results[f"DN={i}"] = int(v)
            wb_data.append(results)
    df = pd.DataFrame(wb_data)
    return df
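get_data expects the histogram keys returned by get_waterbody_data to be "<year> <day-of-year>" strings, which it converts to a datetime; each row of the resulting frame is one waterbody-day with one column per DN bin. A minimal sketch of that parsing, with a hypothetical key:

from datetime import datetime, timedelta

datestr = "2024 53"  # hypothetical key: year 2024, day-of-year 53 (Feb 22)
date_split = datestr.split(" ")
data_date = datetime(year=int(date_split[0]), month=1, day=1) + timedelta(days=int(date_split[1]) - 1)
print(data_date)  # 2024-02-22 00:00:00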


def calculate_metrics(objectids: list, year: int, day: int, historic_days: int = 30, summary: bool = True, report: bool = False, wb_dict: dict = None):
    """
    Calculate waterbody metrics as defined in publications. Magnitude is currently commented out in the output as the publication has not yet been cleared.
@@ -26,23 +47,7 @@ def calculate_metrics(objectids: list, year: int, day: int, historic_days: int =
        day = today.timetuple().tm_yday
    start_date = datetime(year=year, month=1, day=1) + timedelta(days=day - 1)
    end_date = start_date - timedelta(days=historic_days)
    start_day = start_date.timetuple().tm_yday
    end_day = end_date.timetuple().tm_yday

    wb_data = []
    for oid in objectids:
        if wb_dict is not None:
            data = wb_dict
        else:
            data = get_waterbody_data(objectid=oid, start_year=end_date.year, start_day=end_day, end_year=start_date.year, end_day=start_day, ranges=None, non_blooms=True)
        for datestr, values in data.items():
            date_split = datestr.split(" ")
            data_date = datetime(year=int(date_split[0]), month=1, day=1) + timedelta(days=int(date_split[1]) - 1)
            results = {"OBJECTID": str(oid), "date": data_date}
            for i, v in enumerate(values):
                results[f"DN={i}"] = int(v)
            wb_data.append(results)
    df = pd.DataFrame(wb_data)
    df = get_data(objectids=objectids, start_date=start_date, end_date=end_date, wb_dict=wb_dict)
    detect_columns = [f"DN={i}" for i in range(1, 254)]
    all_columns = [f"DN={i}" for i in range(0, 254)]

@@ -59,15 +64,15 @@ def calculate_metrics(objectids: list, year: int, day: int, historic_days: int =
results["Extent by Waterbody"] = extent_wb
# results["Magnitude by Waterbody"] = magnitude_wb
# results["Area Normalized Magnitude"] = area_normalized
# results["Chia Normalized Magnitude"] = chia_normalized
results["Chia Normalized Magnitude"] = chia_normalized
results["Metadata"] = {
"Period": f"{historic_days} days",
"Timestep": "daily",
"Frequency Units": "%",
"Extent Units": "%",
# "Magnitude Units": "cell concentration",
# "Area Normalized Magnitude Units": "cells/km^2",
# "Chia Normalized Magnitude Units": "kg*km^-2"
"Chia Normalized Magnitude Units": "kg*km^-2"
}
else:
if summary:
@@ -77,55 +82,86 @@ def calculate_metrics(objectids: list, year: int, day: int, historic_days: int =
results["extent_wb"] = extent_wb
# results["magnitude_wb"] = magnitude_wb
# results["area_normalized_magnitude"] = area_normalized
# results["chia_normalized_magnitude"] = chia_normalized
results["chia_normalized_magnitude"] = chia_normalized
results["metadata"] = {
"period": f"{historic_days} days",
"timestep": "daily",
"frequency_units": "%",
"extent_units": "%",
# "magnitude_units": "cell concentration",
# "area_normalized_magnitude_units": "cells/km^2",
# "chia_normalized_magnitude_units": "kg*km^-2"
"chia_normalized_magnitude_units": "kg*km^-2"
}
t1 = time.time()
print(f"Metric calculation runtime: {round(t1 - t0, 3)} sec")
return results


def calculate_frequency(data: pd.DataFrame, detect_columns: list, all_columns: list):
    # calculate frequency of detection, for the collection of waterbodies and for individual waterbodies
    # spatial extent = n of pixels with detectable CI / n of valid pixels
    """
    For the timespan of observations, the average lake-scale bloom frequencies are computed by averaging the
    pixel-scale bloom frequencies for all pixels contained within a lake.
    :param data: DataFrame of DN histograms, one row per waterbody per date
    :param detect_columns: The list of column names which correspond to detection DN, DN=1:253
    :param all_columns: The entire list of valid pixel columns, DN=0:253
    :return:
    """
    # detect
    # valid pixel DN=[0:253]

    detections = data[detect_columns].sum(axis=0).sum()
    all_cells = data[all_columns].sum(axis=0).sum()
    frequency = round(detections.sum() / all_cells.sum(), 4)
    # For all the dates in the range:
    # detections = data[detect_columns].sum(axis=0).sum()
    # all_cells = data[all_columns].sum(axis=0).sum()
    # frequency = round(detections.sum() / all_cells.sum(), 4)

    # For all dates in the timespan, count how many dates had any detection.
    # That count is divided by the total number of dates.
    detections0 = np.count_nonzero(data[detect_columns].sum(axis=1).to_numpy())
    all_cells0 = data[all_columns].sum(axis=1).size
    frequency = round(100 * (detections0 / all_cells0), 2)

    # wb_detections = data.groupby(by='OBJECTID')[detect_columns].sum().sum(axis=1)
    # wb_all_cells = data.groupby(by='OBJECTID')[all_columns].sum().sum(axis=1)
    # wb_frequency = dict(wb_detections / wb_all_cells)

    # _wb_frequency = {}
    # for k, v in wb_frequency.items():
    #     _wb_frequency[int(k)] = round(v * 100, 2)

    wb_detections = data.groupby(by='OBJECTID')[detect_columns].sum().sum(axis=1)
    wb_all_cells = data.groupby(by='OBJECTID')[all_columns].sum().sum(axis=1)
    wb_frequency = dict(wb_detections / wb_all_cells)
    _wb_frequency = {}
    for k, v in wb_frequency.items():
        _wb_frequency[int(k)] = round(v * 100, 4)
    for object_id, y in data.groupby(by='OBJECTID')[detect_columns]:
        _wb_frequency[int(object_id)] = round(100 * (np.count_nonzero(y.sum(axis=1).to_numpy()) / y.shape[0]), 2)

    return frequency, _wb_frequency
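A minimal sketch of the new frequency definition on synthetic data (the two-bin histogram and pixel counts below are made up for illustration): three dates, detections on two of them, gives 66.67%.

import numpy as np
import pandas as pd

# hypothetical histogram: DN=0 (valid non-detect) and DN=1 (detect), one row per date
toy = pd.DataFrame({
    "OBJECTID": ["1", "1", "1"],
    "DN=0": [100, 90, 95],   # valid non-detect pixel counts per date
    "DN=1": [0, 10, 5],      # detection pixel counts per date
})
detect_columns, all_columns = ["DN=1"], ["DN=0", "DN=1"]
dates_with_detection = np.count_nonzero(toy[detect_columns].sum(axis=1).to_numpy())  # 2
total_dates = toy[all_columns].sum(axis=1).size                                      # 3
frequency = round(100 * (dates_with_detection / total_dates), 2)
print(frequency)  # 66.67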


def calculate_extent(data: pd.DataFrame, detect_columns: list, all_columns: list):

    # Extent is the average extent of detections over the timespan,
    # calculated as the mean of (# of detection pixels on a date) / (total # of valid pixels) over the timespan.
    detections = data[detect_columns].sum(axis=1)
    all_cells = data[all_columns].sum(axis=1)
    extent = round(100 * (detections / all_cells).mean(), 4)
    extent_i = (detections / all_cells).to_numpy()
    extent = extent_i[extent_i.nonzero()]
    extent_mean = np.round(100 * np.mean(extent), 2)

    objectids = [int(oid) for oid in list(data.OBJECTID.unique())]

    oid_groups = data.groupby(by='OBJECTID')
    # objectids = [int(oid) for oid in list(data.OBJECTID.unique())]
    #
    # oid_groups = data.groupby(by='OBJECTID')
    wb_extent = {}
    for oid in objectids:
        oid_df = oid_groups.get_group(str(oid))
        detects = oid_df[detect_columns].sum(axis=1)
        all_cells = oid_df[all_columns].sum(axis=1)
        wb_extent[oid] = round(100 * (detects / all_cells).mean(), 4)
    return extent, wb_extent
    for oid, data in data.groupby(by='OBJECTID'):
        detects = data[detect_columns].sum(axis=1)
        oid_cells = data[all_columns].sum(axis=1)
        oid_extent_i = (detects / oid_cells).to_numpy()
        oid_extent = np.round(100 * np.mean(oid_extent_i[oid_extent_i.nonzero()]), 2)
        oid_extent = oid_extent if not np.isnan(oid_extent) else 0.0
        wb_extent[int(oid)] = oid_extent
    # for oid in objectids:
    #     oid_df = oid_groups.get_group(str(oid))
    #     detects = oid_df[detect_columns].sum(axis=1)
    #     all_cells = oid_df[all_columns].sum(axis=1)
    #     wb_extent[oid] = round(100 * (detects / all_cells).mean(), 2)
    return extent_mean, wb_extent
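Design note on the new extent: dates whose detection fraction is exactly zero are dropped by extent_i.nonzero() before averaging, so the reported extent is the mean coverage on days when a bloom was present; the NaN guard then returns 0.0 for waterbodies with no detections at all (np.mean of an empty array is NaN). A toy illustration of the difference:

import numpy as np

extent_i = np.array([0.0, 0.2, 0.1, 0.0])   # per-date detection fractions
nonzero = extent_i[extent_i.nonzero()]       # array([0.2, 0.1])
print(np.round(100 * np.mean(nonzero), 2))   # 15.0 (would be 7.5 if zero days counted)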


def calculate_chla(ci):
@@ -134,19 +170,32 @@ def calculate_chla(ci):
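The body of calculate_chla is collapsed in this view; judging from the inline constant it replaces at the call site below (595.8 * v), a plausible sketch is a linear CI-to-chlorophyll-a conversion — an assumption, not the confirmed implementation:

def calculate_chla(ci):
    # assumed: linear CI -> chl-a conversion using the 595.8 factor that was
    # previously inlined at the call site; the actual body is hidden above
    return round(595.8 * ci, 2)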

def calculate_magnitude(data: pd.DataFrame, detect_columns: list, all_columns: list):
    # bloom magnitude = 1/M * SUM(m=1->M) * 1/T * SUM(t=1->T) * SUM(p=1->P) CI_cyano
    # bloom magnitude = 1/M * SUM(m=1->M) * 1/T * SUM(t=1->T) * (convert_dn(dn) * dn_count)
    # P: the number of valid pixels in a lake or waterbody
    # T: the number of composites in the time sequence of a seasonal or annual study period
    # M: the number of months in a season or year
    # Bloom Magnitude

    # Equation 2 from the magnitude publication:
    # mean bloom magnitude = a_p/A_lake * 1/M * SUM(m=1->M) * 1/T * SUM(t=1->T) * SUM(p=1->P) CI(p,t,m)
    # i.e. (pixel area / lake area) * (1/number of months) * for each month (m) in the timespan *
    # (1/number of timesteps in the month) * for each timestep (t) in the month * for each pixel (p) in the waterbody: CI(p,t,m)

    # One date/step of the histogram holds the count of each pixel DN within the waterbody, so the pixel sum is
    # equivalent to SUM of (DN_count) * (DN_value).

    # SUM(t=1->T) is the sum over all steps in the datespan.
    # SUM(m=1->M) is not used here because the 'period' is always a single timespan.

    CI_data = data[detect_columns]
    CI_values = [convert_dn(i) for i in range(1, 254)]
    magnitude = data[detect_columns] * CI_values
    T = int(data.shape[0] / data['OBJECTID'].nunique())
    magnitude = CI_data * CI_values
    magnitude['OBJECTID'] = data['OBJECTID']
    magnitude_wb = dict(magnitude.groupby(by='OBJECTID').sum().sum(axis=1))
    magnitude_wb = dict(magnitude.groupby(by='OBJECTID').sum().sum(axis=1) / T)
    wb_magnitude_update = {}
    for k, v in magnitude_wb.items():
        wb_magnitude_update[int(k)] = round(v, 4)
        wb_magnitude_update[int(k)] = round(v, 2)

    waterbody_fids = get_waterbody_fids(return_dict=True)

@@ -156,9 +205,11 @@ def calculate_magnitude(data: pd.DataFrame, detect_columns: list, all_columns: l

    for comid in objectids:
        wb_data = get_waterbody_by_fids(waterbody_fids[comid])
        area_normalized_magnitude[int(comid)] = round(magnitude_wb[str(comid)] / wb_data[0][0]['properties']['AREASQKM'], 4)
        wb_area = wb_data[0][0]['properties']['AREASQKM']
        pixel_area = 0.9  # 900sqmeters -> 0.9sqkm
        area_normalized_magnitude[int(comid)] = round((pixel_area / wb_area) * magnitude_wb[str(comid)], 2)

    chia_area_normalized_bloom = {}
    for k, v in area_normalized_magnitude.items():
        chia_area_normalized_bloom[int(k)] = round(595.8 * v, 4)
        chia_area_normalized_bloom[int(k)] = round(calculate_chla(v), 2)
    return wb_magnitude_update, area_normalized_magnitude, chia_area_normalized_bloom
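A sketch of the per-timestep normalization added above, on made-up numbers (one waterbody, two dates): the histogram-weighted CI sum is averaged over the T composites rather than accumulated.

import pandas as pd

# hypothetical two-bin histogram for one waterbody over T=2 dates
data = pd.DataFrame({"OBJECTID": ["1", "1"], "DN=1": [10, 20], "DN=2": [0, 5]})
CI_values = [0.5, 2.0]  # stand-ins for convert_dn(1) and convert_dn(2)
T = int(data.shape[0] / data["OBJECTID"].nunique())   # 2 timesteps
magnitude = data[["DN=1", "DN=2"]] * CI_values        # per-bin count * CI
magnitude["OBJECTID"] = data["OBJECTID"]
magnitude_wb = dict(magnitude.groupby(by="OBJECTID").sum().sum(axis=1) / T)
print(magnitude_wb)  # {'1': 12.5} -> ((10+20)*0.5 + (0+5)*2.0) / 2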
11 changes: 7 additions & 4 deletions flaskr/report.py
@@ -19,6 +19,7 @@
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
@@ -181,7 +182,7 @@ def generate_report(
            'frequency': wb_metrics["Frequency by Waterbody"][objectid],
            # 'magnitude': wb_metrics["Magnitude by Waterbody"][objectid],
            # 'mag_area_norm': wb_metrics["Area Normalized Magnitude"][objectid],
            # 'chia_area_norm': wb_metrics["Chia Normalized Magnitude"][objectid],
            'chia_area_norm': wb_metrics["Chia Normalized Magnitude"][objectid],
            'p_days': report_day}) for objectid in wbs
        ]
        for r in results_objects:
@@ -198,7 +199,7 @@ def generate_report(
            frequency=wb_metrics["Frequency by Waterbody"][objectid],
            # magnitude=wb_metrics["Magnitude by Waterbody"][objectid],
            # mag_area_norm=wb_metrics["Area Normalized Magnitude"][objectid],
            # chia_area_norm=wb_metrics["Chia Normalized Magnitude"][objectid],
            chia_area_norm=wb_metrics["Chia Normalized Magnitude"][objectid],
            p_days=report_day)
        wbs_html[i_name] = i_html
        # TODO: Add sorting by magnitude option
@@ -229,7 +230,7 @@ def generate_report(
            'frequency': group_metrics["Frequency by Waterbody"][objectid],
            # 'magnitude': group_metrics["Magnitude by Waterbody"][objectid],
            # 'mag_area_norm': group_metrics["Area Normalized Magnitude"][objectid],
            # 'chia_area_norm': group_metrics["Chia Normalized Magnitude"][objectid],
            'chia_area_norm': group_metrics["Chia Normalized Magnitude"][objectid],
            'p_days': report_day}) for objectid in ids
        ]
        for r in results_objects:
@@ -246,7 +247,7 @@ def generate_report(
            frequency=group_metrics["Frequency by Waterbody"][objectid],
            # magnitude=group_metrics["Magnitude by Waterbody"][objectid],
            # mag_area_norm=group_metrics["Area Normalized Magnitude"][objectid],
            # chia_area_norm=group_metrics["Chia Normalized Magnitude"][objectid],
            chia_area_norm=group_metrics["Chia Normalized Magnitude"][objectid],
            p_days=report_day
        )
        wbs_html[i_name] = i_html
@@ -584,6 +585,7 @@ def get_report_waterbody_raster(objectid: int, report_id: str, day: int, year: i
    # plt.show()
    plt.axis('off')
    plt.savefig(image_path)
    fig.clf()
    plt.close(fig)
    return image_path
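The added fig.clf() clears the figure's axes and image data before plt.close(fig), which helps keep Matplotlib from holding onto raster arrays across repeated report renders in a long-running worker; the same pattern is applied in the collection-raster function below.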

@@ -683,6 +685,7 @@ def get_waterbody_collection_raster(groupname: str, grouptype: str, group_id: st
    # plt.show()
    plt.tight_layout()
    plt.savefig(image_path, dpi=140)
    fig.clf()
    plt.close('all')
    return image_path

2 changes: 1 addition & 1 deletion flaskr/utils.py
@@ -126,7 +126,7 @@ def p_get_geometry_bounds(objectid, day, year):

def convert_dn(dn, round=2):
    if type(dn) == int or type(dn) == np.int64:
        np.int(64)
        np.int_(64)
    if dn == 0:
        return 0
    if dn > 255:
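Background on this one-line change: np.int was deprecated in NumPy 1.20 and removed in 1.24, so the expression is updated to np.int_. Note that the statement's result is discarded either way, so it behaves as a no-op and looks like leftover debugging code.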
15 changes: 15 additions & 0 deletions tests/metrics.py
@@ -0,0 +1,15 @@
import unittest


def get_columns():
    detect_columns = [f"DN={i}" for i in range(1, 254)]
    all_columns = [f"DN={i}" for i in range(0, 254)]
    return detect_columns, all_columns

def get_data(): pass  # stub: synthetic histogram data for the tests, not yet implemented


class MetricsCalculationMethods(unittest.TestCase):

    def test_frequency_calculation(self):
        detect_columns, all_columns = get_columns()
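The test body is still empty in this commit; a minimal sketch of how it might exercise calculate_frequency (the synthetic frame and expected values here are illustrative assumptions, not part of the commit):

import pandas as pd
from flaskr.metrics import calculate_frequency

class MetricsCalculationMethodsSketch(unittest.TestCase):

    def test_frequency_calculation(self):
        detect_columns, all_columns = get_columns()
        # one waterbody, three dates, detections on two of them
        rows = []
        for detect_count in (0, 10, 5):
            row = {col: 0 for col in all_columns}
            row["OBJECTID"] = "1"
            row["DN=0"] = 100           # valid non-detect pixels
            row["DN=1"] = detect_count  # detection pixels
            rows.append(row)
        df = pd.DataFrame(rows)
        frequency, wb_frequency = calculate_frequency(df, detect_columns, all_columns)
        self.assertAlmostEqual(frequency, 66.67)
        self.assertAlmostEqual(wb_frequency[1], 66.67)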
