From abb468c11379a2cf3a608e35e2f01601e9d5de07 Mon Sep 17 00:00:00 2001 From: kevindougherty-noaa <69815622+kevindougherty-noaa@users.noreply.github.com> Date: Mon, 15 Nov 2021 15:48:39 -0500 Subject: [PATCH] Develop minimization plots (#85) --- LAMDA/minimization_plots.py | 138 +++++++++++++ LAMDA/plot_features.py | 285 -------------------------- LAMDA/scripts/LAMDA_stats_workflow.py | 17 +- 3 files changed, 151 insertions(+), 289 deletions(-) create mode 100644 LAMDA/minimization_plots.py delete mode 100644 LAMDA/plot_features.py diff --git a/LAMDA/minimization_plots.py b/LAMDA/minimization_plots.py new file mode 100644 index 0000000..d9c0b95 --- /dev/null +++ b/LAMDA/minimization_plots.py @@ -0,0 +1,138 @@ +import numpy as np +import pandas as pd +from datetime import datetime +import matplotlib.pyplot as plt +from emcpy.plots.plots import LinePlot +from emcpy.plots import CreatePlot + +__all__ = ['plot_minimization'] + + +def _plot_cost_function(df, config, outdir): + """ + Use data from dataframe to plot the cost function. + """ + # Get cycle info + cycle = df.index.get_level_values(0)[-1] + cyclestr = datetime.strftime(cycle, '%Y%m%d%H') + n_cycles = len(np.unique(df.index.get_level_values(0))) + + # Create approriate dataframes + current_cycle_df = df.loc[cycle] + avg_df = df.groupby(level=[1, 2]).mean() + + # Grab data + j = current_cycle_df['J'].to_numpy() + x = np.arange(len(j)) + + avg_j = avg_df['J'].to_numpy() + avg_x = np.arange(len(avg_j)) + + # Create LinePlot objects + cost_plot = LinePlot(x, j) + cost_plot.linewidth = 2 + cost_plot.label = 'Cost' + + avg_cost_plot = LinePlot(avg_x, avg_j) + avg_cost_plot.color = 'tab:red' + avg_cost_plot.linewidth = 2 + avg_cost_plot.label = f'Last {n_cycles} cycles Average' + + # Create Plot + myplot = CreatePlot() + myplot.draw_data([cost_plot, avg_cost_plot]) + myplot.set_yscale('log') + myplot.add_grid() + myplot.set_xlim(0, len(j)-1) + myplot.add_xlabel('Iterations') + myplot.add_ylabel('log (J)') + myplot.add_legend(loc='upper right', + fontsize='large') + + title = f"{config['experiment']} Cost - {config['tm']}" + myplot.add_title(title, loc='left') + myplot.add_title(cyclestr, loc='right', + fontweight='semibold') + + fig = myplot.return_figure() + + savefile = (f"{cyclestr}_{config['experiment']}_" + + f"tm0{config['tm']}_cost_function.png") + plt.savefig(outdir + savefile, bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def _plot_gnorm(df, config, outdir): + """ + Use date from dataframe to plot gnorm. + """ + # Get cycle info + cycle = df.index.get_level_values(0)[-1] + cyclestr = datetime.strftime(cycle, '%Y%m%d%H') + n_cycles = len(np.unique(df.index.get_level_values(0))) + + # Create approriate dataframes + current_cycle_df = df.loc[cycle] + avg_df = df.groupby(level=[1, 2]).mean() + + # Grab data + gJ = current_cycle_df['gJ'].to_numpy() + gJ = np.log(gJ/gJ[0]) + x = np.arange(len(gJ)) + + avg_gJ = avg_df['gJ'].to_numpy() + avg_gJ = np.log(avg_gJ/avg_gJ[0]) + avg_x = np.arange(len(avg_gJ)) + + # Create LinePlot objects + gnorm = LinePlot(x, gJ) + gnorm.linewidth = 2 + gnorm.label = 'gnorm' + + avg_gnorm = LinePlot(avg_x, avg_gJ) + avg_gnorm.color = 'tab:red' + avg_gnorm.linewidth = 2 + avg_gnorm.linestyle = '--' + avg_gnorm.label = f'Last {n_cycles} cycles Average' + + # Create Plot + myplot = CreatePlot() + myplot.draw_data([gnorm, avg_gnorm]) + # myplot.set_yscale('log') + myplot.add_grid() + myplot.set_xlim(0, len(gJ)-1) + myplot.add_xlabel('Iterations') + myplot.add_ylabel('log (gnorm)') + myplot.add_legend(loc='upper right', + fontsize='large') + + title = f"{config['experiment']} gnorm - {config['tm']}" + myplot.add_title(title, loc='left') + myplot.add_title(cyclestr, loc='right', + fontweight='semibold') + + fig = myplot.return_figure() + + savefile = (f"{cyclestr}_{config['experiment']}_" + + f"tm0{config['tm']}_gnorm.png") + plt.savefig(outdir + savefile, bbox_inches='tight', + pad_inches=0.1) + plt.close('all') + + +def plot_minimization(df, plotting_config, outdir): + """ + Plot minimization plots including gnorm and cost function + and save them to outdir. + + Args: + df : (pandas dataframe) dataframe with appropriate information + from GSI Stat file + plotting_config : (dict) dictionary with information about + minimization period + outdir : (str) path to output diagnostics + """ + + _plot_gnorm(df, plotting_config, outdir) + _plot_cost_function(df, plotting_config, outdir) diff --git a/LAMDA/plot_features.py b/LAMDA/plot_features.py deleted file mode 100644 index 4bf574b..0000000 --- a/LAMDA/plot_features.py +++ /dev/null @@ -1,285 +0,0 @@ -from datetime import datetime -import numpy as np -from textwrap import TextWrapper -import matplotlib.pyplot as plt -from emcpy.plots.plots import Scatter, Histogram, VerticalLine -from emcpy.plots.map_plots import MapScatter -from emcpy.plots import CreateMap, CreatePlot, VariableSpecs -from emcpy.plots.map_tools import Domain, MapProjection -import matplotlib -matplotlib.use('agg') - -__all__ = ['get_obs_type', 'get_bins', 'calculate_stats', - 'get_labels', 'varspecs_name'] - - -def qc_flag_colors(qcflag): - """ - Dictionary of colors to use for specific QC flag numbers. - Args: - qcflag : (int) qc flag number - Returns: - color : (str) color based on qc flag number - """ - qc_flag_colors = { - 0: 'tab:green', - 1: 'black', - 2: 'tab:pink', - 3: 'tab:red', - 4: 'tab:orange', - 5: 'tab:brown', - 6: 'tab:olive', - 7: 'skyblue', - 8: 'tab:blue', - 9: 'maroon', - 10: 'tab:cyan', - 11: 'slateblue', - 12: 'navy', - 13: 'tan', - 14: 'gold', - 15: 'darkcyan', - 50: 'indigo', - 51: 'tab:purple', - 52: 'magenta', - 53: 'cornflowerblue', - 54: 'pink', - 55: 'lightgrey', - 56: 'darkgrey', - 57: 'dimgrey' - } - - return qc_flag_colors[qcflag] - - -def get_obs_type(obs_id): - """ - Grabs the full name of a specific observation. - Args: - obs_id: (list of ints) ID number(s) for conventional - diagnostic - Returns: - list of observation names. If ID in the list, it will return, - the proper name. If no observation ID, returns 'All Observations'. - If ID not in the list, returns list of string of the ID number. - """ - - obs_indicators = { - 120: "Rawinsonde", - 126: "RASS", - 130: "Aircraft: AIREP and PIREP", - 131: "Aircraft: AMDAR", - 132: "Flight-Level Reconnaissance and Profile Dropsonde", - 133: "Aircraft: MDCRS ACARS", - 134: "Aircraft: TAMDAR", - 135: "Aircraft: Canadian AMDAR", - 153: "GPS-Integrated Precipitable Water", - 180: ("Surface Marine w/ Station Pressure (Ship, Buoy, C-MAN, " - "Tide Guage)"), - 181: "Surface Land w/ Station Pressure (Synoptic, METAR)", - 182: "Splash-Level Dropsonde Over Ocean", - 183: "Surface Marine or Land - Missing Station Pressure", - 187: "Surface Land - Missing Station Pressure", - 210: "Synthetic Tropical Cyclone", - 220: "Rawinsonde", - 221: "PIBAL", - 224: "NEXRAD Vertical Azimuth Display", - 228: "Wind Profiler: JMA", - 229: "Wind Profiler: PIBAL", - 230: "Aircraft: AIREP and PIREP", - 231: "Aircraft: AMDAR", - 232: "Flight-Level Reconnaissance and Profile Dropsonde", - 233: "Aircraft: MDCRS ACARS", - 234: "Aircraft: TAMDAR", - 235: "Aircraft: Canadian AMDAR", - 242: ("JMA IR (Longwave) and Visible Cloud Drift Below 850mb " - "(GMS, MTSAT, HIMAWARI)"), - 243: ("EUMETSAT IR (Longwave) and Visible Cloud Drift Below " - " 850mb (METEOSAT)"), - 244: "AVHRR/POES IR (Longwave) Cloud Drift", - 245: "NESDIS IR (Longwave) Cloud Drift (All Levels)", - 246: "NESDIS Imager Water Vapor (All Levels) - Cloud Top (GOES)", - 250: ("JMA Imager Water Vapor (All Levels) - Cloud Top & Deep " - "Layer (GMS, MTSAT, HIMAWARI)"), - 251: "NESDIS Visible Cloud Drift (All Levels) (GOES)", - 252: ("JMA IR (Longwave) and Visible Cloud Drift Above 850mb " - "(GMS, MTSAT, HIMAWARI)"), - 253: ("EUMETSAT IR (Longwave) and Visible Cloud Drift Above " - "850mb (METEOSAT)"), - 254: ("EUMETSAT Imager Water Vapor (All Levels) - Cloud Top " - " & Deep Layer (METEOSAT)"), - 257: ("MODIS/POES IR (Longwave) Cloud Drift (All Levels) " - "(AQUA, TERRA)"), - 258: ("MODIS/POES Imager Water Vapor (All Levels) - Cloud Top " - "(AQUA, TERRA)"), - 259: ("MODIS/POES Imager Water Vapor (All Levels) - Deep Layer " - "(AQUA, TERRA)"), - 280: ("Surface Marine w/ Station Pressure (Ship, Buoy, C-MAN, " - "Tide Guage)"), - 281: "Surface Land w/ Station Pressure (Synoptic, METAR)", - 282: "ATLAS Buoy", - 284: "Surface Marine or Land - Missing Station Pressure", - 287: "Surface Land (METAR) - Missing Station Pressure", - 289: "SUPEROBED (1.0 Lat/Lon) Scatterometer Winds over Ocean", - 290: "Non-SUPEROBED Scatterometer Winds over Ocean" - } - - descripts = list() - if obs_id is None: - return ['All Observations'] - - else: - for ids in obs_id: - if ids in obs_indicators.keys(): - descripts.append(obs_indicators[ids]) - - else: - descripts.append(str(ids)) - - return descripts - - -def get_bins(eval_type, stats): - """ - Calculates bins to use for histogram plot. - Args: - eval_type : (str) determines how to create bins - whether 'diff' or 'magnitude' - stats : (dict) dictionary of stats from - `calculate_stats()` - Returns: - bins : (array-like) - """ - binsize = stats['Std']/5 - - if eval_type == 'diff': - bins = np.arange(0-(4*stats['Std']), - 0+(4*stats['Std']), - binsize) - - else: - bins = np.arange(stats['Mean']-(4*stats['Std']), - stats['Mean']+(4*stats['Std']), - binsize) - - return bins - - -def calculate_stats(data): - """ - Calculates n, mean, min, max, - standard deviation, and RMSE. - Returns dictionary with stats. - Args: - data : (array-like) array of data to calculate - stats - Returns: - stats : (dict) dictionary of stats - """ - - n = np.count_nonzero(~np.isnan(data)) - - mean = np.nanmean(data) - std = np.nanstd(data) - mx = np.nanmax(data) - mn = np.nanmin(data) - - rmse = np.sqrt(np.nanmean(np.square(data))) - - stats = {'Nobs': n, - 'Min': np.round(mn, 3), - 'Max': np.round(mx, 3), - 'Mean': np.round(mean, 3), - 'Std': np.round(std, 3), - 'RMSE': np.round(rmse, 3) - } - - return stats - - -def get_labels(metadata): - """ - Creates a dictionary of title, date title, and the save - file name using information from the metadata. - Args: - metadata : (dict) metadata dictionary created by diags.py - Returns: - labels : (dict) dictionary of labels - """ - - var = metadata['Satellite'] if 'Satellite' in metadata \ - else metadata['Variable'] - - # Get title and save file name - if metadata['Anl Use Type'] is not None: - title = (f"{metadata['Obs Type']}: {var} - {metadata['Diag Type']}" - f" - Data {metadata['Anl Use Type']}") - - save_file = (f"{metadata['Date']:%Y%m%d%H}_{metadata['Obs Type']}_" - f"{var}_{metadata['Diag Type']}_" - f"{metadata['Anl Use Type']}_") - - else: - title = f"{metadata['Obs Type']}: {var} - {metadata['Diag Type']}" - save_file = (f"{metadata['Date']:%Y%m%d%H}_{metadata['Obs Type']}_" - f"{var}_{metadata['Diag Type']}_") - - # Adds on specific obsid, channel, or layer info to title/save file - if metadata['Diag File Type'] == 'conventional': - title = title + '\n%s' % '\n'.join(metadata['ObsID Name']) - save_file = save_file + '%s' % '_'.join( - str(x) for x in metadata['ObsID']) - - elif metadata['Diag File Type'] == 'radiance': - if metadata['Channels'] == 'All Channels': - title = title + '\nAll Channels' - save_file = save_file + '_All_Channels' - - else: - title = title + '\nChannels: %s' % ', '.join( - str(x) for x in metadata['Channels']) - save_file = save_file + 'channels_%s' % '_'.join( - str(x) for x in metadata['Channels']) - - else: - layer = metadata['Layer'] - title = title + f'\nLayer: {layer}' - save_file = save_file + '%s' % layer - - # Get date label - date_title = metadata['Date'].strftime("%d %b %Y %Hz") - - labels = { - 'title': title, - 'date title': date_title, - 'save file': save_file - } - - return labels - - -def varspecs_name(variable): - """ - Grabs the specific variable name to utlitize emcpy's VariableSpecs. - Args: - variable : (str) variable name - Returns: - spec_variable : (str) name of spec variable - """ - vardict = { - 'temperature': ['air temperature', 'tmp', 'temp', 't'], - 'specific humidity': ['q', 'spfh'], - 'u': ['eastward wind', 'ugrd', 'zonal wind'], - 'v': ['northward wind', 'vgrd', 'meridional wind'], - 'wind speed': ['windspeed'], - 'brightness temperature': ['bt'], - 'integrated layer ozone in air': ['ozone', 'o3'] - } - - # makes lower case and replaces underscore with space - spec_variable = variable.lower().replace('_', ' ') - - for key in vardict.keys(): - spec_variable = key if spec_variable in vardict[key] \ - else spec_variable - - return spec_variable diff --git a/LAMDA/scripts/LAMDA_stats_workflow.py b/LAMDA/scripts/LAMDA_stats_workflow.py index 427926b..157008f 100644 --- a/LAMDA/scripts/LAMDA_stats_workflow.py +++ b/LAMDA/scripts/LAMDA_stats_workflow.py @@ -99,13 +99,15 @@ def plotting(config, data_dict, outdir, data_type, ob_type): plot_dict[plot_type](fits_df, plotting_config, outdir) -def create_minimization_plots(data_dict, outdir): +def create_minimization_plots(data_dict, experiment_name, outdir): """ Since it is stand alone from other plotting scripts, this functions purpose is to generate the minimization plots. Args: data_dir : (dict) dictionary that includes fits2 data + experiment_name : (str) the type of experiment i.e. + FV3LAMDA, LAMDAX, etc. outdir : (str) path to where figures should be outputted """ @@ -123,8 +125,14 @@ def create_minimization_plots(data_dict, outdir): fits2_df = concatenate_dfs(fits2_data, 'cost', cycles, data_type='cost') + # Get plotting information + plotting_config = { + 'tm': tm, + 'experiment': experiment_name + } + # Create plot by calling plotting script - minimization_plots(fits2_df, outdir) + minimization_plots(fits2_df, plotting_config outdir) def stats_workflow(config_yaml, nprocs, outdir): @@ -147,8 +155,9 @@ def stats_workflow(config_yaml, nprocs, outdir): statdir = config_yaml['stat']['stat dir'] data_type = config_yaml['stat']['data type'] + experiment_name = config_yaml['stat']['experiment name'] ob_type = config_yaml['stat']['ob type'] - plot_types = config_yaml['plot types'] + plot_types = config_yaml['stat']['plot types'] # Grabs all subdirectories of stats files (in date subdirectory) subdirs = sorted([f.path for f in os.scandir(statdir) if f.is_dir()]) @@ -169,7 +178,7 @@ def stats_workflow(config_yaml, nprocs, outdir): # remove from plot list and then call function to plot if 'minimization' in plot_types: plot_types.remove('minimization') - create_minimization_plots(data_dict, outdir) + create_minimization_plots(data_dict, experiment_name, outdir) # Create multiprocessing lists if data_type == 'conventional':