Improvement of the data management and naming of container files (#181)
* cleaner way to get the extractor string associated with the extractor kwargs provided (see the sketch after the change summary below)
-> no longer sensitive to the order of the kwargs given by the user
-> handles gracefully the case where the user sets kwargs to their default values

* The search for saved containers (SPE, charges, waveforms) is more robust
-> exceptions are raised and caught properly when no file is found

* propagation of changes in makers

* script updates + fix of broken photostat code

* update of shell scripts

---------

Co-authored-by: guillaume.grolleron <[email protected]>
guillaumegrolleron and guillaume.grolleron authored Feb 10, 2025
1 parent 6d1ebad commit f2df538
Showing 14 changed files with 245 additions and 164 deletions.
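
The first point of the commit message describes the idea behind the new extractor string: it no longer depends on the order of the kwargs and treats explicitly passed default values the same as omitted ones. The snippet below is only a sketch of that idea; the helper name, the default table and the exact string format are assumptions for illustration, not the actual CtapipeExtractor.get_extractor_kwargs_str implementation.

# Illustrative sketch only: names, defaults and formatting are assumptions,
# not the real CtapipeExtractor.get_extractor_kwargs_str implementation.
ASSUMED_DEFAULTS = {
    "LocalPeakWindowSum": {"window_width": 7, "window_shift": 3},
}


def extractor_kwargs_to_str(method: str, extractor_kwargs: dict) -> str:
    # Merge on top of the method defaults, so passing a default value
    # explicitly yields the same string as not passing it at all.
    merged = dict(ASSUMED_DEFAULTS.get(method, {}))
    merged.update(extractor_kwargs or {})
    # Sort the keys, so the string no longer depends on the order in
    # which the user supplied the kwargs.
    return "_".join(f"{key}_{merged[key]}" for key in sorted(merged))


# Both calls produce the same string, whatever the kwargs order:
print(extractor_kwargs_to_str("LocalPeakWindowSum", {"window_shift": 4, "window_width": 12}))
print(extractor_kwargs_to_str("LocalPeakWindowSum", {"window_width": 12, "window_shift": 4}))
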
27 changes: 18 additions & 9 deletions notebooks/tool_implementation/tuto_photostat.py
@@ -20,17 +20,18 @@
import os
import pathlib

import matplotlib.pyplot as plt

from nectarchain.data.management import DataManagement
from nectarchain.makers.calibration import PhotoStatisticNectarCAMCalibrationTool
from nectarchain.makers.extractor.utils import CtapipeExtractor

logging.basicConfig(
format="%(asctime)s %(name)s %(levelname)s %(message)s", level=logging.INFO
)
log = logging.getLogger(__name__)
log.handlers = logging.getLogger("__main__").handlers

import matplotlib.pyplot as plt

from nectarchain.data.management import DataManagement
from nectarchain.makers.calibration import PhotoStatisticNectarCAMCalibrationTool
from nectarchain.makers.extractor.utils import CtapipeExtractor

# %%
extractor_kwargs = {"window_width": 12, "window_shift": 4}
@@ -42,23 +43,31 @@
FF_run_number = 3937

# %%
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(extractor_kwargs)
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
method=method, extractor_kwargs=extractor_kwargs
)
path = DataManagement.find_SPE_HHV(
run_number=HHV_run_number,
method=method,
str_extractor_kwargs=str_extractor_kwargs,
)
if len(path) == 1:
log.info(
f"{path[0]} found associated to HHV run {HHV_run_number}, method {method} and extractor kwargs {str_extractor_kwargs}"
f"{path[0]} found associated to HHV run {HHV_run_number},"
f"method {method} and extractor kwargs {str_extractor_kwargs}"
)
else:
_text = f"no file found in $NECTARCAM_DATA/../SPEfit associated to HHV run {HHV_run_number}, method {method} and extractor kwargs {str_extractor_kwargs}"
_text = (
f"no file found in $NECTARCAM_DATA/../SPEfit associated to HHV run"
f"{HHV_run_number}, method {method} and extractor kwargs {str_extractor_kwargs}"
)
log.error(_text)
raise FileNotFoundError(_text)

# %% [markdown]
# WARNING : for now you can't split the event loop in slice for the Photo-statistic method, however in case of the charges havn't been computed on disk, the loop over events will only store the charge, therefore memory errors should happen rarely
# WARNING : for now you can't split the event loop in slices for the Photo-statistic
# method. However, if the charges haven't been computed on disk, the loop over
# events will only store the charges, so memory errors should rarely happen.

# %%
tool = PhotoStatisticNectarCAMCalibrationTool(
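
For context, the lookup pattern that the tutorial now relies on can be condensed as follows. This is a sketch under assumptions: the HHV run number and the method name are placeholders (only the FF run number and the extractor kwargs are visible in the diff), and it assumes the updated DataManagement.find_SPE_HHV raises FileNotFoundError when nothing matches, as the management.py changes further down show.

import logging

from nectarchain.data.management import DataManagement
from nectarchain.makers.extractor.utils import CtapipeExtractor

log = logging.getLogger(__name__)

# Placeholder values for illustration; the tutorial defines its own run numbers.
HHV_run_number = 3942
method = "LocalPeakWindowSum"
extractor_kwargs = {"window_width": 12, "window_shift": 4}

str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
    method=method, extractor_kwargs=extractor_kwargs
)

try:
    path = DataManagement.find_SPE_HHV(
        run_number=HHV_run_number,
        method=method,
        str_extractor_kwargs=str_extractor_kwargs,
    )
    log.info("SPE fit result found: %s", path[0])
except FileNotFoundError:
    log.error("No SPE fit result on disk for HHV run %s", HHV_run_number)
    raise
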
142 changes: 87 additions & 55 deletions src/nectarchain/data/management.py
Expand Up @@ -230,7 +230,7 @@ def __get_GRID_location_ELog(
break

if i == len(lines) - 1:
e = Exception("lfns not found on GRID")
e = FileNotFoundError("lfns not found on GRID")
log.error(e, exc_info=True)
log.debug(lines)
raise e
@@ -283,16 +283,17 @@ def find_photostat(
ped_method="FullWaveformSum",
str_extractor_kwargs="",
):
full_file = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/PhotoStat/"
f"PhotoStatisticNectarCAM_FFrun{FF_run_number}_{FF_method}"
f"_{str_extractor_kwargs}_Pedrun{ped_run_number}_{ped_method}.h5"
).__str__()
path = pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/PhotoStat/"
f"PhotoStatisticNectarCAM_FFrun{FF_run_number}_{FF_method}"
f"_{str_extractor_kwargs}_Pedrun{ped_run_number}_{ped_method}.h5"
)
full_file = glob.glob(str(path))
log.debug("for now it does not check if there are files with max events")
if len(full_file) != 1:
raise Exception(f"the files is {full_file}")
raise FileNotFoundError(
f"When looking for {str(path)} : the found files are {full_file}"
)
return full_file

@staticmethod
@@ -328,67 +329,98 @@ def find_SPE_HHV(
):
keyword = kwargs.get("keyword", "FlatFieldSPEHHV")
std_key = "" if free_pp_n else "Std"
full_file = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"{keyword}{std_key}NectarCAM_run{run_number}*_{method}"
f"_{str_extractor_kwargs}.h5"
).__str__()
path = pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"{keyword}{std_key}NectarCAM_run{run_number}*_{method}"
f"_{str_extractor_kwargs}.h5"
)
# need to improve the files search !!
# -> unstable behavior with SPE results computed
# with maxevents not to None
if len(full_file) != 1:
all_files = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"FlatFieldSPEHHVStdNectarCAM_run{run_number}_maxevents*_"
f"{method}_{str_extractor_kwargs}.h5"
).__str__()
full_file = glob.glob(str(path))
if len(full_file) == 0:
raise FileNotFoundError(f"No file found looking for {str(path)}")
elif len(full_file) > 1:
log.debug(f"Several files found for {str(path)} : {full_file}")
for file in full_file:
if "maxevents" not in file:
log.debug(
f"File found with the largest "
f"number of events for {str(path)} : {file}"
)
return [file]
path = pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/SPEfit/"
f"{keyword}{std_key}NectarCAM_run{run_number}_maxevents*_"
f"{method}_{str_extractor_kwargs}.h5"
)
max_events = 0
for i, file in enumerate(all_files):
data = file.split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
max_events = _max_events
index = i
return [all_files[index]]
all_files = glob.glob(str(path))
if len(all_files) == 0:
raise FileNotFoundError(f"No file found looking for {str(path)}")
else:
log.debug(f"Files found for {str(path)} : {all_files}")
max_events = 0
for i, file in enumerate(all_files):
data = file.split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
max_events = _max_events
index = i
log.debug(f"Best file found : {all_files[index]}")
return [all_files[index]]
else:
log.debug(f"File found for {str(path)} : {full_file}")
return full_file

@staticmethod
def __find_computed_data(
run_number, max_events=None, ext=".h5", data_type="waveforms"
):
out = glob.glob(
pathlib.Path(
if max_events is not None:
path = pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/runs/"
f"{data_type}/*_run{run_number}_maxevents*{ext}"
)
else:
path = pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/runs/"
f"{data_type}/*_run{run_number}{ext}"
).__str__()
)
if not (max_events is None):
all_files = glob.glob(
pathlib.Path(
f"{os.environ.get('NECTARCAMDATA','/tmp')}/runs/"
f"{data_type}/*_run{run_number}_maxevents*{ext}"
).__str__()
)
best_max_events = np.inf
best_index = None
for i, file in enumerate(all_files):
data = file.split("/")[-1].split(".h5")[0].split("_")
out = glob.glob(str(path))
if len(out) == 0:
raise FileNotFoundError(f"No file found looking for {str(path)}")
elif len(out) > 1:
if max_events is None:
raise FileExistsError(f"Several files found for {str(path)} : {out}")
else:
log.debug(
f"Several files found for {str(path)} : {out},"
f"will look for the most complete one"
)
best_max_events = np.inf
best_index = None
for i, file in enumerate(out):
data = file.split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
if _max_events < best_max_events:
best_max_events = _max_events
best_index = i
if best_index is not None:
out = [out[best_index]]
else:
if max_events is not None:
data = out[0].split("/")[-1].split(".h5")[0].split("_")
for _data in data:
if "maxevents" in _data:
_max_events = int(_data.split("maxevents")[-1])
break
if _max_events >= max_events:
if _max_events < best_max_events:
best_max_events = _max_events
best_index = i
if not (best_index is None):
out = [all_files[best_index]]
if _max_events < max_events:
raise FileNotFoundError(
f"File found for {str(path)} : {out[0]} has less events "
f"than max_events asked {max_events}"
)
return out
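
Both find_SPE_HHV and __find_computed_data above choose between candidate files by reading the maxevents value encoded in the file name. The standalone sketch below shows that parsing step, using a regular expression instead of the string splitting in the diff; the helper name and the file names are made up for illustration.

import re


def parse_max_events(filename: str):
    """Return the integer following 'maxevents' in a file name, or None."""
    match = re.search(r"maxevents(\d+)", filename)
    return int(match.group(1)) if match else None


# Made-up candidate files following the naming scheme used above.
candidates = [
    "FlatFieldSPEHHVStdNectarCAM_run3942_maxevents1000_LocalPeakWindowSum_window_shift_4_window_width_12.h5",
    "FlatFieldSPEHHVStdNectarCAM_run3942_maxevents50000_LocalPeakWindowSum_window_shift_4_window_width_12.h5",
]

# Keep the candidate processed with the largest number of events,
# mirroring the selection loop in DataManagement.find_SPE_HHV.
best = max(candidates, key=lambda name: parse_max_events(name) or 0)
print(best)
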
62 changes: 37 additions & 25 deletions src/nectarchain/makers/calibration/gain/flatfield_spe_makers.py
@@ -9,6 +9,7 @@
from ....data.container import ChargesContainer, ChargesContainers
from ....data.container.core import merge_map_ArrayDataContainer
from ....data.management import DataManagement
from ....utils.error import TooMuchFileException
from ...component import ArrayDataComponent, NectarCAMComponent
from ...extractor.utils import CtapipeExtractor
from .core import GainNectarCAMCalibrationTool
@@ -46,25 +47,37 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
method=self.method,
extractor_kwargs=self.extractor_kwargs,
)
if not (self.reload_events):
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
if len(files) == 1:
try:
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
if len(files) == 1:
log.warning(
"You asked events_per_slice but you don't want to\
reload events and a charges file is on disk, \
then events_per_slice is set to None"
)
self.events_per_slice = None
else:
raise TooMuchFileException("No single charges file found")
except (FileNotFoundError, TooMuchFileException) as e:
log.warning(e)
log.warning(
"You asked events_per_slice but you don't want to reload events and\
a charges file is on disk, then events_per_slice is set to None"
"You will not be able to reload charges from\
disk when start() call"
)
self.events_per_slice = None

def _init_output_path(self):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
method=self.method,
extractor_kwargs=self.extractor_kwargs,
)
if self.events_per_slice is None:
ext = ".h5"
@@ -94,14 +107,19 @@ def start(
**kwargs,
):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
)
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
extractor_kwargs=self.extractor_kwargs,
)
try:
files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
except Exception as e:
log.warning(e)
files = []
if self.reload_events or len(files) != 1:
if len(files) != 1:
self.log.info(
@@ -135,7 +153,7 @@
self.components[
0
]._chargesContainers = merge_map_ArrayDataContainer(
chargesContainers
next(chargesContainers)
)
else:
self.log.info("merging along slices")
@@ -152,12 +170,6 @@
)

def _write_container(self, container: Container, index_component: int = 0) -> None:
# if isinstance(container,SPEfitContainer) :
# self.writer.write(table_name = f"{self.method}_
# {CtapipeExtractor.get_extractor_kwargs_str(self.extractor_kwargs)}",
# containers = container,
# )
# else :
super()._write_container(container=container, index_component=index_component)


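
The maker above now degrades gracefully when the charges file cannot be uniquely identified: FileNotFoundError and the new TooMuchFileException are caught, a warning is logged, and the tool falls back to recomputing charges from the events. The snippet below is a self-contained sketch of that pattern; the stand-in exception class and the helper are hypothetical, written only so it runs outside nectarchain.

import logging

log = logging.getLogger(__name__)


class TooMuchFileException(Exception):
    """Stand-in for nectarchain.utils.error.TooMuchFileException."""


def find_single_charges_file(find_charges, **query):
    """Return a single charges file, or None when the lookup fails.

    ``find_charges`` is any callable with the DataManagement.find_charges
    signature; ``query`` is forwarded to it unchanged.
    """
    try:
        files = find_charges(**query)
        if len(files) != 1:
            raise TooMuchFileException(f"expected exactly one charges file, got {files}")
        return files[0]
    except (FileNotFoundError, TooMuchFileException) as err:
        log.warning("%s -- charges will be recomputed from the events", err)
        return None
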
32 changes: 21 additions & 11 deletions src/nectarchain/makers/calibration/gain/photostat_makers.py
Expand Up @@ -59,7 +59,8 @@ def __init__(self, *args, **kwargs):

def _init_output_path(self):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
method=self.method,
extractor_kwargs=self.extractor_kwargs,
)
if self.max_events is None:
filename = (
@@ -96,18 +97,27 @@ def start(
**kwargs,
):
str_extractor_kwargs = CtapipeExtractor.get_extractor_kwargs_str(
self.extractor_kwargs
)
FF_files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
Ped_files = DataManagement.find_charges(
run_number=self.Ped_run_number,
max_events=self.max_events,
extractor_kwargs=self.extractor_kwargs,
)
try:
FF_files = DataManagement.find_charges(
run_number=self.run_number,
method=self.method,
str_extractor_kwargs=str_extractor_kwargs,
max_events=self.max_events,
)
except Exception as e:
self.log.warning(e)
FF_files = []
try:
Ped_files = DataManagement.find_charges(
run_number=self.Ped_run_number,
max_events=self.max_events,
)
except Exception as e:
self.log.warning(e)
Ped_files = []
if self.reload_events or len(FF_files) != 1 or len(Ped_files) != 1:
if len(FF_files) != 1 or len(Ped_files) != 1:
self.log.info(
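
The same lookup-with-fallback appears twice here, once for the flat-field run and once for the pedestal run. A hypothetical helper such as the one below could factor out that repetition; it is only a sketch, not part of the nectarchain API, and the reload condition on FF_files and Ped_files would stay unchanged.

def charges_files_or_empty(find_charges, log, **query):
    """Return the files found by ``find_charges``, or [] if the lookup fails."""
    try:
        return find_charges(**query)
    except Exception as err:
        # Mirrors the fallback used for FF_files and Ped_files in start().
        log.warning(err)
        return []
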
