From 21b03fdcb04d3e7196e9ccde06ef1aa2965e8864 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 12:57:05 +0100 Subject: [PATCH 1/8] add conftest file --- conftest.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 conftest.py diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..1287a021 --- /dev/null +++ b/conftest.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.fixture(scope="session") +def pipeline_testdir(tmp_path_factory): + return tmp_path_factory.mktemp("test_pipeline") From 8151f43be830749e59f782e45f2485f00bcd942a Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 12:57:26 +0100 Subject: [PATCH 2/8] Add custom marker for integration tests --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index a97dd4b4..8a051d17 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,6 +10,7 @@ show-response = 1 minversion=3.0 norecursedirs=build docs/_build addopts = -v +markers = integration: integration test [aliases] test=pytest From 5ed7b80eed0b11c17a43757f2c3043d7bc630ec8 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 12:58:16 +0100 Subject: [PATCH 3/8] Add CTA South test analysis configuration --- protopipe/scripts/tests/test_config_analysis_south.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protopipe/scripts/tests/test_config_analysis_south.yaml b/protopipe/scripts/tests/test_config_analysis_south.yaml index 6a9de344..448d6899 100644 --- a/protopipe/scripts/tests/test_config_analysis_south.yaml +++ b/protopipe/scripts/tests/test_config_analysis_south.yaml @@ -9,7 +9,7 @@ General: # WARNING: for simulations containing multiple copies of the telescopes, # only 'full_array' or custom list are supported options! 
array: full_array - cam_id_list : ['LSTCam', 'NectarCam'] # Selected cameras (disabled option) + cam_id_list : ['LSTCam', 'FlashCam', 'CHEC'] # Selected cameras (disabled option) # Cleaning for reconstruction ImageCleaning: From 62a0caab8e0a7871875164d14cee597e754ffd25 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 12:58:55 +0100 Subject: [PATCH 4/8] Add test config files for current models --- protopipe/scripts/tests/test_classifier.yaml | 51 ++++++++++++++++++++ protopipe/scripts/tests/test_regressor.yaml | 41 ++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 protopipe/scripts/tests/test_classifier.yaml create mode 100644 protopipe/scripts/tests/test_regressor.yaml diff --git a/protopipe/scripts/tests/test_classifier.yaml b/protopipe/scripts/tests/test_classifier.yaml new file mode 100644 index 00000000..ad2deacf --- /dev/null +++ b/protopipe/scripts/tests/test_classifier.yaml @@ -0,0 +1,51 @@ +General: + model_type: 'classifier' + # [...] = your analysis local full path OUTSIDE the Vagrant box + data_dir: '[...]/shared_folder/analyses/v0.4.0_dev1/data/TRAINING/for_particle_classification/' + data_sig_file: 'TRAINING_classification_{}_gamma_merged.h5' + data_bkg_file: 'TRAINING_classification_{}_proton_merged.h5' + cam_id_list: ['LSTCam', 'NectarCam'] + table_name_template: '' # leave empty (TO BE REMOVED) + outdir: '[...]/shared_folder/analyses/v0.4.0_dev1/estimators/gamma_hadron_classifier' + +Split: + train_fraction: 0.8 + use_same_number_of_sig_and_bkg_for_training: False # Lowest statistics will drive the split + +Method: + name: 'RandomForestClassifier' # AdaBoostClassifier or RandomForestClassifier + target_name: 'label' + tuned_parameters: # these are lists of values used by the GridSearchCV algorithm + n_estimators: [200] + max_depth: [10] # null for None + max_features: [3] # possible choices are “auto”, “sqrt”, “log2”, int or float + min_samples_split: [10] + min_samples_leaf: [10] + scoring: 'roc_auc' # 
possible choices are 'roc_auc', 'explained_variance' + cv: 2 + use_proba: True # If not output is score + calibrate_output: False # If true calibrate probability + +FeatureList: + # - 'log10_reco_energy' + # - 'log10_reco_energy_tel' + - 'log10_hillas_intensity' + - 'hillas_width' + - 'hillas_length' + - 'h_max' + - 'impact_dist' + +SigFiducialCuts: + - 'good_image == 1' + - 'is_valid == True' + +BkgFiducialCuts: + - 'good_image == 1' + - 'is_valid == True' + +Diagnostic: + # Energy binning (used for reco and true energy) + energy: + nbins: 4 + min: 0.02 + max: 200 diff --git a/protopipe/scripts/tests/test_regressor.yaml b/protopipe/scripts/tests/test_regressor.yaml new file mode 100644 index 00000000..9041fe5e --- /dev/null +++ b/protopipe/scripts/tests/test_regressor.yaml @@ -0,0 +1,41 @@ +General: + model_type: 'regressor' + # [...] = your analysis local full path OUTSIDE the Vagrant box + data_dir: './' + data_file: 'test_TRAINING_energy_{}_gamma_merged.h5' + outdir: './' + cam_id_list: ['LSTCam', 'NectarCam'] + table_name_template: '' # leave empty (TO BE REMOVED) + +Split: + train_fraction: 0.5 + +Method: + name: 'AdaBoostRegressor' + target_name: 'true_energy' + tuned_parameters: + learning_rate: [0.3] + n_estimators: [100] + base_estimator__max_depth: [null] # null is equivalent to None + base_estimator__min_samples_split: [2] + base_estimator__min_samples_leaf: [10] + scoring: 'explained_variance' + cv: 2 + +FeatureList: + - 'log10_hillas_intensity' + - 'log10_impact_dist' + - 'hillas_width_reco' + - 'hillas_length_reco' + - 'h_max' + +SigFiducialCuts: + - 'good_image == 1' + - 'is_valid == True' + +Diagnostic: + # Energy binning (used for reco and true energy) + energy: + nbins: 15 + min: 0.0125 + max: 125 From ff2622fc72fd58188ea4d263fe5d44fb2a5c0fbc Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 12:59:14 +0100 Subject: [PATCH 5/8] Add pipeline integration testing module --- protopipe/scripts/tests/test_pipeline.py | 198 
+++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 protopipe/scripts/tests/test_pipeline.py diff --git a/protopipe/scripts/tests/test_pipeline.py b/protopipe/scripts/tests/test_pipeline.py new file mode 100644 index 00000000..6926376c --- /dev/null +++ b/protopipe/scripts/tests/test_pipeline.py @@ -0,0 +1,198 @@ +from os import system +from pkg_resources import resource_filename + +import tables +import pytest + +from protopipe.pipeline.temp import get_dataset_path +from protopipe.scripts import data_training, build_model + + +# PROD 3B + +# CONFIG FILES +config_prod3b_CTAN = resource_filename("protopipe", "scripts/tests/test_config_analysis_north.yaml") +config_prod3b_CTAS = resource_filename("protopipe", "scripts/tests/test_config_analysis_south.yaml") + +# TEST FILES + +URL_TEST_DATA = "http://cccta-dataserver.in2p3.fr/data/protopipe/testData/" +URL_PROD3B_CTAN = f"{URL_TEST_DATA}/prod3_laPalma_baseline_Az180_Zd20" +URL_PROD3B_CTAS = f"{URL_TEST_DATA}/prod3_Paranal_baseline_Az180_Zd20" + +input_data = { + + "PROD3B_CTA_NORTH": {"config": config_prod3b_CTAN, + "gamma1": get_dataset_path("gamma1.simtel.gz", + url=f"{URL_PROD3B_CTAN}"), + "gamma2": get_dataset_path("gamma2.simtel.gz", + url=f"{URL_PROD3B_CTAN}"), + "proton1": get_dataset_path("proton1.simtel.gz", + url=f"{URL_PROD3B_CTAN}"), + }, + + "PROD3B_CTA_SOUTH": {"config": config_prod3b_CTAS, + "gamma1": get_dataset_path("gamma1.simtel.gz", + url=f"{URL_PROD3B_CTAS}"), + "gamma2": get_dataset_path("gamma2.simtel.gz", + url=f"{URL_PROD3B_CTAS}"), + "proton1": get_dataset_path("proton1.simtel.gz", + url=f"{URL_PROD3B_CTAS}"), + } + +} + + +@pytest.mark.parametrize("test_case", ["PROD3B_CTA_NORTH", "PROD3B_CTA_SOUTH"]) +def test_GET_GAMMAS_FOR_ENERGY_MODEL_WITH_IMAGES(test_case, pipeline_testdir): + + outpath = pipeline_testdir / f"test_training_withImages_{test_case}.h5" + + exit_status = system( + f"python {data_training.__file__}\ + --config_file {input_data[test_case]['config']}\ 
+ -o {outpath}\ + --save_images\ + -i {input_data[test_case]['gamma1'].parent}\ + -f {input_data[test_case]['gamma1'].name}" + ) + + # check that the script ends without crashing + assert exit_status == 0 + + # check that the produced HDF5 file is non-empty + with tables.open_file(outpath) as file: + assert file.get_filesize() > 0 + + +@pytest.mark.parametrize("test_case", [ + pytest.param("PROD3B_CTA_NORTH", marks=pytest.mark.dependency(name="g1N")), + pytest.param("PROD3B_CTA_SOUTH", marks=pytest.mark.dependency(name="g1S")), +]) +def test_GET_GAMMAS_FOR_ENERGY_MODEL(test_case, pipeline_testdir): + + outpath = pipeline_testdir / f"test_gamma1_noImages_{test_case}.h5" + + exit_status = system( + f"python {data_training.__file__}\ + --config_file {input_data[test_case]['config']}\ + -o {outpath}\ + -i {input_data[test_case]['gamma1'].parent}\ + -f {input_data[test_case]['gamma1'].name}" + ) + + # check that the script ends without crashing + assert exit_status == 0 + + # check that the produced HDF5 file is non-empty + with tables.open_file(outpath) as file: + assert file.get_filesize() > 0 + + +@pytest.mark.parametrize("test_case", [ + pytest.param("PROD3B_CTA_NORTH", marks=pytest.mark.dependency(name="g2N")), + pytest.param("PROD3B_CTA_SOUTH", marks=pytest.mark.dependency(name="g2S")), +]) +def test_GET_GAMMAS_FOR_CLASSIFICATION_MODEL(test_case, pipeline_testdir): + + outpath = pipeline_testdir / f"test_gamma2_noImages_{test_case}.h5" + + exit_status = system( + f"python {data_training.__file__}\ + --config_file {input_data[test_case]['config']}\ + -o {outpath}\ + -i {input_data[test_case]['gamma2'].parent}\ + -f {input_data[test_case]['gamma2'].name}" + ) + + # check that the script ends without crashing + assert exit_status == 0 + + # check that the produced HDF5 file is non-empty + with tables.open_file(outpath) as file: + assert file.get_filesize() > 0 + + +@pytest.mark.parametrize("test_case", [ + pytest.param("PROD3B_CTA_NORTH", 
marks=pytest.mark.dependency(name="p1N")), + pytest.param("PROD3B_CTA_SOUTH", marks=pytest.mark.dependency(name="p1S")), +]) +def test_GET_PROTONS_FOR_CLASSIFICATION_MODEL(test_case, pipeline_testdir): + + outpath = pipeline_testdir / f"test_proton1_noImages_{test_case}.h5" + + exit_status = system( + f"python {data_training.__file__}\ + --config_file {input_data[test_case]['config']}\ + -o {outpath}\ + -m 10\ + -i {input_data[test_case]['proton1'].parent}\ + -f {input_data[test_case]['proton1'].name}" + ) + + # check that the script ends without crashing + assert exit_status == 0 + + # check that the produced HDF5 file is non-empty + with tables.open_file(outpath) as file: + assert file.get_filesize() > 0 + + +@pytest.mark.parametrize("test_case", [ + pytest.param("PROD3B_CTA_NORTH", marks=pytest.mark.dependency(name="EN", + depends=["g1N"])), + pytest.param("PROD3B_CTA_SOUTH", marks=pytest.mark.dependency(name="ES", + depends=["g1S"])), +]) +def test_BUILD_ENERGY_MODEL_AdaBoost_DecisionTreeRegressor(test_case, pipeline_testdir): + """Launch protopipe.scripts.build_model for a AdaBoost DecisionTreeRegressor.""" + + infile = pipeline_testdir / f"test_gamma1_noImages_{test_case}.h5" + outdir = pipeline_testdir / f"energy_model_{test_case}" + + config = resource_filename("protopipe", "scripts/tests/test_regressor.yaml") + + exit_status = system( + f"python {build_model.__file__}\ + --config_file {config}\ + --infile_signal {infile}\ + --outdir {outdir}\ + --cameras_from_file" + ) + assert exit_status == 0 + + +@pytest.mark.parametrize("test_case", [ + pytest.param("PROD3B_CTA_NORTH", marks=pytest.mark.dependency(name="C1", + depends=["g2N", "p1N"])), + pytest.param("PROD3B_CTA_SOUTH", marks=pytest.mark.dependency(name="C2", + depends=["g2S", "p1S"])), +]) +def test_BUILD_CLASSIFICATION_MODEL_RandomForest(test_case, pipeline_testdir): + """Launch protopipe.scripts.build_model for a Random Forest classifier.""" + + infile_signal = pipeline_testdir / 
f"test_gamma2_noImages_{test_case}.h5" + infile_background = pipeline_testdir / f"test_proton1_noImages_{test_case}.h5" + outdir = pipeline_testdir / f"classification_model_{test_case}" + + config = resource_filename("protopipe", "scripts/tests/test_regressor.yaml") + + # This is temporary + # It is only a way to overwrite the cameras to be used in this test + # Reason is that from the current Paranal test files the CHEC images + # are all useless (also because of unoptimized settings) + if test_case == "PROD3B_CTA_SOUTH": + cameras = ['LSTCam', 'FlashCam'] + cameras_argument = f"--cam_id_list '{' '.join([camera for camera in cameras ])}'" + else: + cameras_argument = "--cameras_from_file" + + exit_status = system( + f"python {build_model.__file__}\ + --config_file {config}\ + --infile_signal {infile_signal}\ + --infile_background {infile_background}\ + --outdir {outdir}\ + {cameras_argument}" + ) + assert exit_status == 0 From 061a595ac17d918cb5f87153c0c5928dcd26d1d8 Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 12:59:34 +0100 Subject: [PATCH 6/8] remove old integration testing module for data training tool --- protopipe/scripts/tests/test_dataTraining.py | 94 -------------------- 1 file changed, 94 deletions(-) delete mode 100644 protopipe/scripts/tests/test_dataTraining.py diff --git a/protopipe/scripts/tests/test_dataTraining.py b/protopipe/scripts/tests/test_dataTraining.py deleted file mode 100644 index 952c568e..00000000 --- a/protopipe/scripts/tests/test_dataTraining.py +++ /dev/null @@ -1,94 +0,0 @@ -"""Test the data training script. 
- -TODO ----- - -- test only diffuse data (more general case) -- add Paranal diffuse test file -- add Prod5 test files - -""" -from os import path, system -from pkg_resources import resource_filename - -import tables -import pytest - -from protopipe.scripts import data_training -from protopipe.pipeline.temp import get_dataset_path - -# TEST FILES - -URL = "http://cccta-dataserver.in2p3.fr/data/protopipe/testData/" - -# PROD 3b - -PROD3B_CTA_NORTH = get_dataset_path("gamma1.simtel.gz", - url=f"{URL}/prod3_laPalma_baseline_Az180_Az20") -PROD3B_CTA_SOUTH = get_dataset_path("gamma1.simtel.gz", - url=f"{URL}/prod3_Paranal_baseline_Az180_Az20") - - -@pytest.mark.parametrize("input_file", [PROD3B_CTA_NORTH, PROD3B_CTA_SOUTH]) -def test_dataTraining_noImages(input_file): - """Very bare test to see if the script reaches the end correctly. - - WARNING: some of the cuts in the example config file are not optimized for - cameras other than LSTCam and NectarCam. - In any case, it is expected that in absence of fatal bugs, the script - ends successfully. - """ - - # the difference is only the 'site' key as a check for the user - if input_file in [PROD3B_CTA_SOUTH]: - ana_config = resource_filename("protopipe", "scripts/tests/test_config_analysis_south.yaml") - else: - ana_config = resource_filename("protopipe", "scripts/tests/test_config_analysis_north.yaml") - - exit_status = system( - f"python {data_training.__file__}\ - --config_file {ana_config}\ - -o test_training_noImages.h5\ - -i {path.dirname(input_file)}\ - -f {path.basename(input_file)}" - ) - - # check that the script ends without crashing - assert exit_status == 0 - - # check that the produced HDF5 file is non-empty - with tables.open_file("test_training_noImages.h5") as file: - assert file.get_filesize() > 0 - - -@pytest.mark.parametrize("input_file", [PROD3B_CTA_NORTH, PROD3B_CTA_SOUTH]) -def test_dataTraining_withImages(input_file): - """Very bare test to see if the script reaches the end correctly. 
- - WARNING: some of the cuts in the example config file are not optimized for - cameras other than LSTCam and NectarCam. - In any case, it is expected that in absence of fatal bugs, the script - ends successfully. - """ - - # the difference is only the 'site' key as a check for the user - if input_file in [PROD3B_CTA_SOUTH]: - ana_config = resource_filename("protopipe", "scripts/tests/test_config_analysis_south.yaml") - else: - ana_config = resource_filename("protopipe", "scripts/tests/test_config_analysis_north.yaml") - - exit_status = system( - f"python {data_training.__file__}\ - --config_file {ana_config}\ - -o test_training_withImages.h5\ - --save_images\ - -i {path.dirname(input_file)}\ - -f {path.basename(input_file)}" - ) - - # check that the script ends without crashing - assert exit_status == 0 - - # check that the produced HDF5 file is non-empty - with tables.open_file("test_training_noImages.h5") as file: - assert file.get_filesize() > 0 From 5f504526bfae0f99e475f28e8baf880fa99874fc Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 13:05:05 +0100 Subject: [PATCH 7/8] Use all available cameras from test files by default --- protopipe/scripts/tests/test_pipeline.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/protopipe/scripts/tests/test_pipeline.py b/protopipe/scripts/tests/test_pipeline.py index 6926376c..562cc73c 100644 --- a/protopipe/scripts/tests/test_pipeline.py +++ b/protopipe/scripts/tests/test_pipeline.py @@ -177,22 +177,12 @@ def test_BUILD_CLASSIFICATION_MODEL_RandomForest(test_case, pipeline_testdir): config = resource_filename("protopipe", "scripts/tests/test_regressor.yaml") - # This is temporary - # It is only a way to overwrite the cameras to be used in this test - # Reason is that from the current Paranal test files the CHEC images - # are all useless (also because of unoptimized settings) - if test_case == "PROD3B_CTA_SOUTH": - cameras = ['LSTCam', 'FlashCam'] - cameras_argument = 
f"--cam_id_list '{' '.join([camera for camera in cameras ])}'" - else: - cameras_argument = "--cameras_from_file" - exit_status = system( f"python {build_model.__file__}\ --config_file {config}\ --infile_signal {infile_signal}\ --infile_background {infile_background}\ --outdir {outdir}\ - {cameras_argument}" + --cameras_from_file" ) assert exit_status == 0 From da6458ce1bab7f7558252b61ec0a8a6d0b24a70b Mon Sep 17 00:00:00 2001 From: Michele Peresano Date: Thu, 25 Mar 2021 13:33:02 +0100 Subject: [PATCH 8/8] Update documentation --- docs/contribute/beforepushing.rst | 38 +++++++++++++++++++------------ 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/docs/contribute/beforepushing.rst b/docs/contribute/beforepushing.rst index 0248b0fe..3063087a 100644 --- a/docs/contribute/beforepushing.rst +++ b/docs/contribute/beforepushing.rst @@ -35,18 +35,16 @@ You will have to fix any warning that appears during documentation building, because the documentation also runs on `readthedocs `__ with an option to treat warnings as errors. -Unit and integration tests --------------------------- +Unit tests +---------- .. note:: This is a maintenance activity which has being long overdue and we need manpower for it, so if you have experience on this or you want to contribute please feel free to do so. - For more information on how to contribute to this effort check these issues, - - - `unit-tests `__, - - `integration tests `__. + For more information on how to contribute to this effort check + `this issue `__. Being *protopipe* based on *ctapipe*, all the tools imported from the latter have been already tested and approved (*protopipe* uses always a version of @@ -64,18 +62,28 @@ Same for *pyirf*. order to code a new feature, this has to be pull-requested to *ctapipe* and at the same time hardcoded in *protopipe*, until the new version of *ctapipe* is released. -For the moment there is only **one integration test**. +Integration tests +^^^^^^^^^^^^^^^^^ + +.. 
note:: + For more information on how to contribute to this effort check + `this issue `__. + +The integration tests are defined in the dedicated module ``protopipe/scripts/tests/test_pipeline.py`` +and start from test simtel files stored on a CC-IN2P3 dataserver. + +The test data is diffuse data from the Prod3b baseline simulations of both +CTAN and CTAS produced with the following Corsika settings, + +- gammas, ``NSHOW=10 ESLOPE=-2.0 EMIN=10 EMAX=20 NSCAT=1 CSCAT=200 VIEWCONE=3`` +- protons, ``NSHOW=10 ESLOPE=-2.0 EMIN=100 EMAX=200 NSCAT=1 CSCAT=200 VIEWCONE=3`` +- electrons, ``NSHOW=10 ESLOPE=-2.0 EMIN=10 EMAX=20 NSCAT=1 CSCAT=200 VIEWCONE=3`` -For the moment it uses an old test file from *ctapipe* (a Prod2 CTA South array composed of -LSTCam, FlashCam and ASTRICam with about ~100 simulated showers). -Each test is expected to produce a non-empty HDF5 file. +in the same proportions as a standard full-scale analysis. -The test can be executed directly from the main folder of *protopipe* by launching -``pytest``. It is also automatically triggered by the CI every time a new +The pipeline integration testing can be executed directly from the main folder +of *protopipe* by launching ``pytest``. +It is also automatically triggered by the CI every time a new pull-request is pushed to the repository, and its correct execution is a mandatory condition for merging.