Skip to content

Commit

Permalink
docs: add documentation for almost everything
Browse files Browse the repository at this point in the history
I haven't documented amplitude.rs yet because it has some fundamental flaws pointed out to me by Lawrence Ng.
  • Loading branch information
denehoffman committed May 30, 2024
1 parent a155903 commit 04b99c3
Show file tree
Hide file tree
Showing 4 changed files with 352 additions and 13 deletions.
26 changes: 26 additions & 0 deletions crates/rustitude-core/src/amplitude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,32 @@ use crate::{
errors::RustitudeError,
};

/// A single parameter within an [`Amplitude`].
#[derive(Clone)]
pub struct Parameter {
    /// Name of the parent [`Amplitude`] containing this parameter.
    pub amplitude: String,
    /// Name of the parameter.
    pub name: String,
    /// Index of the parameter with respect to the [`Model`]. This will be [`Option::None`] if
    /// the parameter is fixed.
    pub index: Option<usize>,
    /// A separate index for fixed parameters to ensure they stay constrained properly if freed.
    /// This will be [`Option::None`] if the parameter is free in the [`Model`].
    pub fixed_index: Option<usize>,
    /// The initial value the parameter takes, or alternatively the value of the parameter if it is
    /// fixed in the fit.
    pub initial: f64,
    /// Bounds for the given parameter (defaults to +/- infinity). This is mostly optional and
    /// isn't used in any Rust code aside from being able to get and set it.
    pub bounds: (f64, f64),
}
impl Parameter {
/// Creates a new [`Parameter`] within an [`Amplitude`] using the name of the [`Amplitude`],
/// the name of the [`Parameter`], and the index of the parameter within the [`Model`].
///
/// By default, new [`Parameter`]s are free, have an initial value of `0.0`, and their bounds
/// are set to `(f64::NEG_INFINITY, f64::INFINITY)`.
pub fn new(amplitude: &str, name: &str, index: usize) -> Self {
Self {
amplitude: amplitude.to_string(),
Expand Down Expand Up @@ -194,6 +210,11 @@ pub trait Node: Sync + Send {
/// parameters. For instance, to calculate a spherical harmonic, we don't actually need any
/// other information than what is contained in the [`Event`], so we can calculate a spherical
/// harmonic for every event once and then retrieve the data in the [`Node::calculate`] method.
///
/// # Errors
///
/// This function should be written to return a [`RustitudeError`] if any part of the
/// calculation fails.
fn precalculate(&mut self, _dataset: &Dataset) -> Result<(), RustitudeError> {
    // Default implementation: no precalculation is needed. Implementors override
    // this to cache per-event quantities ahead of [`Node::calculate`].
    Ok(())
}
Expand All @@ -206,6 +227,11 @@ pub trait Node: Sync + Send {
/// a slice of [`f64`]s. This slice is guaranteed to have the same length and order as
/// specified in the [`Node::parameters`] method, or it will be empty if that method returns
/// [`None`].
///
/// # Errors
///
/// This function should be written to return a [`RustitudeError`] if any part of the
/// calculation fails.
fn calculate(&self, parameters: &[f64], event: &Event) -> Result<Complex64, RustitudeError>;

/// A method which specifies the number and order of parameters used by the [`Node`].
Expand Down
164 changes: 154 additions & 10 deletions crates/rustitude-core/src/dataset.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,75 @@
//! This module contains all the resources needed to load and examine datasets.
//!
//! A [`Dataset`] is, in essence, a list of [`Event`]s, each of which contain all the pertinent
//! information about a single set of initial- and final-state particles, as well as an index
//! and weight within the [`Dataset`].
//!
//! This crate currently supports loading [`Dataset`]s from ROOT and Parquet files (see
//! [`Dataset::from_root`] and [`Dataset::from_parquet`]). These methods require the following
//! "branches" or "columns" to be present in the file:
//!
//! | Branch Name | Data Type | Notes |
//! |---|---|---|
//! | `Weight` | Float32 | |
//! | `E_Beam` | Float32 | |
//! | `Px_Beam` | Float32 | |
//! | `Py_Beam` | Float32 | |
//! | `Pz_Beam` | Float32 | |
//! | `E_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `Px_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `Py_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `Pz_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `EPS` | \[Float32\] | \[$`P_\gamma \cos(\Phi)`$, $`P_\gamma \sin(\Phi)`$, $`0.0`$\] for linear polarization with magnitude $`P_\gamma`$ and angle $`\Phi`$ |
//!
//! The `EPS` branch is optional and files without such a branch can be loaded under the
//! following conditions. First, if we don't care about polarization, and wish to set `EPS` =
//! `[0.0, 0.0, 0.0]`, we can do so using the methods [`Dataset::from_root_unpolarized`] or
//! [`Dataset::from_parquet_unpolarized`]. If a data file contains events with only one
//! polarization, we can compute the `EPS` vector ourselves and use
//! [`Dataset::from_root_with_eps`] or [`Dataset::from_parquet_with_eps`] to load the same vector
//! for every event. Finally, to provide compatibility with the way polarization is sometimes
//! included in `AmpTools` files, we can note that the beam is often only moving along the
//! $`z`$-axis, so the $`x`$ and $`y`$ components are typically `0.0` anyway, so we can store
//! the $`x`$ and $`y`$ components of `EPS` in the beam's four-momentum and use the methods
//! [`Dataset::from_root_eps_in_beam`] or [`Dataset::from_parquet_eps_in_beam`] to extract it.
//!
//! There are also several methods used to split up [`Dataset`]s based on their component
//! values. The [`Dataset::select`] method takes mutable access to a dataset along with a query
//! function which takes an [`Event`] and returns a [`bool`]. For each event, if the query
//! returns `true`, the event is removed from the original dataset and added to a new dataset
//! which is then returned by the `select` function. The [`Dataset::reject`] method does the
//! opposite. For example,
//!
//! ```ignore
//! let ds_original = Dataset::from_root("path.root").unwrap();
//! let ds_a = ds_original.clone();
//! let ds_b = ds_original.clone();
//! let mass_gt_1_gev = |e: &Event| -> bool {
//! (e.daughter_p4s[0] + e.daughter_p4s[1]).m() > 1.0
//! };
//! let ds_a_selected = ds_a.select(mass_gt_1_gev);
//! let ds_b_rejected = ds_b.reject(mass_gt_1_gev);
//! ```
//!
//! After this, `ds_a` and `ds_b_rejected` will contain events where the four-momentum of the
//! first two daughter particles combined has a mass *less than* $`1.0`$ ``GeV``. On the other hand,
//! `ds_a_selected` and `ds_b` will have events where the opposite is true and the mass is
//! *greater than* $`1.0`$ ``GeV``. The reason for this logic is two-fold. First, we might be
//! dealing with large datasets, so we don't want to create copies of events if it can be
//! avoided. If copies are needed, they should be made explicitly with [`Dataset::clone`].
//! Otherwise, we just extract the events from the dataset. The other reason is that the syntax
//! reads in a "correct" way. We expect `let selected = data.select(condition);` to put the
//! selected data into the `selected` dataset. We can then choose if we want to hold on to the
//! rejected data.
//!
//! Since it is a common operation, there is also a method [`Dataset::split`] which will bin data
//! by a query which takes an [`Event`] and returns an [`f64`] value (rather than a [`bool`]).
//! This method also takes a `range: (f64, f64)` and a number of bins `nbins: usize`, and it
//! returns a `(Vec<Dataset>, Dataset, Dataset)`. These fields correspond to the binned datasets,
//! the underflow bin, and the overflow bin respectively, so no data should ever be "lost" by this
//! operation. There is also a convenience method, [`Dataset::split_m`], to split the dataset by
//! the mass of the summed four-momentum of any of the daughter particles, specified by their
//! index.
use std::{fmt::Display, fs::File, path::Path, sync::Arc};

use itertools::izip;
Expand All @@ -12,13 +84,21 @@ use rayon::prelude::*;

use crate::{errors::RustitudeError, prelude::FourMomentum};

/// The [`Event`] struct contains all the information concerning a single interaction between
/// particles in the experiment. See the individual fields for additional information.
#[derive(Debug, Default, Clone)]
pub struct Event {
    /// The index of the event within the parent [`Dataset`].
    pub index: usize,
    /// The weight of the event within the parent [`Dataset`].
    pub weight: f64,
    /// The beam [`FourMomentum`].
    pub beam_p4: FourMomentum,
    /// The recoil (target after interaction) [`FourMomentum`].
    pub recoil_p4: FourMomentum,
    /// [`FourMomentum`] of each other final state particle.
    pub daughter_p4s: Vec<FourMomentum>,
    /// A vector corresponding to the polarization of the beam (the `EPS` branch).
    pub eps: Vector3<f64>,
}

Expand All @@ -42,6 +122,12 @@ impl Display for Event {
}
impl Event {
/// Reads an [`Event`] from a single [`Row`] in a Parquet file.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row(
index: usize,
row: Result<Row, parquet::errors::ParquetError>,
) -> Result<Self, RustitudeError> {
Expand Down Expand Up @@ -154,6 +240,13 @@ impl Event {
Ok(event)
}
/// Reads an [`Event`] from a single [`Row`] in a Parquet file, assuming EPS is stored in the
/// beam four-momentum.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row_eps_in_beam(
index: usize,
row: Result<Row, parquet::errors::ParquetError>,
) -> Result<Self, RustitudeError> {
Expand Down Expand Up @@ -253,6 +346,12 @@ impl Event {
}

/// Reads an [`Event`] from a single [`Row`] in a Parquet file and set EPS for all events.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row_with_eps(
index: usize,
row: Result<Row, parquet::errors::ParquetError>,
eps: Vector3<f64>,
Expand Down Expand Up @@ -354,15 +453,30 @@ impl Event {
}

/// Reads an [`Event`] from a single [`Row`] in a Parquet file and sets EPS = `[0, 0, 0]` for
/// all events.
///
/// This is a thin wrapper which delegates to [`Event::read_parquet_row_with_eps`] with a
/// zero vector.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row_unpolarized(
    index: usize,
    row: Result<Row, parquet::errors::ParquetError>,
) -> Result<Self, RustitudeError> {
    Self::read_parquet_row_with_eps(index, row, Vector3::default())
}
}

/// An array of [`Event`]s with some helpful methods for accessing and parsing the data they
/// contain.
///
/// A [`Dataset`] can be loaded from either Parquet or ROOT files using the corresponding
/// `Dataset::from_*` methods. Events are stored in an [`Arc<RwLock<Vec<Event>>>`], since we
/// rarely need to write data to a dataset (splitting/selecting/rejecting events) but often need to
/// read events from a dataset.
#[derive(Default, Debug, Clone)]
pub struct Dataset {
    /// Storage for events.
    pub events: Arc<RwLock<Vec<Event>>>,
}

Expand All @@ -374,16 +488,9 @@ impl Dataset {
self.events.read_arc().iter().map(|e| e.weight).collect()
}

// TODO:
// pub fn select(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> PyDataset {}
// pub fn reject(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> PyDataset {}
// pub fn split(
// mut self,
// variable: impl Fn(&Event) -> f64 + Sync + Send,
// range: (f64, f64),
// nbins: usize,
// ) -> (Vec<PyDataset>, PyDataset, PyDataset) {}

/// Splits the dataset by the mass of the combination of specified daughter particles in the
/// event. If no daughters are given, the first and second particle are assumed to form the
/// desired combination.
pub fn split_m(
&self,
range: (f64, f64),
Expand Down Expand Up @@ -421,6 +528,13 @@ impl Dataset {
))
}

/// Generates a new [`Dataset`] from a Parquet file, assuming the EPS vector can be constructed
/// from the x and y-components of the beam.
///
/// # Errors
///
/// This method will fail if any individual event is missing all of the required fields, if
/// they have the wrong type, or if the file doesn't exist/can't be read for any reason.
pub fn from_parquet_eps_in_beam(path: &str) -> Result<Self, RustitudeError> {
let path = Path::new(path);
let file = File::open(path)?;
Expand All @@ -434,6 +548,12 @@ impl Dataset {
))
}

/// Generates a new [`Dataset`] from a Parquet file with a given EPS polarization vector.
///
/// # Errors
///
/// This method will fail if any individual event is missing all of the required fields, if
/// they have the wrong type, or if the file doesn't exist/can't be read for any reason.
pub fn from_parquet_with_eps(path: &str, eps: Vec<f64>) -> Result<Self, RustitudeError> {
let path = Path::new(path);
let file = File::open(path)?;
Expand All @@ -448,6 +568,12 @@ impl Dataset {
))
}

/// Generates a new [`Dataset`] from a Parquet file with EPS = `[0, 0, 0]`.
///
/// # Errors
///
/// This method will fail if any individual event is missing all of the required fields, if
/// they have the wrong type, or if the file doesn't exist/can't be read for any reason.
pub fn from_parquet_unpolarized(path: &str) -> Result<Self, RustitudeError> {
let path = Path::new(path);
let file = File::open(path)?;
Expand Down Expand Up @@ -683,20 +809,29 @@ impl Dataset {
.collect(),
))
}

/// Creates a new [`Dataset`] from a [`Vec<Event>`].
pub fn new(events: Vec<Event>) -> Self {
    let events = Arc::new(RwLock::new(events));
    Self { events }
}

/// Returns `true` if the dataset contains no events.
pub fn is_empty(&self) -> bool {
    let events = self.events.read();
    events.is_empty()
}

/// Returns the number of events stored in the dataset.
pub fn len(&self) -> usize {
    let events = self.events.read();
    events.len()
}

/// Selects events in the dataset using the given query and remove them from the [`Dataset`],
/// returning them in a new [`Dataset`]. The original [`Dataset`] will then contain the
/// "rejected" events, events for which the query returned `false`.
///
/// See also: [`Dataset::reject`]
pub fn select(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> Self {
let (mut selected, mut rejected): (Vec<_>, Vec<_>) =
self.events.write().par_drain(..).partition(query);
Expand All @@ -712,10 +847,19 @@ impl Dataset {
Self::new(rejected)
}

/// Removes events from the dataset for which the query returns `false` and returns them in a
/// new [`Dataset`]. The original [`Dataset`] will contain events for which the query returned
/// `true`.
///
/// This is implemented as [`Dataset::select`] with the query negated, so it behaves as the
/// exact complement of that method. (NOTE(review): the previous doc said events are removed
/// when the query returns `true`, which contradicted the second sentence and the
/// `select(!query)` implementation below.)
///
/// See also: [`Dataset::select`]
pub fn reject(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> Self {
    self.select(|event| !query(event))
}

/// Splits the dataset into bins of the specified variable derived from an [`Event`]. This
/// method returns a [`Vec<Dataset>`] containing the binned datasets, an underflow [`Dataset`]
/// (events which are below the lower range), and an overflow [`Dataset`] (events above the
/// upper range) in that order.
pub fn split(
mut self,
variable: impl Fn(&Event) -> f64 + Sync + Send,
Expand Down
1 change: 0 additions & 1 deletion crates/rustitude-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,6 @@
clippy::missing_safety_doc,
clippy::missing_panics_doc,
clippy::missing_errors_doc,
clippy::missing_docs_in_private_items,
missing_docs
)]
#![cfg_attr(feature = "simd", feature(portable_simd))]
Expand Down
Loading

0 comments on commit 04b99c3

Please sign in to comment.