Skip to content

Commit

Permalink
docs: add documentation for almost everything
Browse files Browse the repository at this point in the history
I haven't documented amplitude.rs yet because it has some fundamental flaws pointed out to me by Lawrence Ng.
  • Loading branch information
denehoffman committed May 30, 2024
1 parent a155903 commit 04b99c3
Show file tree
Hide file tree
Showing 4 changed files with 352 additions and 13 deletions.
26 changes: 26 additions & 0 deletions crates/rustitude-core/src/amplitude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,32 @@ use crate::{
errors::RustitudeError,
};

/// A single parameter within an [`Amplitude`].
#[derive(Clone)]
pub struct Parameter {
    /// Name of the parent [`Amplitude`] containing this parameter.
    pub amplitude: String,
    /// Name of the parameter.
    pub name: String,
    /// Index of the parameter with respect to the [`Model`]. This will be [`Option::None`] if
    /// the parameter is fixed.
    pub index: Option<usize>,
    /// A separate index for fixed parameters to ensure they stay constrained properly if freed.
    /// This will be [`Option::None`] if the parameter is free in the [`Model`].
    pub fixed_index: Option<usize>,
    /// The initial value the parameter takes, or alternatively the value of the parameter if it is
    /// fixed in the fit.
    pub initial: f64,
    /// Bounds for the given parameter (defaults to +/- infinity). This is mostly optional and
    /// isn't used in any Rust code aside from being able to get and set it.
    pub bounds: (f64, f64),
}
impl Parameter {
/// Creates a new [`Parameter`] within an [`Amplitude`] using the name of the [`Amplitude`],
/// the name of the [`Parameter`], and the index of the parameter within the [`Model`].
///
/// By default, new [`Parameter`]s are free, have an initial value of `0.0`, and their bounds
/// are set to `(f64::NEG_INFINITY, f64::INFINITY)`.
pub fn new(amplitude: &str, name: &str, index: usize) -> Self {
Self {
amplitude: amplitude.to_string(),
Expand Down Expand Up @@ -194,6 +210,11 @@ pub trait Node: Sync + Send {
/// parameters. For instance, to calculate a spherical harmonic, we don't actually need any
/// other information than what is contained in the [`Event`], so we can calculate a spherical
/// harmonic for every event once and then retrieve the data in the [`Node::calculate`] method.
///
/// # Errors
///
/// This function should be written to return a [`RustitudeError`] if any part of the
/// calculation fails.
fn precalculate(&mut self, _dataset: &Dataset) -> Result<(), RustitudeError> {
    // Default implementation: no precalculation is needed. Implementors override
    // this to cache per-event quantities ahead of [`Node::calculate`].
    Ok(())
}
Expand All @@ -206,6 +227,11 @@ pub trait Node: Sync + Send {
/// a slice of [`f64`]s. This slice is guaranteed to have the same length and order as
/// specified in the [`Node::parameters`] method, or it will be empty if that method returns
/// [`None`].
///
/// # Errors
///
/// This function should be written to return a [`RustitudeError`] if any part of the
/// calculation fails.
fn calculate(&self, parameters: &[f64], event: &Event) -> Result<Complex64, RustitudeError>;

/// A method which specifies the number and order of parameters used by the [`Node`].
Expand Down
164 changes: 154 additions & 10 deletions crates/rustitude-core/src/dataset.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,75 @@
//! This module contains all the resources needed to load and examine datasets.
//!
//! A [`Dataset`] is, in essence, a list of [`Event`]s, each of which contain all the pertinent
//! information about a single set of initial- and final-state particles, as well as an index
//! and weight within the [`Dataset`].
//!
//! This crate currently supports loading [`Dataset`]s from ROOT and Parquet files (see
//! [`Dataset::from_root`] and [`Dataset::from_parquet`]). These methods require the following
//! "branches" or "columns" to be present in the file:
//!
//! | Branch Name | Data Type | Notes |
//! |---|---|---|
//! | `Weight` | Float32 | |
//! | `E_Beam` | Float32 | |
//! | `Px_Beam` | Float32 | |
//! | `Py_Beam` | Float32 | |
//! | `Pz_Beam` | Float32 | |
//! | `E_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `Px_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `Py_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `Pz_FinalState` | \[Float32\] | \[recoil, daughter #1, daughter #2, ...\] |
//! | `EPS` | \[Float32\] | \[$`P_\gamma \cos(\Phi)`$, $`P_\gamma \sin(\Phi)`$, $`0.0`$\] for linear polarization with magnitude $`P_\gamma`$ and angle $`\Phi`$ |
//!
//! The `EPS` branch is optional and files without such a branch can be loaded under the
//! following conditions. First, if we don't care about polarization, and wish to set `EPS` =
//! `[0.0, 0.0, 0.0]`, we can do so using the methods [`Dataset::from_root_unpolarized`] or
//! [`Dataset::from_parquet_unpolarized`]. If a data file contains events with only one
//! polarization, we can compute the `EPS` vector ourselves and use
//! [`Dataset::from_root_with_eps`] or [`Dataset::from_parquet_with_eps`] to load the same vector
//! for every event. Finally, to provide compatibility with the way polarization is sometimes
//! included in `AmpTools` files, we can note that the beam is often only moving along the
//! $`z`$-axis, so the $`x`$ and $`y`$ components are typically `0.0` anyway, so we can store
//! the $`x`$ and $`y`$ components of `EPS` in the beam's four-momentum and use the methods
//! [`Dataset::from_root_eps_in_beam`] or [`Dataset::from_parquet_eps_in_beam`] to extract it.
//!
//! There are also several methods used to split up [`Dataset`]s based on their component
//! values. The [`Dataset::select`] method takes mutable access to a dataset along with a query
//! function which takes an [`Event`] and returns a [`bool`]. For each event, if the query
//! returns `true`, the event is removed from the original dataset and added to a new dataset
//! which is then returned by the `select` function. The [`Dataset::reject`] method does the
//! opposite. For example,
//!
//! ```ignore
//! let ds_original = Dataset::from_root("path.root").unwrap();
//! let ds_a = ds_original.clone();
//! let ds_b = ds_original.clone();
//! let mass_gt_1_gev = |e: &Event| -> bool {
//! (e.daughter_p4s[0] + e.daughter_p4s[1]).m() > 1.0
//! };
//! let ds_a_selected = ds_a.select(mass_gt_1_gev);
//! let ds_b_rejected = ds_b.reject(mass_gt_1_gev);
//! ```
//!
//! After this, `ds_a` and `ds_b_rejected` will contain events where the four-momentum of the
//! first two daughter particles combined has a mass *less than* $`1.0`$ ``GeV``. On the other hand,
//! `ds_a_selected` and `ds_b` will have events where the opposite is true and the mass is
//! *greater than* $`1.0`$ ``GeV``. The reason for this logic is two-fold. First, we might be
//! dealing with large datasets, so we don't want to create copies of events if it can be
//! avoided. If copies are needed, they should be made explicitly with [`Dataset::clone`].
//! Otherwise, we just extract the events from the dataset. The other reason is that the syntax
//! reads in a "correct" way. We expect `let selected = data.select(condition);` to put the
//! selected data into the `selected` dataset. We can then choose if we want to hold on to the
//! rejected data.
//!
//! Since it is a common operation, there is also a method [`Dataset::split`] which will bin data
//! by a query which takes an [`Event`] and returns an [`f64`] value (rather than a [`bool`]).
//! This method also takes a `range: (f64, f64)` and a number of bins `nbins: usize`, and it
//! returns a `(Vec<Dataset>, Dataset, Dataset)`. These fields correspond to the binned datasets,
//! the underflow bin, and the overflow bin respectively, so no data should ever be "lost" by this
//! operation. There is also a convenience method, [`Dataset::split_m`], to split the dataset by
//! the mass of the summed four-momentum of any of the daughter particles, specified by their
//! index.
use std::{fmt::Display, fs::File, path::Path, sync::Arc};

use itertools::izip;
Expand All @@ -12,13 +84,21 @@ use rayon::prelude::*;

use crate::{errors::RustitudeError, prelude::FourMomentum};

/// The [`Event`] struct contains all the information concerning a single interaction between
/// particles in the experiment. See the individual fields for additional information.
#[derive(Debug, Default, Clone)]
pub struct Event {
    /// The index of the event within the parent [`Dataset`].
    pub index: usize,
    /// The weight of the event within the parent [`Dataset`].
    pub weight: f64,
    /// The beam [`FourMomentum`].
    pub beam_p4: FourMomentum,
    /// The recoil (target after interaction) [`FourMomentum`].
    pub recoil_p4: FourMomentum,
    /// [`FourMomentum`] of each other final state particle.
    pub daughter_p4s: Vec<FourMomentum>,
    /// A vector corresponding to the polarization of the beam (the `EPS` branch).
    pub eps: Vector3<f64>,
}

Expand All @@ -42,6 +122,12 @@ impl Display for Event {
}
impl Event {
/// Reads an [`Event`] from a single [`Row`] in a Parquet file.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row(
index: usize,
row: Result<Row, parquet::errors::ParquetError>,
) -> Result<Self, RustitudeError> {
Expand Down Expand Up @@ -154,6 +240,13 @@ impl Event {
Ok(event)
}
/// Reads an [`Event`] from a single [`Row`] in a Parquet file, assuming EPS is stored in the
/// beam four-momentum.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row_eps_in_beam(
index: usize,
row: Result<Row, parquet::errors::ParquetError>,
) -> Result<Self, RustitudeError> {
Expand Down Expand Up @@ -253,6 +346,12 @@ impl Event {
}

/// Reads an [`Event`] from a single [`Row`] in a Parquet file and set EPS for all events.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row_with_eps(
index: usize,
row: Result<Row, parquet::errors::ParquetError>,
eps: Vector3<f64>,
Expand Down Expand Up @@ -354,15 +453,30 @@ impl Event {
}

/// Reads an [`Event`] from a single [`Row`] in a Parquet file and sets EPS = `[0, 0, 0]` for
/// all events.
///
/// This is a thin wrapper which delegates to [`Event::read_parquet_row_with_eps`] with a
/// zero vector.
///
/// # Panics
///
/// This method currently panics if the list-like group types don't contain floats. This
/// eventually needs to be sorted out.
fn read_parquet_row_unpolarized(
    index: usize,
    row: Result<Row, parquet::errors::ParquetError>,
) -> Result<Self, RustitudeError> {
    Self::read_parquet_row_with_eps(index, row, Vector3::default())
}
}

/// An array of [`Event`]s with some helpful methods for accessing and parsing the data they
/// contain.
///
/// A [`Dataset`] can be loaded from either Parquet or ROOT files using the corresponding
/// `Dataset::from_*` methods. Events are stored in an [`Arc<RwLock<Vec<Event>>>`], since we
/// rarely need to write data to a dataset (splitting/selecting/rejecting events) but often need to
/// read events from a dataset.
#[derive(Default, Debug, Clone)]
pub struct Dataset {
    /// Storage for events.
    pub events: Arc<RwLock<Vec<Event>>>,
}

Expand All @@ -374,16 +488,9 @@ impl Dataset {
self.events.read_arc().iter().map(|e| e.weight).collect()
}

// TODO:
// pub fn select(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> PyDataset {}
// pub fn reject(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> PyDataset {}
// pub fn split(
// mut self,
// variable: impl Fn(&Event) -> f64 + Sync + Send,
// range: (f64, f64),
// nbins: usize,
// ) -> (Vec<PyDataset>, PyDataset, PyDataset) {}

/// Splits the dataset by the mass of the combination of specified daughter particles in the
/// event. If no daughters are given, the first and second particle are assumed to form the
/// desired combination.
pub fn split_m(
&self,
range: (f64, f64),
Expand Down Expand Up @@ -421,6 +528,13 @@ impl Dataset {
))
}

/// Generates a new [`Dataset`] from a Parquet file, assuming the EPS vector can be constructed
/// from the x and y-components of the beam.
///
/// # Errors
///
/// This method will fail if any individual event is missing all of the required fields, if
/// they have the wrong type, or if the file doesn't exist/can't be read for any reason.
pub fn from_parquet_eps_in_beam(path: &str) -> Result<Self, RustitudeError> {
let path = Path::new(path);
let file = File::open(path)?;
Expand All @@ -434,6 +548,12 @@ impl Dataset {
))
}

/// Generates a new [`Dataset`] from a Parquet file with a given EPS polarization vector.
///
/// # Errors
///
/// This method will fail if any individual event is missing all of the required fields, if
/// they have the wrong type, or if the file doesn't exist/can't be read for any reason.
pub fn from_parquet_with_eps(path: &str, eps: Vec<f64>) -> Result<Self, RustitudeError> {
let path = Path::new(path);
let file = File::open(path)?;
Expand All @@ -448,6 +568,12 @@ impl Dataset {
))
}

/// Generates a new [`Dataset`] from a Parquet file with EPS = `[0, 0, 0]`.
///
/// # Errors
///
/// This method will fail if any individual event is missing all of the required fields, if
/// they have the wrong type, or if the file doesn't exist/can't be read for any reason.
pub fn from_parquet_unpolarized(path: &str) -> Result<Self, RustitudeError> {
let path = Path::new(path);
let file = File::open(path)?;
Expand Down Expand Up @@ -683,20 +809,29 @@ impl Dataset {
.collect(),
))
}

/// Creates a new [`Dataset`] from a [`Vec<Event>`].
pub fn new(events: Vec<Event>) -> Self {
    let events = Arc::new(RwLock::new(events));
    Self { events }
}

/// Returns `true` if the dataset contains no events.
pub fn is_empty(&self) -> bool {
    let events = self.events.read();
    events.is_empty()
}

/// Returns the number of events stored in the dataset.
pub fn len(&self) -> usize {
    let events = self.events.read();
    events.len()
}

/// Selects events in the dataset using the given query and remove them from the [`Dataset`],
/// returning them in a new [`Dataset`]. The original [`Dataset`] will then contain the
/// "rejected" events, events for which the query returned `false`.
///
/// See also: [`Dataset::reject`]
pub fn select(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> Self {
let (mut selected, mut rejected): (Vec<_>, Vec<_>) =
self.events.write().par_drain(..).partition(query);
Expand All @@ -712,10 +847,19 @@ impl Dataset {
Self::new(rejected)
}

/// Removes events from the dataset for which the query returns `false` and returns them in a
/// new [`Dataset`]. The original [`Dataset`] will contain events for which the query returned
/// `true`.
///
/// This is implemented as [`Dataset::select`] with the query negated, so it behaves as the
/// exact complement of that method. (NOTE(review): the previous doc said events are removed
/// when the query returns `true`, which contradicted the second sentence and the
/// `select(!query)` implementation below.)
///
/// See also: [`Dataset::select`]
pub fn reject(&mut self, query: impl Fn(&Event) -> bool + Sync + Send) -> Self {
    self.select(|event| !query(event))
}

/// Splits the dataset into bins of the specified variable derived from an [`Event`]. This
/// method returns a [`Vec<Dataset>`] containing the binned datasets, an underflow [`Dataset`]
/// (events which are below the lower range), and an overflow [`Dataset`] (events above the
/// upper range) in that order.
pub fn split(
mut self,
variable: impl Fn(&Event) -> f64 + Sync + Send,
Expand Down
1 change: 0 additions & 1 deletion crates/rustitude-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,6 @@
clippy::missing_safety_doc,
clippy::missing_panics_doc,
clippy::missing_errors_doc,
clippy::missing_docs_in_private_items,
missing_docs
)]
#![cfg_attr(feature = "simd", feature(portable_simd))]
Expand Down
Loading

0 comments on commit 04b99c3

Please sign in to comment.