diff --git a/Cargo.toml b/Cargo.toml
index ceccf3be..a85c18d9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -70,7 +70,6 @@ parking_lot = { workspace = true, optional = true }
 rayon.workspace = true
 once_cell = { version = "1.17.1", optional = true, features = ["std"] }
 num-traits = "0.2.15"
-smallvec = { version = "1.11.1", optional = true }
 matrixmultiply_mt = { version = "0.2.1", optional = true }
 matrixmultiply = { version = "0.3.8", optional = true }
 wide = "0.7.13"
@@ -91,7 +90,7 @@ dataset = ["dep:rand"]
 iris = []
 mnist = ["dataset", "dep:dirs", "dep:flate2", "dep:downloader", "dep:byteorder", "dep:http"]
 learn = []
-neural-network = ["learn", "dep:autograph_derive", "dep:crossbeam-channel", "dep:parking_lot", "dep:rand", "dep:once_cell", "dep:smallvec"]
+neural-network = ["learn", "dep:autograph_derive", "dep:crossbeam-channel", "dep:parking_lot", "dep:rand", "dep:once_cell"]

 [package.metadata.krnlc]
diff --git a/README.md b/README.md
index 90ed8ecc..99987396 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ model.set_training(true)?;
 let y = model.forward(x)?;
 let loss = y.cross_entropy_loss(t)?;
 loss.backward()?;
-for parameter in model.parameters_mut()? {
+for parameter in model.make_parameters_mut()? {
     optimizer.update(learning_rate, parameter)?;
 }
 ```
diff --git a/autograph_derive/src/lib.rs b/autograph_derive/src/lib.rs
index 1cea4bcb..6eb95db4 100644
--- a/autograph_derive/src/lib.rs
+++ b/autograph_derive/src/lib.rs
@@ -1,35 +1,8 @@
-/*!
-# Usage
-You can derive Layer and Forward for structs and enums:
-```text
-use autograph::{
-    anyhow::Result,
-    learn::neural_network::{
-        autograd::{Variable4, Variable2},
-        layer::{Layer, Forward, Flatten, Conv2, Relu, MaxPool2, Dense},
-    },
-};
-
-// Layer and Forward can be derived for structs composed of layers.
-#[derive(Layer, Forward)]
-#[autograph(forward(Variable4, Output=Variable2))]
-struct Network {
-    conv: Conv2,
-    flatten: Flatten,
-    dense: Dense,
-}
+#![forbid(unsafe_code)]

-// Can also be applied to enums.
-#[derive(Layer, Forward)]
-#[autograph(forward(Variable4, Output=Variable4))]
-enum Dynamic {
-    Conv(Conv2),
-    Pool(MaxPool2),
-}
-```
+/*!
+Derive macros for [**autograph**](https://docs.rs/autograph).
 */
-// TOOD: move docs to autograph::neural_network::layer
-// TODO: remove `#[layer]` attribute.

 use derive_syn_parse::Parse;
 use proc_macro::TokenStream;
@@ -170,44 +143,64 @@ impl Layers {
             }
         }
     }
-    fn collect(&self, method: Ident) -> TokenStream2 {
+    fn iter(&self, method: Ident) -> TokenStream2 {
         match self {
             Self::Struct(layers) => {
                 quote! {
                     ::std::iter::empty()
                     #(.chain(self.#layers.#method()))*
-                    .collect()
                 }
             }
             Self::Enum(layers) => {
                 quote! {
-                    match self {
-                        #(
-                            Self::#layers(layer) => layer.#method(),
-                        )*
-                    }
+                    ::std::iter::empty()
+                    #(
+                        .chain((if let Self::#layers(layer) = self {
+                            Some(layer.#method())
+                        } else {
+                            None
+                        }).into_iter().flatten())
+                    )*
                 }
             }
         }
     }
-    fn try_collect(&self, method: Ident) -> TokenStream2 {
+    fn try_iter_mut(&self, method: Ident) -> TokenStream2 {
         match self {
             Self::Struct(layers) => {
                 quote! {
                     Ok(
                         ::std::iter::empty()
                         #(.chain(self.#layers.#method()?))*
-                        .collect()
                     )
                 }
             }
             Self::Enum(layers) => {
+                let some_layer = quote! { Some(layer) };
+                let none = quote! { None };
+                let match_arms = layers.iter().enumerate().map(|(i, layer)| {
+                    let fields =
+                        (0..layers.len()).map(|u| if i == u { &some_layer } else { &none });
+                    quote! {
+                        Self::#layer(layer) => (#(#fields),*)
+                    }
+                });
+                let iters = (0 .. layers.len()).map(|u| {
+                    let index = Index::from(u);
+                    quote! {
+                        layers.#index.map(|layer| layer.#method()).transpose()?.into_iter().flatten()
+                    }
+                });
                 quote! {
-                    match self {
+                    let layers = match self {
+                        #(#match_arms),*
+                    };
+                    Ok(
+                        ::std::iter::empty()
                         #(
-                            Self::#layers(layer) => layer.#method(),
+                            .chain(#iters)
                         )*
-                    }
+                    )
                 }
             }
         }
@@ -227,7 +220,7 @@ impl Layers {
             quote! {
                 match self {
                     #(
-                        Self::#layers(layer) => Ok(Self::#layers(layer.#method()?)),
+                        Self::#layers(layer) => Ok(Self::#layers(layer.#method(#arg)?)),
                     )*
                 }
             }
@@ -282,23 +275,23 @@ fn layer_impl(input: TokenStream2) -> Result<TokenStream2> {
     let autograph = autograph_crate(&input.attrs)?;
     let ident = &input.ident;
     let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
+    let parameters = layers.iter(format_ident!("parameters"));
+    let make_parameters_mut = layers.try_iter_mut(format_ident!("make_parameters_mut"));
     let set_training = layers.try_for_each(format_ident!("set_training"), quote! { training });
-    let parameters = layers.collect(format_ident!("parameters"));
-    let parameters_mut = layers.try_collect(format_ident!("parameters_mut"));
     let cast_mut = layers.try_for_each(format_ident!("cast_mut"), quote!(scalar_type));
     let to_device_mut = layers.try_for_each(format_ident!("to_device_mut"), quote!(device.clone()));
     let into_device = layers.try_map(format_ident!("into_device"), quote! { device.clone() });
     Ok(quote! {
         #[automatically_derived]
         impl #impl_generics Layer for #ident #ty_generics #where_clause {
-            fn set_training(&mut self, training: bool) -> #autograph::anyhow::Result<()> {
-                #set_training
-            }
-            fn parameters(&self) -> #autograph::learn::neural_network::layer::ParameterVec {
+            fn parameters(&self) -> impl ::std::iter::Iterator<Item = #autograph::learn::neural_network::autograd::ParameterD> + '_ {
                 #parameters
             }
-            fn parameters_mut(&mut self) -> #autograph::anyhow::Result<#autograph::learn::neural_network::layer::ParameterMutVec> {
-                #parameters_mut
+            fn make_parameters_mut(&mut self) -> #autograph::anyhow::Result<impl ::std::iter::Iterator<Item = #autograph::learn::neural_network::autograd::ParameterViewMutD<'_>> + '_> {
+                #make_parameters_mut
+            }
+            fn set_training(&mut self, training: bool) -> #autograph::anyhow::Result<()> {
+                #set_training
             }
             fn cast_mut(&mut self, scalar_type: #autograph::krnl::scalar::ScalarType) -> #autograph::anyhow::Result<()> {
                 #cast_mut
@@ -315,8 +308,6 @@ fn layer_impl(input: TokenStream2) -> Result<TokenStream2> {
 }

 /// Derive for Layer.
-///
-/// See [`autograph_derive`](crate).
 #[proc_macro_derive(Layer, attributes(autograph, layer))]
 pub fn layer(input: TokenStream) -> TokenStream {
     match layer_impl(input.into()) {
@@ -370,8 +361,6 @@ fn forward_impl(input: TokenStream2) -> Result<TokenStream2> {
 }

 /// Derive for Forward.
-///
-/// See [`autograph_derive`](crate).
 #[proc_macro_derive(Forward, attributes(autograph, layer))]
 pub fn forward(input: TokenStream) -> TokenStream {
     match forward_impl(input.into()) {
diff --git a/benches/neural-network-benches/src/autograph_backend.rs b/benches/neural-network-benches/src/autograph_backend.rs
index 2299c5c1..d80074bd 100644
--- a/benches/neural-network-benches/src/autograph_backend.rs
+++ b/benches/neural-network-benches/src/autograph_backend.rs
@@ -80,7 +80,7 @@ impl LeNet5Classifier {
         loss.backward()?;
         let optimizer = self.optimizer.as_ref().unwrap();
         let learning_rate = 0.01;
-        for parameter in self.model.parameters_mut()? {
+        for parameter in self.model.make_parameters_mut()? {
             optimizer.update(learning_rate, parameter)?;
         }
         self.model.set_training(false)?;
diff --git a/examples/neural-network-mnist/src/main.rs b/examples/neural-network-mnist/src/main.rs
index 2dda9dad..778a5399 100644
--- a/examples/neural-network-mnist/src/main.rs
+++ b/examples/neural-network-mnist/src/main.rs
@@ -163,7 +163,6 @@ fn main() -> Result<()> {
     println!("model: {model:#?}");
     let parameter_count = model
         .parameters()
-        .iter()
         .map(|x| x.raw_dim().size())
         .sum::<usize>();
     println!(
@@ -303,7 +302,7 @@ fn train, Tensor1)>>>(
         .into_array()?
         .into_scalar();
     loss.backward()?;
-    for parameter in model.parameters_mut()? {
+    for parameter in model.make_parameters_mut()? {
         optimizer.update(learning_rate, parameter)?;
     }
     model.set_training(false)?;
diff --git a/src/learn/neural_network.rs b/src/learn/neural_network.rs
index 9f34ebb2..14a0ba4b 100644
--- a/src/learn/neural_network.rs
+++ b/src/learn/neural_network.rs
@@ -113,7 +113,7 @@ model.set_training(true)?;
 let y = model.forward(x)?;
 let loss = y.cross_entropy_loss(t)?;
 loss.backward()?;
-for parameter in model.parameters_mut()? {
+for parameter in model.make_parameters_mut()? {
     optimizer.update(learning_rate, parameter)?;
 }
 # Ok(())
diff --git a/src/learn/neural_network/layer.rs b/src/learn/neural_network/layer.rs
index d8dd131b..9348a536 100644
--- a/src/learn/neural_network/layer.rs
+++ b/src/learn/neural_network/layer.rs
@@ -21,26 +21,23 @@ use dry::macro_for;
 use half::bf16;
 #[cfg(feature = "device")]
 use krnl::buffer::ScalarSliceMut;
+#[cfg(feature = "device")]
+use krnl::macros::module;
 use krnl::{
     buffer::{Buffer, ScalarBuffer, ScalarData},
     device::Device,
     scalar::{Scalar, ScalarType},
 };
+use ndarray::{linalg::Dot, Dimension, IntoDimension, Ix1, Ix2};
 #[cfg(feature = "device")]
 use paste::paste;
-use rayon::iter::{IntoParallelIterator, ParallelIterator};
-
-#[cfg(feature = "device")]
-use krnl::macros::module;
-use ndarray::{linalg::Dot, Dimension, IntoDimension, Ix1, Ix2};
-
 use rand::{
     distributions::{Distribution, Uniform},
     thread_rng,
 };
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
-use smallvec::SmallVec;
 use std::any::Any;

 mod conv_direct;
@@ -598,15 +595,6 @@ pub mod builder {
 }
 use builder::*;

-/// ParameterVec
-///
-/// See [`Layer::parameters()`](Layer::parameters).
-pub type ParameterVec = SmallVec<[ParameterD; 2]>;
-/// ParameterMutVec
-///
-/// See [`Layer::parameters_mut()`](Layer::parameters_mut).
-pub type ParameterMutVec<'a> = SmallVec<[ParameterViewMutD<'a>; 2]>;
-
 /// Layer.
 ///
 /// Typically Layers implement [`Forward<X>`](Forward) for the appropriate
@@ -615,27 +603,57 @@ pub type ParameterMutVec<'a> = SmallVec<[ParameterViewMutD<'a>; 2]>;
 /// Layers with parameters or those that store the `device` or `scalar_type` should implement the
 /// relevant methods. Functional layers and activations may only need the default implementation.
 ///
-/// Layer can be [derived](autograph_derive) for structs and enums where each field or variant
-/// is a layer.
+/// # Derive
+/// [`Layer`] and [`Forward`] can be derived for structs and enums:
+/**
+```no_run
+# use autograph::anyhow::Result;
+# use autograph::learn::neural_network;
+# use neural_network::autograd::{Variable4, Variable2};
+# use neural_network::layer::{Layer, Forward, Flatten, Conv2, Relu, MaxPool2, Dense};
+
+// Layer and Forward can be derived for structs composed of layers.
+#[derive(Layer, Forward)]
+#[autograph(forward(Variable4, Output=Variable2))]
+struct Network {
+    conv: Conv2,
+    flatten: Flatten,
+    dense: Dense,
+}
+
+// Can also be applied to enums.
+#[derive(Layer, Forward)]
+#[autograph(forward(Variable4, Output=Variable4))]
+enum Dynamic {
+    Conv(Conv2),
+    Pool(MaxPool2),
+}
+```
+*/
 pub trait Layer {
-    /// Prepares for training or inference.
+    /// Iterator over Parameters of the layer.
     ///
-    /// Calls [`.set_training(training)`](Parameter::set_training) on each parameter and
-    /// [`.set_training(training)`][Layer::set_training] on each child layer as appropriate.
-    fn set_training(&mut self, #[allow(unused_variables)] training: bool) -> Result<()> {
-        Ok(())
-    }
-    /// Parameters of the layer.
-    fn parameters(&self) -> ParameterVec {
-        ParameterVec::new()
+    /// Layers with parameters should implement all the relevant Layer methods.
+    fn parameters(&self) -> impl Iterator<Item = ParameterD> + '_ {
+        std::iter::empty()
     }
-    /// Mutable parameter views of the parameters of the layer.
+    /// Makes an iterator over mutable parameter views of the layer.
     ///
     /// The mutable parameter views can be provided to [`Optimizer::update()`](Optimizer::update).
     ///
+    /// # Errors
+    /// - The parameters are not exclusive, and could not be copied on the device.
+    ///
     /// See [`Parameter::make_view_mut()`](Parameter::make_view_mut).
-    fn parameters_mut(&mut self) -> Result<ParameterMutVec> {
-        Ok(ParameterMutVec::new())
+    fn make_parameters_mut(&mut self) -> Result<impl Iterator<Item = ParameterViewMutD<'_>> + '_> {
+        Ok(std::iter::empty())
+    }
+    /// Prepares for training or inference.
+    ///
+    /// Calls [`.set_training(training)`](Parameter::set_training) on each parameter and
+    /// [`.set_training(training)`][Layer::set_training] on each child layer as appropriate.
+    fn set_training(&mut self, #[allow(unused_variables)] training: bool) -> Result<()> {
+        Ok(())
     }
     /// Casts the layer to `scalar_type` in place.
     fn cast_mut(&mut self, #[allow(unused_variables)] scalar_type: ScalarType) -> Result<()> {
@@ -653,10 +671,9 @@ pub trait Layer {
         Ok(self)
     }
 }
-
 /// Forward.
 ///
-/// Forward can be [derived](autograph_derive).
+/// Forward can be [derived](Layer#derive).
 pub trait Forward<X> {
     /// The type of the Output.
     type Output;
@@ -665,22 +682,24 @@ impl Layer for Option {
+    fn parameters(&self) -> impl Iterator<Item = ParameterD> + '_ {
+        self.as_ref()
+            .into_iter()
+            .flat_map(|layer| layer.parameters())
+    }
+    fn make_parameters_mut(&mut self) -> Result<impl Iterator<Item = ParameterViewMutD<'_>> + '_> {
+        Ok(self
+            .as_mut()
+            .map(Layer::make_parameters_mut)
+            .transpose()?
+            .into_iter()
+            .flatten())
+    }
     fn set_training(&mut self, training: bool) -> Result<()> {
         if let Some(layer) = self.as_mut() {
-            layer.set_training(training)
-        } else {
-            Ok(())
+            layer.set_training(training)?;
         }
-    }
-    fn parameters(&self) -> ParameterVec {
-        self.as_ref()
-            .map(|layer| layer.parameters())
-            .unwrap_or_default()
-    }
-    fn parameters_mut(&mut self) -> Result<ParameterMutVec> {
-        self.as_mut()
-            .map(|layer| layer.parameters_mut())
-            .unwrap_or(Ok(ParameterMutVec::new()))
+        Ok(())
     }
     fn cast_mut(&mut self, scalar_type: ScalarType) -> Result<()> {
         if let Some(layer) = self.as_mut() {
@@ -688,7 +707,7 @@ impl Layer for Option {
         }
         Ok(())
     }
-    fn to_device_mut(&mut self, #[allow(unused_variables)] device: Device) -> Result<()> {
+    fn to_device_mut(&mut self, device: Device) -> Result<()> {
         if let Some(layer) = self.as_mut() {
             layer.to_device_mut(device)?;
         }
@@ -718,21 +737,16 @@ impl Layer for Vec {
         self.iter_mut()
             .try_for_each(|layer| layer.set_training(training))
     }
-    fn parameters(&self) -> ParameterVec {
-        self.iter().flat_map(Layer::parameters).collect()
+    fn parameters(&self) -> impl Iterator<Item = ParameterD> + '_ {
+        self.iter().flat_map(Layer::parameters)
     }
-    fn parameters_mut(&mut self) -> Result<ParameterMutVec> {
-        if self.is_empty() {
-            Ok(ParameterMutVec::new())
-        } else if self.len() == 1 {
-            self.first_mut().unwrap().parameters_mut()
-        } else {
-            let mut parameter_vecs = SmallVec::<[ParameterMutVec; 8]>::with_capacity(self.len());
-            for layer in self.iter_mut() {
-                parameter_vecs.push(layer.parameters_mut()?);
-            }
-            Ok(parameter_vecs.into_iter().flatten().collect())
+    fn make_parameters_mut(&mut self) -> Result<impl Iterator<Item = ParameterViewMutD<'_>> + '_> {
+        for layer in self.iter_mut() {
+            layer.make_parameters_mut()?;
         }
+        Ok(self
+            .iter_mut()
+            .flat_map(|layer| layer.make_parameters_mut().unwrap()))
     }
     fn cast_mut(&mut self, scalar_type: ScalarType) -> Result<()> {
         self.iter_mut()
@@ -832,6 +846,20 @@ impl Conv {
 }

 impl Layer for Conv {
+    fn parameters(&self) -> impl Iterator<Item = ParameterD> + '_ {
+        let weight = self.weight.clone().into_dyn();
+        let bias = self.bias.clone().map(Parameter::into_dyn);
+        std::iter::once(weight).chain(bias)
+    }
+    fn make_parameters_mut(&mut self) -> Result<impl Iterator<Item = ParameterViewMutD<'_>> + '_> {
+        let weight = self.weight.make_view_mut()?.into_dyn();
+        let bias = if let Some(bias) = self.bias.as_mut() {
+            Some(bias.make_view_mut()?.into_dyn())
+        } else {
+            None
+        };
+        Ok(std::iter::once(weight).chain(bias))
+    }
     fn set_training(&mut self, training: bool) -> Result<()> {
         self.weight.set_training(training);
         if let Some(bias) = self.bias.as_mut() {
@@ -839,22 +867,6 @@ impl Layer for Conv {
         }
         Ok(())
     }
-    fn parameters(&self) -> ParameterVec {
-        let mut parameters = ParameterVec::new();
-        parameters.push(self.weight.clone().into_dyn());
-        if let Some(bias) = self.bias.as_ref() {
-            parameters.push(bias.clone().into_dyn());
-        }
-        parameters
-    }
-    fn parameters_mut(&mut self) -> Result<ParameterMutVec> {
-        let mut parameters = ParameterMutVec::new();
-        parameters.push(self.weight.make_view_mut()?.into_dyn());
-        if let Some(bias) = self.bias.as_mut() {
-            parameters.push(bias.make_view_mut()?.into_dyn());
-        }
-        Ok(parameters)
-    }
     fn to_device_mut(&mut self, device: Device) -> Result<()> {
         self.weight.to_device_mut(device.clone())?;
         if let Some(bias) = self.bias.as_mut() {
@@ -1051,6 +1063,20 @@ impl Dense {
 }

 impl Layer for Dense {
+    fn parameters(&self) -> impl Iterator<Item = ParameterD> + '_ {
+        let weight = self.weight.clone().into_dyn();
+        let bias = self.bias.clone().map(Parameter::into_dyn);
+        std::iter::once(weight).chain(bias)
+    }
+    fn make_parameters_mut(&mut self) -> Result<impl Iterator<Item = ParameterViewMutD<'_>> + '_> {
+        let weight = self.weight.make_view_mut()?.into_dyn();
+        let bias = if let Some(bias) = self.bias.as_mut() {
+            Some(bias.make_view_mut()?.into_dyn())
+        } else {
+            None
+        };
+        Ok(std::iter::once(weight).chain(bias))
+    }
     fn set_training(&mut self, training: bool) -> Result<()> {
         self.weight.set_training(training);
         if let Some(bias) = self.bias.as_mut() {
@@ -1058,22 +1084,6 @@ impl Layer for Dense {
         }
         Ok(())
     }
-    fn parameters(&self) -> ParameterVec {
-        let mut parameters = ParameterVec::new();
-        parameters.push(self.weight.clone().into_dyn());
-        if let Some(bias) = self.bias.as_ref() {
-            parameters.push(bias.clone().into_dyn());
-        }
-        parameters
-    }
-    fn parameters_mut(&mut self) -> Result<ParameterMutVec> {
-        let mut parameters = ParameterMutVec::new();
-        parameters.push(self.weight.make_view_mut()?.into_dyn());
-        if let Some(bias) = self.bias.as_mut() {
-            parameters.push(bias.make_view_mut()?.into_dyn());
-        }
-        Ok(parameters)
-    }
     fn to_device_mut(&mut self, device: Device) -> Result<()> {
         self.weight.to_device_mut(device.clone())?;
         if let Some(bias) = self.bias.as_mut() {
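For reference, a minimal sketch of the call sites under the revised `Layer` API: `parameters()` now returns a lazy iterator of `ParameterD`, and `parameters_mut()` is replaced by `make_parameters_mut()`, which returns `Result<impl Iterator<Item = ParameterViewMutD<'_>> + '_>`. This assumes `model`, `optimizer`, `x`, `t`, and `learning_rate` are set up as in the README and MNIST example touched above; it is illustrative only, not additional changed code.

```rust
// Counting parameters no longer goes through a SmallVec, so `.iter()` is dropped.
let parameter_count: usize = model.parameters().map(|p| p.raw_dim().size()).sum();

model.set_training(true)?;
let y = model.forward(x)?;
let loss = y.cross_entropy_loss(t)?;
loss.backward()?;
// make_parameters_mut()? yields mutable parameter views to pass to the optimizer.
for parameter in model.make_parameters_mut()? {
    optimizer.update(learning_rate, parameter)?;
}
```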