From b0ff6d4f28b9a4bd0b7b7c56c826500fbad9e140 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Tue, 23 Jan 2024 07:04:28 +0000 Subject: [PATCH 01/16] build: remove Flux and Optimisers --- Project.toml | 9 +++------ src/NeuralPDE.jl | 2 -- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 1013977bad..bfd0eeb542 100644 --- a/Project.toml +++ b/Project.toml @@ -15,7 +15,6 @@ DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf" -Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" Integrals = "de52edbc-65ea-441a-8357-d3a637375a31" @@ -26,8 +25,8 @@ MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" Optim = "429524aa-4258-5aef-a3af-852621145aeb" -Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -53,7 +52,6 @@ DiffEqNoiseProcess = "5.1" Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" DomainSets = "0.6, 0.7" -Flux = "0.13, 0.14" ForwardDiff = "0.10" Functors = "0.4" Integrals = "4" @@ -64,8 +62,8 @@ MCMCChains = "6" ModelingToolkit = "8" MonteCarloMeasurements = "1" Optim = "1.7.8" -Optimisers = "0.2, 0.3" Optimization = "3" +OptimizationOptimisers = "0.1" QuasiMonteCarlo = "0.3.2" Reexport = "1.0" RuntimeGeneratedFunctions = "0.5" @@ -83,7 +81,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" -OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" @@ -91,4 +88,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [targets] -test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimisers", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "cuDNN", "LuxCUDA"] +test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "cuDNN", "LuxCUDA"] diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 931fb24a5c..0ca4009da7 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -24,12 +24,10 @@ using Symbolics: wrap, unwrap, arguments, operation using SymbolicUtils using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains using MonteCarloMeasurements - import ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives import DomainSets: Domain, ClosedInterval import ModelingToolkit: Interval, infimum, supremum #,Ball import SciMLBase: @add_kwonly, parameterless_type -import Optimisers import UnPack: @unpack import ChainRulesCore, Flux, Lux, ComponentArrays import ChainRulesCore: @non_differentiable From ddbd569eaeb5979b19b834943361c570dda21a20 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Tue, 23 Jan 2024 07:04:49 
+0000
Subject: [PATCH 02/16] refactor: remove Flux support

---
 src/BPINN_ode.jl | 21 ++---
 src/NeuralPDE.jl | 5 +-
 src/PDE_BPINN.jl | 169 ++++++++++++-------------------
 src/adaptive_losses.jl | 32 +++----
 src/advancedHMC_MCMC.jl | 68 +++++----------
 src/discretize.jl | 119 ++++++--------------------
 src/ode_solve.jl | 42 ++-------
 src/pinn_types.jl | 26 ++----
 src/training_strategies.jl | 1 +
 9 files changed, 138 insertions(+), 345 deletions(-)

diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl
index 3b6fd428d3..06c4fa4feb 100644
--- a/src/BPINN_ode.jl
+++ b/src/BPINN_ode.jl
@@ -22,7 +22,7 @@ of the physics-informed neural network which is used as a solver for a standard

 ## Positional Arguments

-* `chain`: A neural network architecture, defined as either a `Flux.Chain` or a `Lux.AbstractExplicitLayer`.
+* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`.
 * `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC`

 ## Keyword Arguments
@@ -46,18 +46,18 @@ dataset = [x̂, time]

 chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1))

-alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2000,
+alg = BNNODE(chainlux, draw_samples = 2000,
             l2std = [0.05], phystd = [0.05],
             priorsNNw = (0.0, 3.0), progress = true)

 sol_lux = solve(prob, alg)

 # with parameter estimation
-alg = NeuralPDE.BNNODE(chainlux,dataset = dataset,
-            draw_samples = 2000,l2std = [0.05],
-            phystd = [0.05],priorsNNw = (0.0, 10.0),
-            param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
-            progress = true)
+alg = BNNODE(chainlux,dataset = dataset,
+            draw_samples = 2000,l2std = [0.05],
+            phystd = [0.05],priorsNNw = (0.0, 10.0),
+            param = [Normal(6.5, 0.5), Normal(-3, 0.5)],
+            progress = true)

 sol_lux_pestim = solve(prob, alg)
```
@@ -222,13 +222,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem,
         luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble]
         # only need for size
         θinit = collect(ComponentArrays.ComponentArray(θinit))
-    elseif chain isa Flux.Chain
-        θinit, re1 = Flux.destructure(chain)
-        out = re1.([samples[i][1:(end - ninv)]
-                    for i in (draw_samples - numensemble):draw_samples])
-        luxar = collect(out[i](t') for i in eachindex(out))
     else
-        throw(error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported"))
+        throw(error("Only Lux.AbstractExplicitLayer neural networks are supported"))
     end

     # constructing ensemble predictions
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 0ca4009da7..7d457c2650 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -11,6 +11,7 @@ using Reexport, Statistics
 using Zygote, ForwardDiff, Random, Distributions
 using Adapt, DiffEqNoiseProcess, StochasticDiffEq
 using Optimization
+using OptimizationOptimisers
 using Integrals, Cubature
 using QuasiMonteCarlo
 using RuntimeGeneratedFunctions
@@ -29,7 +30,7 @@ import DomainSets: Domain, ClosedInterval
 import ModelingToolkit: Interval, infimum, supremum #,Ball
 import SciMLBase: @add_kwonly, parameterless_type
 import UnPack: @unpack
-import ChainRulesCore, Flux, Lux, ComponentArrays
+import ChainRulesCore, Lux, ComponentArrays
 import ChainRulesCore: @non_differentiable

 RuntimeGeneratedFunctions.init(@__MODULE__)
@@ -43,7 +44,7 @@ include("symbolic_utilities.jl")
 include("training_strategies.jl")
 include("adaptive_losses.jl")
 include("ode_solve.jl")
-include("rode_solve.jl")
+# include("rode_solve.jl")
 include("transform_inf_integral.jl")
 include("discretize.jl")
 include("neural_adapter.jl")
diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index
b63741e9b6..65bbcd884a 100644 --- a/src/PDE_BPINN.jl +++ b/src/PDE_BPINN.jl @@ -79,36 +79,22 @@ function setparameters(Tar::PDELogTargetDensity, θ) ps_new = θ[1:(end - Tar.extraparams)] ps = Tar.init_params - if (ps[names[1]] isa ComponentArrays.ComponentVector) - # multioutput case for Lux chains, for each depvar ps would contain Lux ComponentVectors - # which we use for mapping current ahmc sampled vector of parameters onto NNs + # multioutput case for Lux chains, for each depvar ps would contain Lux ComponentVectors + # which we use for mapping current ahmc sampled vector of parameters onto NNs + i = 0 + Luxparams = [vector_to_parameters(ps_new[((i += length(ps[x])) - length(ps[x]) + 1):i], + ps[x]) for x in names] - i = 0 - Luxparams = [vector_to_parameters(ps_new[((i += length(ps[x])) - length(ps[x]) + 1):i], - ps[x]) for x in names] + a = ComponentArrays.ComponentArray(NamedTuple{Tar.names}(i for i in Luxparams)) + if Tar.extraparams > 0 + b = θ[(end - Tar.extraparams + 1):end] + return ComponentArrays.ComponentArray(; + depvar = a, + p = b) else - # multioutput Flux - Luxparams = θ - end - - if (Luxparams isa AbstractVector) && (Luxparams[1] isa ComponentArrays.ComponentVector) - # multioutput Lux - a = ComponentArrays.ComponentArray(NamedTuple{Tar.names}(i for i in Luxparams)) - - if Tar.extraparams > 0 - b = θ[(end - Tar.extraparams + 1):end] - - return ComponentArrays.ComponentArray(; - depvar = a, - p = b) - else - return ComponentArrays.ComponentArray(; - depvar = a) - end - else - # multioutput fLux case - return vector_to_parameters(Luxparams, ps) + return ComponentArrays.ComponentArray(; + depvar = a) end end @@ -138,33 +124,18 @@ function L2LossData(Tar::PDELogTargetDensity, θ) # dataset[i][:, 1] -> depvar col of depvar's dataset if Tar.extraparams > 0 - if Tar.init_params isa ComponentArrays.ComponentVector - for i in eachindex(Φ) - sumt += logpdf(MvNormal(Φ[i](dataset[i][:, 2:end]', - vector_to_parameters(θ[1:(end - Tar.extraparams)], - init_params)[Tar.names[i]])[1, - :], - LinearAlgebra.Diagonal(abs2.(ones(size(dataset[i])[1]) .* - L2stds[i]))), - dataset[i][:, 1]) - end - sumt - else - # Flux case needs subindexing wrt Tar.names indices(hence stored in Tar.names) - for i in eachindex(Φ) - sumt += logpdf(MvNormal(Φ[i](dataset[i][:, 2:end]', - vector_to_parameters(θ[1:(end - Tar.extraparams)], - init_params)[Tar.names[2][i]])[1, - :], - LinearAlgebra.Diagonal(abs2.(ones(size(dataset[i])[1]) .* - L2stds[i]))), - dataset[i][:, 1]) - end - sumt + for i in eachindex(Φ) + sumt += logpdf(MvNormal(Φ[i](dataset[i][:, 2:end]', + vector_to_parameters(θ[1:(end - Tar.extraparams)], + init_params)[Tar.names[i]])[1, + :], + LinearAlgebra.Diagonal(abs2.(ones(size(dataset[i])[1]) .* + L2stds[i]))), + dataset[i][:, 1]) end - else - return 0 + return sumt end + return 0 end # priors for NN parameters + ODE constants @@ -182,10 +153,9 @@ function priorlogpdf(Tar::PDELogTargetDensity, θ) return (invlogpdf + - logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)])) - else - return logpdf(nnwparams, θ) + logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)])) end + return logpdf(nnwparams, θ) end function integratorchoice(Integratorkwargs, initial_ϵ) @@ -243,56 +213,34 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ) for i in (nnparams + 1):(nnparams + ninv)] end - # names is an indicator of type of chain - if names[1] != 1 - # getting parameter ranges in case of Lux chains - Luxparams = [] - i = 0 - for x in names - len = length(initial_nnθ[x]) - push!(Luxparams, (i + 1):(i + 
len))
-            i += len
-        end
-
-        # convert to format directly usable by lux
-        estimatedLuxparams = [vector_to_parameters(estimnnparams[Luxparams[i]],
-            initial_nnθ[names[i]]) for i in eachindex(phi)]
-
-        # infer predictions(preds) each row - NN, each col - ith sample
-        samplesn = reduce(hcat, samples)
-        preds = []
-        for j in eachindex(phi)
-            push!(preds,
-                [phi[j](timepoints[j],
-                    vector_to_parameters(samplesn[:, i][Luxparams[j]],
-                        initial_nnθ[names[j]])) for i in 1:numensemble])
-        end
-
-        # note here no of samples referse to numensemble and points is the no of points in each dep_vars discretization
-        # each phi will give output in single domain of depvar(so we have each row as a vector of vector outputs)
-        # so we get after reduce a single matrix of n rows(samples), and j cols(points)
-        ensemblecurves = [Particles(reduce(vcat, preds[i])) for i in eachindex(phi)]
-
-        return ensemblecurves, estimatedLuxparams, estimated_params, timepoints
-    else
-        # get intervals for parameters corresponding to flux chains
-        Fluxparams = names[2]
-
-        # convert to format directly usable by Flux
-        estimatedFluxparams = [estimnnparams[Fluxparams[i]] for i in eachindex(phi)]
-
-        # infer predictions(preds) each row - NN, each col - ith sample
-        samplesn = reduce(hcat, samples)
-        preds = []
-        for j in eachindex(phi)
-            push!(preds,
-                [phi[j](timepoints[j], samplesn[:, i][Fluxparams[j]]) for i in 1:numensemble])
-        end
-
-        ensemblecurves = [Particles(reduce(vcat, preds[i])) for i in eachindex(phi)]
+    # getting parameter ranges in case of Lux chains
+    Luxparams = []
+    i = 0
+    for x in names
+        len = length(initial_nnθ[x])
+        push!(Luxparams, (i + 1):(i + len))
+        i += len
+    end

-        return ensemblecurves, estimatedFluxparams, estimated_params, timepoints
+    # convert to format directly usable by Lux
+    estimatedLuxparams = [vector_to_parameters(estimnnparams[Luxparams[i]],
+        initial_nnθ[names[i]]) for i in eachindex(phi)]
+
+    # infer predictions(preds) each row - NN, each col - ith sample
+    samplesn = reduce(hcat, samples)
+    preds = []
+    for j in eachindex(phi)
+        push!(preds,
+            [phi[j](timepoints[j],
+                vector_to_parameters(samplesn[:, i][Luxparams[j]],
+                    initial_nnθ[names[j]])) for i in 1:numensemble])
     end
+
+    # note here the no of samples refers to numensemble and points is the no of points in each dep_vars discretization
+    # each phi will give output in single domain of depvar(so we have each row as a vector of vector outputs)
+    # so we get after reduce a single matrix of n rows(samples), and j cols(points)
+    ensemblecurves = [Particles(reduce(vcat, preds[i])) for i in eachindex(phi)]
+    return ensemblecurves, estimatedLuxparams, estimated_params, timepoints
 end

"""
@@ -396,20 +344,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     # contains only NN parameters
     initial_nnθ = pinnrep.init_params

-    if (discretization.multioutput && chain[1] isa Lux.AbstractExplicitLayer)
-        # converting vector of parameters to ComponentArray for runtimegenerated functions
-        names = ntuple(i -> pinnrep.depvars[i], length(chain))
-    else
-        # Flux multioutput
-        i = 0
-        temp = []
-        for j in eachindex(initial_nnθ)
-            len = length(initial_nnθ[j])
-            push!(temp, (i + 1):(i + len))
-            i += len
-        end
-        names = tuple(1, temp)
-    end
+    names = ntuple(i -> pinnrep.depvars[i], length(chain))

     #ode parameter estimation
     nparameters = length(initial_θ)
diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl
index b37023da7c..6bfb192194 100644
--- a/src/adaptive_losses.jl
+++ b/src/adaptive_losses.jl
@@ -11,7 +11,6 @@ function vectorify(x, t::Type{T})
where {T <: Real}
end

# Dispatches
-
"""
```julia
NonAdaptiveLoss{T}(; pde_loss_weights = 1,
@@ -159,8 +158,8 @@ end
"""
```julia
function MiniMaxAdaptiveLoss(reweight_every;
-                             pde_max_optimiser = Flux.ADAM(1e-4),
-                             bc_max_optimiser = Flux.ADAM(0.5),
+                             pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
+                             bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
                              pde_loss_weights = 1,
                              bc_loss_weights = 1,
                              additional_loss_weights = 1)
```
@@ -178,9 +177,9 @@ where loss functions that have not been satisfied get a greater weight,

## Keyword Arguments

-* `pde_max_optimiser`: a Flux.Optimise.AbstractOptimiser that is used internally to
+* `pde_max_optimiser`: an OptimizationOptimisers optimiser that is used internally to
  maximize the weights of the PDE loss functions.
-* `bc_max_optimiser`: a Flux.Optimise.AbstractOptimiser that is used internally to maximize
+* `bc_max_optimiser`: an OptimizationOptimisers optimiser that is used internally to maximize
  the weights of the BC loss functions.

## References

Self-Adaptive Physics-Informed Neural Networks using a Soft Attention Mechanism
Levi McClenny, Ulisses Braga-Neto
https://arxiv.org/abs/2009.04544
"""
mutable struct MiniMaxAdaptiveLoss{T <: Real,
-                                   PDE_OPT <: Flux.Optimise.AbstractOptimiser,
-                                   BC_OPT <: Flux.Optimise.AbstractOptimiser} <:
+                                   PDE_OPT,
+                                   BC_OPT} <:
               AbstractAdaptiveLoss
    reweight_every::Int64
    pde_max_optimiser::PDE_OPT
    bc_max_optimiser::BC_OPT
    pde_loss_weights::Vector{T}
    bc_loss_weights::Vector{T}
    additional_loss_weights::Vector{T}
    SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss{T,
        PDE_OPT, BC_OPT}(reweight_every;
-                        pde_max_optimiser = Flux.ADAM(1e-4),
-                        bc_max_optimiser = Flux.ADAM(0.5),
+                        pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
+                        bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
                         pde_loss_weights = 1,
                         bc_loss_weights = 1,
                         additional_loss_weights = 1) where {
        T <: Real,
-        PDE_OPT <:
-        Flux.Optimise.AbstractOptimiser,
-        BC_OPT <:
-        Flux.Optimise.AbstractOptimiser
+        PDE_OPT,
+        BC_OPT
    }
        new(convert(Int64, reweight_every), convert(PDE_OPT, pde_max_optimiser),
            convert(BC_OPT, bc_max_optimiser),
@@ -222,8 +219,8 @@ end

# default to Float64, Adam, Adam
SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss(reweight_every;
-                                                   pde_max_optimiser = Flux.ADAM(1e-4),
-                                                   bc_max_optimiser = Flux.ADAM(0.5),
+                                                   pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
+                                                   bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
                                                    pde_loss_weights = 1,
                                                    bc_loss_weights = 1,
                                                    additional_loss_weights = 1)
@@ -245,9 +242,8 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation,

    function run_minimax_adaptive_loss(θ, pde_losses, bc_losses)
        if iteration[1] % adaloss.reweight_every == 0
-            Flux.Optimise.update!(pde_max_optimiser, adaloss.pde_loss_weights,
-                -pde_losses)
-            Flux.Optimise.update!(bc_max_optimiser, adaloss.bc_loss_weights, -bc_losses)
+            OptimizationOptimisers.Optimisers.update(pde_max_optimiser, adaloss.pde_loss_weights, -pde_losses)
+            OptimizationOptimisers.Optimisers.update(bc_max_optimiser, adaloss.bc_loss_weights, -bc_losses)
            logvector(pinnrep.logger, adaloss.pde_loss_weights,
                "adaptive_loss/pde_loss_weights", iteration[1])
            logvector(pinnrep.logger, adaloss.bc_loss_weights,
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 1a2c47de0d..aa1839557b 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -285,25 +285,9 @@ function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
    return θ, chain, st
end

-function generate_Tar(chain::Flux.Chain, init_params)
-    θ, re = Flux.destructure(chain)
-    return init_params, re, nothing
-end
-
-function
generate_Tar(chain::Flux.Chain, init_params::Nothing)
-    θ, re = Flux.destructure(chain)
-    # find_good_stepsize,phasepoint takes only float64
-    return θ, re, nothing
-end
-
"""
nn OUTPUT AT t,θ ~ phi(t,θ)
"""
-function (f::LogTargetDensity{C, S})(t::AbstractVector,
-        θ) where {C <: Optimisers.Restructure, S}
-    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
-end
-
function (f::LogTargetDensity{C, S})(t::AbstractVector,
        θ) where {C <: Lux.AbstractExplicitLayer, S}
    θ = vector_to_parameters(θ, f.init_params)
@@ -312,12 +296,6 @@ function (f::LogTargetDensity{C, S})(t::AbstractVector,
    f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
end

-function (f::LogTargetDensity{C, S})(t::Number,
-        θ) where {C <: Optimisers.Restructure, S}
-    # must handle paired odes hence u0 broadcasted
-    f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
-end
-
function (f::LogTargetDensity{C, S})(t::Number,
        θ) where {C <: Lux.AbstractExplicitLayer, S}
    θ = vector_to_parameters(θ, f.init_params)
@@ -407,24 +385,24 @@ time = sol.t[1:100]
x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
dataset = [x̂, time]

-chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)
+chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1))

# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
-fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0))
+fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1,
+                                                              dataset = dataset,
+                                                              draw_samples = 1500,
+                                                              l2std = [0.05],
+                                                              phystd = [0.05],
+                                                              priorsNNw = (0.0,3.0))

# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Prior Distributions for ODE params
-fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1,
-                                                                          dataset = dataset,
-                                                                          draw_samples = 1500,
-                                                                          l2std = [0.05],
-                                                                          phystd = [0.05],
-                                                                          priorsNNw = (0.0,3.0),
-                                                                          param = [Normal(6.5,0.5),Normal(-3,0.5)])
+fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1,
+                                                              dataset = dataset,
+                                                              draw_samples = 1500,
+                                                              l2std = [0.05],
+                                                              phystd = [0.05],
+                                                              priorsNNw = (0.0,3.0),
+                                                              param = [Normal(6.5,0.5), Normal(-3,0.5)])

## NOTES
Dataset is required for accurate Parameter estimation + solving equations
In case you are only solving the Equations for solution, do not provide dataset

## Positional Arguments
* `prob`: DEProblem(out of place and the function signature should be f(u,p,t))
* `chain`: Lux Neural Network which would be made the Bayesian PINN

## Keyword Arguments
* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization.
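For scripts being ported off the removed Flux path, a minimal sketch of the equivalent Lux chain construction (the layer sizes follow the docstring example above; `Lux.setup` and the `(x, ps, st)` call convention are standard Lux API):

```julia
using Lux, Random

# Lux equivalent of the removed Flux.Chain definition above
chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1))
ps, st = Lux.setup(Random.default_rng(), chain1)  # parameters and state live outside the layers
y, st = chain1([0.5], ps, st)                     # Lux layers are called as (x, ps, st)
```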
@@ -497,11 +475,11 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; throw(error("Dataset Required for Parameter Estimation.")) end - if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain - # Flux-vector, Lux-Named Tuple + if chain isa Lux.AbstractExplicitLayer + # Lux-Named Tuple initial_nnθ, recon, st = generate_Tar(chain, init_params) else - error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported") + error("Only Lux.AbstractExplicitLayer neural networks are supported") end if nchains > Threads.nthreads() @@ -511,13 +489,9 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; end # eltype(physdt) cause needs Float64 for find_good_stepsize - if chain isa Lux.AbstractExplicitLayer - # Lux chain(using component array later as vector_to_parameter need namedtuple) - initial_θ = collect(eltype(physdt), - vcat(ComponentArrays.ComponentArray(initial_nnθ))) - else - initial_θ = collect(eltype(physdt), initial_nnθ) - end + # Lux chain(using component array later as vector_to_parameter need namedtuple) + initial_θ = collect(eltype(physdt), + vcat(ComponentArrays.ComponentArray(initial_nnθ))) # adding ode parameter estimation nparameters = length(initial_θ) diff --git a/src/discretize.jl b/src/discretize.jl index c8412b2d15..57b13b5eb6 100644 --- a/src/discretize.jl +++ b/src/discretize.jl @@ -10,21 +10,6 @@ Take expressions in the form: to -:((cord, θ, phi, derivative, u)->begin - #= ... =# - #= ... =# - begin - (θ1, θ2) = (θ[1:33], θ"[34:66]) - (phi1, phi2) = (phi[1], phi[2]) - let (x, y) = (cord[1], cord[2]) - [(+)(derivative(phi1, u, [x, y], [[ε, 0.0]], 1, θ1), (*)(4, derivative(phi2, u, [x, y], [[0.0, ε]], 1, θ2))) - 0, - (+)(derivative(phi2, u, [x, y], [[ε, 0.0]], 1, θ2), (*)(9, derivative(phi1, u, [x, y], [[0.0, ε]], 1, θ1))) - 0] - end - end - end) - -for Flux.Chain, and - :((cord, θ, phi, derivative, u)->begin #= ... =# #= ... 
=# @@ -86,13 +71,7 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; sep = [(acum[i] + 1):acum[i + 1] for i in 1:(length(acum) - 1)] for i in eachindex(depvars) - if (phi isa Vector && phi[1].f isa Optimisers.Restructure) || - (!(phi isa Vector) && phi.f isa Optimisers.Restructure) - # Flux.Chain - push!(expr_θ, :($θ[$(sep[i])])) - else # Lux.AbstractExplicitLayer - push!(expr_θ, :($θ.depvar.$(depvars[i]))) - end + push!(expr_θ, :($θ.depvar.$(depvars[i]))) push!(expr_phi, :(phi[$i])) end @@ -105,17 +84,10 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; #Add an expression for parameter symbols if param_estim == true && eq_params != SciMLBase.NullParameters() - param_len = length(eq_params) - last_indx = [0; accumulate(+, map(length, init_params))][end] params_symbols = Symbol[] expr_params = Expr[] for (i, eq_param) in enumerate(eq_params) - if (phi isa Vector && phi[1].f isa Optimisers.Restructure) || - (!(phi isa Vector) && phi.f isa Optimisers.Restructure) - push!(expr_params, :($θ[$((i + last_indx):(i + last_indx))])) - else - push!(expr_params, :($θ.p[$((i):(i))])) - end + push!(expr_params, :($θ.p[$((i):(i))])) push!(params_symbols, Symbol(:($eq_param))) end params_eq = Expr(:(=), build_expr(:tuple, params_symbols), @@ -156,7 +128,6 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; if !(dict_transformation_vars isa Nothing) transformation_expr_ = Expr[] - for (i, u) in dict_transformation_vars push!(transformation_expr_, :($i = $u)) end @@ -426,63 +397,40 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, if init_params === nothing # Use the initialization of the neural network framework # But for Lux, default to Float64 - # For Flux, default to the types matching the values in the neural network # This is done because Float64 is almost always better for these applications - # But with Flux there's already a chosen type from the user - if chain isa AbstractArray - if chain[1] isa Flux.Chain - init_params = map(chain) do x - _x = Flux.destructure(x)[1] - end - else - x = map(chain) do x - _x = ComponentArrays.ComponentArray(Lux.initialparameters(Random.default_rng(), - x)) - Float64.(_x) # No ComponentArray GPU support - end - names = ntuple(i -> depvars[i], length(chain)) - init_params = ComponentArrays.ComponentArray(NamedTuple{names}(i - for i in x)) + x = map(chain) do x + _x = ComponentArrays.ComponentArray(Lux.initialparameters(Random.default_rng(), + x)) + Float64.(_x) # No ComponentArray GPU support end + names = ntuple(i -> depvars[i], length(chain)) + init_params = ComponentArrays.ComponentArray(NamedTuple{names}(i + for i in x)) else - if chain isa Flux.Chain - init_params = Flux.destructure(chain)[1] - init_params = init_params isa Array ? Float64.(init_params) : - init_params - else - init_params = Float64.(ComponentArrays.ComponentArray(Lux.initialparameters(Random.default_rng(), + init_params = Float64.(ComponentArrays.ComponentArray(Lux.initialparameters(Random.default_rng(), chain))) - end end else init_params = init_params end - if (discretization.phi isa Vector && discretization.phi[1].f isa Optimisers.Restructure) || - (!(discretization.phi isa Vector) && discretization.phi.f isa Optimisers.Restructure) - # Flux.Chain - flat_init_params = multioutput ? reduce(vcat, init_params) : init_params - flat_init_params = param_estim == false ? 
flat_init_params : - vcat(flat_init_params, - adapt(typeof(flat_init_params), default_p)) + flat_init_params = if init_params isa ComponentArrays.ComponentArray + init_params + elseif multioutput + @assert length(init_params) == length(depvars) + names = ntuple(i -> depvars[i], length(init_params)) + x = ComponentArrays.ComponentArray(NamedTuple{names}(i for i in init_params)) else - flat_init_params = if init_params isa ComponentArrays.ComponentArray - init_params - elseif multioutput - @assert length(init_params) == length(depvars) - names = ntuple(i -> depvars[i], length(init_params)) - x = ComponentArrays.ComponentArray(NamedTuple{names}(i for i in init_params)) - else - ComponentArrays.ComponentArray(init_params) - end - flat_init_params = if param_estim == false && multioutput - ComponentArrays.ComponentArray(; depvar = flat_init_params) - elseif param_estim == false && !multioutput - flat_init_params - else - ComponentArrays.ComponentArray(; depvar = flat_init_params, p = default_p) - end + ComponentArrays.ComponentArray(init_params) + end + + flat_init_params = if param_estim == false && multioutput + ComponentArrays.ComponentArray(; depvar = flat_init_params) + elseif param_estim == false && !multioutput + flat_init_params + else + ComponentArrays.ComponentArray(; depvar = flat_init_params, p = default_p) end eltypeθ = eltype(flat_init_params) @@ -615,13 +563,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, else function _additional_loss(phi, θ) (θ_, p_) = if (param_estim == true) - if (phi isa Vector && phi[1].f isa Optimisers.Restructure) || - (!(phi isa Vector) && phi.f isa Optimisers.Restructure) - # Isa Flux Chain - θ[1:(end - length(default_p))], θ[(end - length(default_p) + 1):end] - else - θ.depvar, θ.p - end + θ.depvar, θ.p else θ, nothing end @@ -731,14 +673,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, else function _additional_loss(phi, θ) (θ_, p_) = if (param_estim == true) - if (phi isa Vector && phi[1].f isa Optimisers.Restructure) || - (!(phi isa Vector) && phi.f isa Optimisers.Restructure) - # Isa Flux Chain - θ[1:(end - length(default_p))], - θ[(end - length(default_p) + 1):end] - else - θ.depvar, θ.p - end + θ.depvar, θ.p else θ, nothing end diff --git a/src/ode_solve.jl b/src/ode_solve.jl index e724b93d7b..64ccb16acc 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -18,7 +18,7 @@ of the physics-informed neural network which is used as a solver for a standard ## Positional Arguments -* `chain`: A neural network architecture, defined as either a `Flux.Chain` or a `Lux.AbstractExplicitLayer`. +* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`. * `opt`: The optimizer to train the neural network. Defaults to `OptimizationPolyalgorithms.PolyOpt()` * `init_params`: The initial parameter of the neural network. By default, this is `nothing` which thus uses the random initialization provided by the neural network library. 
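`opt` must be an Optimization.jl-compatible optimiser; a self-contained sketch of that call pattern with `OptimizationOptimisers.Adam` (the toy objective and numbers here are illustrative, not part of this package):

```julia
using Optimization, OptimizationOptimisers, Zygote

rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
optf = OptimizationFunction(rosenbrock, Optimization.AutoZygote())
prob = OptimizationProblem(optf, zeros(2), [1.0, 100.0])
# Adam rule from Optimisers.jl, wrapped as an Optimization.jl solver;
# maxiters is required for this family of optimisers.
sol = solve(prob, OptimizationOptimisers.Adam(0.05), maxiters = 1000)
```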
@@ -60,8 +60,8 @@ f(u,p,t) = cos(2pi*t)
tspan = (0.0f0, 1.0f0)
u0 = 0.0f0
prob = ODEProblem(f, u0, tspan)
-chain = Flux.Chain(Dense(1,5,σ),Dense(5,1))
-opt = Flux.ADAM(0.1)
+chain = Lux.Chain(Lux.Dense(1,5,σ), Lux.Dense(5,1))
+opt = OptimizationOptimisers.Adam(0.1)
sol = solve(prob, NeuralPDE.NNODE(chain,opt), dt=1/20f0, verbose = true,
            abstol=1e-10, maxiters = 200)
```
@@ -100,7 +100,6 @@ end
"""
```julia
ODEPhi(chain::Lux.AbstractExplicitLayer, t, u0, st)
-ODEPhi(chain::Flux.Chain, t, u0, nothing)
```

Internal, used as a constructor for representing the ODE solution as a
@@ -112,13 +111,9 @@ mutable struct ODEPhi{C, T, U, S}
    t0::T
    u0::U
    st::S
-
    function ODEPhi(chain::Lux.AbstractExplicitLayer, t::Number, u0, st)
        new{typeof(chain), typeof(t), typeof(u0), typeof(st)}(chain, t, u0, st)
    end
-    function ODEPhi(re::Optimisers.Restructure, t, u0)
-        new{typeof(re), typeof(t), typeof(u0), Nothing}(re, t, u0, nothing)
-    end
end

function generate_phi_θ(chain::Lux.AbstractExplicitLayer, t, u0, init_params)
@@ -131,14 +126,6 @@ function generate_phi_θ(chain::Lux.AbstractExplicitLayer, t, u0, init_params)
    ODEPhi(chain, t, u0, st), init_params
end

-function generate_phi_θ(chain::Flux.Chain, t, u0, init_params)
-    θ, re = Flux.destructure(chain)
-    if init_params === nothing
-        init_params = θ
-    end
-    ODEPhi(re, t, u0), init_params
-end
-
function (f::ODEPhi{C, T, U})(t::Number,
        θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
@@ -168,25 +155,6 @@ function (f::ODEPhi{C, T, U})(t::AbstractVector,
    f.u0 .+ (t' .- f.t0) .* y
end

-function (f::ODEPhi{C, T, U})(t::Number,
-        θ) where {C <: Optimisers.Restructure, T, U <: Number}
-    f.u0 + (t - f.t0) * first(f.chain(θ)(adapt(parameterless_type(θ), [t])))
-end
-
-function (f::ODEPhi{C, T, U})(t::AbstractVector,
-        θ) where {C <: Optimisers.Restructure, T, U <: Number}
-    f.u0 .+ (t' .- f.t0) .* f.chain(θ)(adapt(parameterless_type(θ), t'))
-end
-
-function (f::ODEPhi{C, T, U})(t::Number, θ) where {C <: Optimisers.Restructure, T, U}
-    f.u0 + (t - f.t0) * f.chain(θ)(adapt(parameterless_type(θ), [t]))
-end
-
-function (f::ODEPhi{C, T, U})(t::AbstractVector,
-        θ) where {C <: Optimisers.Restructure, T, U}
-    f.u0 .+ (t .- f.t0)' .* f.chain(θ)(adapt(parameterless_type(θ), t'))
-end
-
"""
Computes u' using either forward-mode automatic differentiation or numerical differentiation.
"""
@@ -397,10 +365,10 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem,
    #train points generation
    init_params = alg.init_params

-    if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain
+    if chain isa Lux.AbstractExplicitLayer
        phi, init_params = generate_phi_θ(chain, t0, u0, init_params)
    else
-        error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")
+        error("Only Lux.AbstractExplicitLayer neural networks are supported")
    end

    if isinplace(prob)
diff --git a/src/pinn_types.jl b/src/pinn_types.jl
index 48c8f46da9..e1c13fd949 100644
--- a/src/pinn_types.jl
+++ b/src/pinn_types.jl
@@ -47,7 +47,7 @@ methodology.

## Positional Arguments

-* `chain`: a vector of Flux.jl or Lux.jl chains with a d-dimensional input and a
+* `chain`: a vector of Lux.jl chains with a d-dimensional input and a
  1-dimensional output corresponding to each of the dependent variables. Note that this
  specification respects the order of the dependent variables as specified in the PDESystem.
* `strategy`: determines which training strategy will be used.
See the Training Strategy
  documentation for more details.

## Keyword Arguments

-* `init_params`: the initial parameters of the neural networks. This should match the
-  specification of the chosen `chain` library. For example, if a Flux.chain is used, then
-  `init_params` should match `Flux.destructure(chain)[1]` in shape. If `init_params` is not
+* `init_params`: the initial parameters of the neural networks. If `init_params` is not
  given, then the neural network default parameters are used. Note that for Lux, the default
  will convert to Float64.
* `phi`: a trial solution, specified as `phi(x,p)` where `x` is the coordinates vector for
@@ -173,7 +171,7 @@ methodology.

## Positional Arguments

-* `chain`: a vector of Flux.jl or Lux.jl chains with a d-dimensional input and a
+* `chain`: a vector of Lux.jl chains with a d-dimensional input and a
  1-dimensional output corresponding to each of the dependent variables. Note that this
  specification respects the order of the dependent variables as specified in the PDESystem.
* `strategy`: determines which training strategy will be used. See the Training Strategy
@@ -184,9 +182,7 @@ methodology.

* `Dataset`: A vector of matrices, one matrix per dependent variable; the first column of each
  matrix holds that dependent variable's values and the remaining columns hold the independent
  variables. Needed for inverse problem solving.
-* `init_params`: the initial parameters of the neural networks. This should match the
-  specification of the chosen `chain` library. For example, if a Flux.chain is used, then
-  `init_params` should match `Flux.destructure(chain)[1]` in shape. If `init_params` is not
+* `init_params`: the initial parameters of the neural networks. If `init_params` is not
  given, then the neural network default parameters are used. Note that for Lux, the default
  will convert to Float64.
* `phi`: a trial solution, specified as `phi(x,p)` where `x` is the coordinates vector for
@@ -380,9 +376,7 @@ mutable struct PINNRepresentation
    If `param_estim = true`, then `flat_init_params.p` are the parameters and
    `flat_init_params.depvar.x` are the neural network parameters, so
    `flat_init_params.depvar.x` would be the parameters of the neural network for the
-    dependent variable `x` if it's a system. If a Flux.jl neural network is used, this is
-    simply an `AbstractArray` to be indexed and the sizes from the chains must be
-    remembered/stored/used.
+    dependent variable `x` if it's a system.
    """
    flat_init_params::Any
    """
@@ -474,10 +468,8 @@ value at domain points x

Fields:

-- `f`: A representation of the chain function. If FastChain, then `f(x,p)`,
-  if Chain then `f(p)(x)` (from Flux.destructure)
-- `st`: The state of the Lux.AbstractExplicitLayer. If a Flux.Chain then this is `nothing`.
-  It should be updated on each call.
+- `f`: A representation of the chain function.
+- `st`: The state of the Lux.AbstractExplicitLayer. It should be updated on each call.
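A usage sketch (assuming a `discretization`, an optimisation result `res`, and a grid `xs`, as in the package tests):

```julia
phi = discretization.phi
u_predict = [first(phi([x], res.minimizer)) for x in xs]  # evaluate the trial solution on a grid
```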
""" mutable struct Phi{C, S} f::C @@ -486,10 +478,6 @@ mutable struct Phi{C, S} st = Lux.initialstates(Random.default_rng(), chain) new{typeof(chain), typeof(st)}(chain, st) end - function Phi(chain::Flux.Chain) - re = Flux.destructure(chain)[2] - new{typeof(re), Nothing}(re, nothing) - end end function (f::Phi{<:Lux.AbstractExplicitLayer})(x::Number, θ) diff --git a/src/training_strategies.jl b/src/training_strategies.jl index d4edd26bfc..a74a1bed98 100644 --- a/src/training_strategies.jl +++ b/src/training_strategies.jl @@ -366,4 +366,5 @@ function get_loss_function(loss_function, train_set, eltypeθ, strategy::WeightedIntervalTraining; τ = nothing) loss = (θ) -> mean(abs2, loss_function(train_set, θ)) + return loss end \ No newline at end of file From 8ee54bfc55b2d54ccd80725701472b062dae9c61 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Tue, 23 Jan 2024 07:05:15 +0000 Subject: [PATCH 03/16] test: remove Flux based tests --- test/BPINN_PDE_tests.jl | 42 +----- test/BPINN_PDEinvsol_tests.jl | 64 ++------- test/BPINN_Tests.jl | 156 +++------------------ test/IDE_tests.jl | 114 ++++++--------- test/NNODE_tests.jl | 138 ++----------------- test/NNODE_tstops_test.jl | 4 +- test/NNPDE_tests.jl | 212 ++++++---------------------- test/NNPDE_tests_gpu.jl | 252 ---------------------------------- test/NNPDE_tests_gpu_Lux.jl | 139 ++++++------------- test/adaptive_loss_tests.jl | 9 -- test/additional_loss_tests.jl | 20 --- test/direct_function_tests.jl | 27 +--- test/forward_tests.jl | 32 ++--- test/neural_adapter_tests.jl | 92 ++++--------- test/runtests.jl | 5 - 15 files changed, 215 insertions(+), 1091 deletions(-) delete mode 100644 test/NNPDE_tests_gpu.jl diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl index dae9072c38..a63450e371 100644 --- a/test/BPINN_PDE_tests.jl +++ b/test/BPINN_PDE_tests.jl @@ -1,7 +1,7 @@ using Test, MCMCChains, Lux, ModelingToolkit import ModelingToolkit: Interval, infimum, supremum using ForwardDiff, Distributions, OrdinaryDiffEq -using Flux, AdvancedHMC, Statistics, Random, Functors +using AdvancedHMC, Statistics, Random, Functors using NeuralPDE, MonteCarloMeasurements using ComponentArrays @@ -17,8 +17,6 @@ eqs = Dt(u(t)) - cos(2 * π * t) ~ 0 bcs = [u(0) ~ 0.0] domains = [t ∈ Interval(0.0, 2.0)] -chainf = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1)) |> Flux.f64 -init1, re1 = Flux.destructure(chainf) chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) initl, st = Lux.setup(Random.default_rng(), chainl) @@ -35,15 +33,6 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system, priorsNNw = (0.0, 1.0), saveats = [1 / 50.0]) -discretization = NeuralPDE.BayesianPINN([chainf], GridTraining([0.01])) -sol2 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.01], - phystd = [0.005], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0]) - analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π) ts = vec(sol1.timepoints[1]) u_real = [analytic_sol_func(0.0, t) for t in ts] @@ -51,12 +40,6 @@ u_predict = pmean(sol1.ensemblesol[1]) @test u_predict≈u_real atol=0.5 @test mean(u_predict .- u_real) < 0.1 -ts = vec(sol2.timepoints[1]) -u_real = [analytic_sol_func(0.0, t) for t in ts] -u_predict = pmean(sol2.ensemblesol[1]) -@test u_predict≈u_real atol=0.5 -@test mean(u_predict .- u_real) < 0.1 - ## Example 1, 1D ode @parameters θ @variables u(..) 
@@ -73,10 +56,9 @@ bcs = [u(0.0) ~ 1.0] domains = [θ ∈ Interval(0.0, 1.0)] # Neural network -chain = Lux.Chain(Lux.Dense(1, 12, Flux.σ), Lux.Dense(12, 1)) +chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1)) -discretization = NeuralPDE.BayesianPINN([chain], - GridTraining([0.01])) +discretization = BayesianPINN([chain], GridTraining([0.01])) @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) @@ -123,7 +105,7 @@ chain = [ Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)), Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1))] -discretization = NeuralPDE.BayesianPINN(chain, GridTraining(0.01)) +discretization = BayesianPINN(chain, GridTraining(0.01)) @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)]) @@ -143,11 +125,6 @@ xs = vec(sol1.timepoints[1]) u_real = [analytic_sol_func(x) for x in xs] @test u_predict≈u_real atol=0.5 -# diff_u = abs.(u_real .- u_predict) -# plot(xs, u_real) -# plot!(xs, u_predict) -# plot!(xs, diff_u) - # 2D Poissons equation @parameters x y @variables u(..) @@ -171,7 +148,7 @@ chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense( # Discretization dx = 0.05 -discretization = NeuralPDE.BayesianPINN([chain], GridTraining(dx)) +discretization = BayesianPINN([chain], GridTraining(dx)) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) @@ -190,12 +167,3 @@ u_predict = pmean(sol1.ensemblesol[1]) u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])] diff_u = abs.(u_predict .- u_real) @test u_predict≈u_real atol=1.5 - -# using Plots, StatsPlots -# plotly() -# plot(sol1.timepoints[1][1, :], -# sol1.timepoints[1][2, :], -# pmean(sol1.ensemblesol[1]), -# linetype = :contourf) -# plot(sol1.timepoints[1][1, :], sol1.timepoints[1][2, :], u_real, linetype = :contourf) -# plot(sol1.timepoints[1][1, :], sol1.timepoints[1][2, :], diff_u, linetype = :contourf) \ No newline at end of file diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl index 3521c8c913..e8e72d8797 100644 --- a/test/BPINN_PDEinvsol_tests.jl +++ b/test/BPINN_PDEinvsol_tests.jl @@ -1,7 +1,7 @@ using Test, MCMCChains, Lux, ModelingToolkit import ModelingToolkit: Interval, infimum, supremum using ForwardDiff, Distributions, OrdinaryDiffEq -using Flux, AdvancedHMC, Statistics, Random, Functors +using AdvancedHMC, Statistics, Random, Functors using NeuralPDE, MonteCarloMeasurements using ComponentArrays @@ -17,8 +17,6 @@ eqs = Dt(u(t)) - cos(p * t) ~ 0 bcs = [u(0) ~ 0.0] domains = [t ∈ Interval(0.0, 2.0)] -chainf = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1)) |> Flux.f64 -init1, re1 = Flux.destructure(chainf) chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) initl, st = Lux.setup(Random.default_rng(), chainl) @@ -36,13 +34,9 @@ u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints] u = u .+ (u .* 0.2) .* randn(size(u)) dataset = [hcat(u, timepoints)] -# plot(dataset[1][:, 2], dataset[1][:, 1]) -# plot!(timepoints, u) - # checking all training strategies -discretization = NeuralPDE.BayesianPINN([chainl], - StochasticTraining(200), - param_estim = true, dataset = [dataset, nothing]) +discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true, + dataset = [dataset, nothing]) ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -53,9 +47,8 @@ ahmc_bayesian_pinn_pde(pde_system, saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) -discretization = NeuralPDE.BayesianPINN([chainl], - 
QuasiRandomTraining(200), - param_estim = true, dataset = [dataset, nothing]) +discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true, + dataset = [dataset, nothing]) ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -66,8 +59,8 @@ ahmc_bayesian_pinn_pde(pde_system, saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) -discretization = NeuralPDE.BayesianPINN([chainl], - QuadratureTraining(), param_estim = true, dataset = [dataset, nothing]) +discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true, + dataset = [dataset, nothing]) ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -78,9 +71,8 @@ ahmc_bayesian_pinn_pde(pde_system, saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) -discretization = NeuralPDE.BayesianPINN([chainl], - GridTraining([0.02]), - param_estim = true, dataset = [dataset, nothing]) +discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true, + dataset = [dataset, nothing]) sol1 = ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -91,18 +83,6 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system, saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) -discretization = NeuralPDE.BayesianPINN([chainf], - GridTraining([0.02]), param_estim = true, dataset = [dataset, nothing]) - -sol2 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.03], - phystd = [0.01], l2std = [0.01], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0], - param = [LogNormal(6.0, 0.5)]) - param = 2 * π ts = vec(sol1.timepoints[1]) u_real = [analytic_sol_func1(0.0, t) for t in ts] @@ -112,14 +92,6 @@ u_predict = pmean(sol1.ensemblesol[1]) @test mean(u_predict .- u_real) < 0.1 @test sol1.estimated_de_params[1]≈param atol=param * 0.3 -ts = vec(sol2.timepoints[1]) -u_real = [analytic_sol_func1(0.0, t) for t in ts] -u_predict = pmean(sol2.ensemblesol[1]) - -@test u_predict≈u_real atol=0.5 -@test mean(u_predict .- u_real) < 0.1 -@test sol2.estimated_de_params[1]≈param atol=param * 0.3 - ## Example Lorenz System (Parameter Estimation) println("Example 2, Lorenz System") @parameters t, σ_ @@ -160,15 +132,8 @@ us = us .+ ((0.05 .* randn(size(us))) .* us) ts_ = hcat(sol(ts).t...)[1, :] dataset = [hcat(us[i, :], ts_) for i in 1:3] -# using Plots, StatsPlots -# plot(hcat(sol.u...)[1, :], hcat(sol.u...)[2, :], hcat(sol.u...)[3, :]) -# plot!(dataset[1][:, 1], dataset[2][:, 1], dataset[3][:, 1]) -# plot(dataset[1][:, 2:end], dataset[1][:, 1]) -# plot!(dataset[2][:, 2:end], dataset[2][:, 1]) -# plot!(dataset[3][:, 2:end], dataset[3][:, 1]) - -discretization = NeuralPDE.BayesianPINN(chain, NeuralPDE.GridTraining([0.01]); - param_estim = true, dataset = [dataset, nothing]) +discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true, + dataset = [dataset, nothing]) @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]])) @@ -186,10 +151,5 @@ sol1 = ahmc_bayesian_pinn_pde(pde_system, idealp = 10.0 p_ = sol1.estimated_de_params[1] -# plot(pmean(sol1.ensemblesol[1]), pmean(sol1.ensemblesol[2]), pmean(sol1.ensemblesol[3])) -# plot(sol1.timepoints[1]', pmean(sol1.ensemblesol[1])) -# plot!(sol1.timepoints[2]', pmean(sol1.ensemblesol[2])) -# plot!(sol1.timepoints[3]', pmean(sol1.ensemblesol[3])) - @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1] -# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2] \ No newline at end of file +# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2] diff --git a/test/BPINN_Tests.jl 
b/test/BPINN_Tests.jl index cb0303daf0..fd43937ded 100644 --- a/test/BPINN_Tests.jl +++ b/test/BPINN_Tests.jl @@ -1,7 +1,7 @@ # # Testing Code using Test, MCMCChains using ForwardDiff, Distributions, OrdinaryDiffEq -using Flux, OptimizationOptimisers, AdvancedHMC, Lux +using OptimizationOptimisers, AdvancedHMC, Lux using Statistics, Random, Functors, ComponentArrays using NeuralPDE, MonteCarloMeasurements @@ -31,51 +31,27 @@ x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1))) time1 = vec(collect(Float64, ta0)) physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] -chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) -init1, re1 = destructure(chainflux) θinit, st = Lux.setup(Random.default_rng(), chainlux) -fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux, - draw_samples = 2500) +fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux, draw_samples = 2500) -fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux, - draw_samples = 2500) - -# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad) -# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(), -# reltol = 1e-6, -# abstol = 1e-3, maxiters = 1000, -# batch = 0) - -alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500) -sol1flux = solve(prob, alg) - -alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500) +alg = BNNODE(chainlux, draw_samples = 2500) sol1lux = solve(prob, alg) # testing points t = time -# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] -out = re1.(fhsamples1[(end - 500):end]) -yu = collect(out[i](t') for i in eachindex(out)) -fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] -meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean - -θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500] +# Mean of last 500 sampled parameter's curves[Ensemble predictions] +θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:2500] luxar = [chainlux(t', θ[i], st)[1] for i in 1:500] luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] -meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean +meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean # --------------------- ahmc_bayesian_pinn_ode() call -@test mean(abs.(x̂ .- meanscurve1)) < 0.05 -@test mean(abs.(physsol1 .- meanscurve1)) < 0.005 -@test mean(abs.(x̂ .- meanscurve2)) < 0.05 -@test mean(abs.(physsol1 .- meanscurve2)) < 0.005 +@test mean(abs.(x̂ .- meanscurve)) < 0.05 +@test mean(abs.(physsol1 .- meanscurve)) < 0.005 #--------------------- solve() call -@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05 -@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05 @test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05 @test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05 @@ -107,37 +83,17 @@ x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1))) time1 = vec(collect(Float64, ta0)) physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] -chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) -init1, re1 = destructure(chainflux1) θinit, st = Lux.setup(Random.default_rng(), chainlux1) -fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1, - dataset = dataset, - draw_samples = 
2500, - physdt = 1 / 50.0, - priorsNNw = (0.0, - 3.0), - param = [ - LogNormal(9, - 0.5), - ]) - -fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1, +fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux1, dataset = dataset, draw_samples = 2500, physdt = 1 / 50.0, priorsNNw = (0.0, 3.0), param = [LogNormal(9, 0.5)]) -alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset, - draw_samples = 2500, physdt = 1 / 50.0, - priorsNNw = (0.0, 3.0), - param = [LogNormal(9, 0.5)]) - -sol2flux = solve(prob, alg) - -alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset, +alg = BNNODE(chainlux1, dataset = dataset, draw_samples = 2500, physdt = 1 / 50.0, priorsNNw = (0.0, @@ -152,30 +108,21 @@ sol2lux = solve(prob, alg) # testing points t = time # Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] -out = re1.([fhsamples1[i][1:22] for i in 2000:2500]) -yu = collect(out[i](t') for i in eachindex(out)) -fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] -meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean - -θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500] +θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit) for i in 2000:2500] luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500] luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] -meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean +meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean # --------------------- ahmc_bayesian_pinn_ode() call -@test mean(abs.(physsol1 .- meanscurve1)) < 0.15 -@test mean(abs.(physsol1 .- meanscurve2)) < 0.15 +@test mean(abs.(physsol1 .- meanscurve)) < 0.15 # ESTIMATED ODE PARAMETERS (NN1 AND NN2) -@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.35 * p) -@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.35 * p) +@test abs(p - mean([fhsamples[i][23] for i in 2000:2500])) < abs(0.35 * p) #-------------------------- solve() call -@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2 @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2 # ESTIMATED ODE PARAMETERS (NN1 AND NN2) -@test abs(p - sol2flux.estimated_de_params[1]) < abs(0.15 * p) @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p) ## PROBLEM-2 @@ -200,36 +147,9 @@ u1 = [linear_analytic(u0, p, ti) for ti in ta0] time1 = vec(collect(Float64, ta0)) physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] -chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), - Flux.Dense(6, 1)) |> Flux.f64 chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) -init1, re1 = destructure(chainflux12) θinit, st = Lux.setup(Random.default_rng(), chainlux12) -fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob, - chainflux12, - draw_samples = 1500, - l2std = [0.03], - phystd = [ - 0.03], - priorsNNw = (0.0, - 10.0)) - -fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob, - chainflux12, - dataset = dataset, - draw_samples = 1500, - l2std = [0.03], - phystd = [ - 0.03, - ], - priorsNNw = (0.0, - 10.0), - param = [ - Normal(-7, - 4), - ]) - fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12, draw_samples = 1500, l2std = [0.03], @@ -249,23 +169,7 @@ fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, 4), ]) -alg = NeuralPDE.BNNODE(chainflux12, - dataset = dataset, - draw_samples = 1500, - l2std = 
[0.03], - phystd = [ - 0.03, - ], - priorsNNw = (0.0, - 10.0), - param = [ - Normal(-7, - 4), - ]) - -sol3flux_pestim = solve(prob, alg) - -alg = NeuralPDE.BNNODE(chainlux12, +alg = BNNODE(chainlux12, dataset = dataset, draw_samples = 1500, l2std = [0.03], @@ -281,27 +185,7 @@ sol3lux_pestim = solve(prob, alg) # testing timepoints t = sol.t -#------------------------------ ahmc_bayesian_pinn_ode() call -# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions] -out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500]) -yu = [out[i](t') for i in eachindex(out)] -fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] -meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean - -out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500]) -yu = [out[i](t') for i in eachindex(out)] -fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] -meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean - -@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2 -@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2 -@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2 -@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2 - -# estimated parameters(flux chain) -param1 = mean(i[62] for i in fhsamplesflux22[1000:1500]) -@test abs(param1 - p) < abs(0.3 * p) - +#------------------------------ ahmc_bayesian_pinn_ode() call # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions] θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500] luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] @@ -323,14 +207,8 @@ param1 = mean(i[62] for i in fhsampleslux22[1000:1500]) @test abs(param1 - p) < abs(0.3 * p) #-------------------------- solve() call -# (flux chain) -@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15 -# estimated parameters(flux chain) -param1 = sol3flux_pestim.estimated_de_params[1] -@test abs(param1 - p) < abs(0.45 * p) - # (lux chain) @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15 # estimated parameters(lux chain) param1 = sol3lux_pestim.estimated_de_params[1] -@test abs(param1 - p) < abs(0.45 * p) \ No newline at end of file +@test abs(param1 - p) < abs(0.45 * p) diff --git a/test/IDE_tests.jl b/test/IDE_tests.jl index 6bfac12d82..0033a658d4 100644 --- a/test/IDE_tests.jl +++ b/test/IDE_tests.jl @@ -1,7 +1,7 @@ using Test, NeuralPDE using Optimization, OptimizationOptimJL import ModelingToolkit: Interval -using DomainSets, Flux +using DomainSets import Lux using Random @@ -12,6 +12,8 @@ callback = function (p, l) return false end +mse(x, y) = sum(abs2, x .- y) + #Integration Tests println("Integral Tests") @parameters t @@ -21,13 +23,11 @@ Ii = Integral(t in DomainSets.ClosedInterval(0, t)) eq = Di(i(t)) + 2 * i(t) + 5 * Ii(i(t)) ~ 1 bcs = [i(0.0) ~ 0.0] domains = [t ∈ Interval(0.0, 2.0)] -chain = Chain(Dense(1, 15, Flux.σ), Dense(15, 1)) -strategy_ = NeuralPDE.GridTraining(0.1) -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy_) +chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1)) +strategy_ = GridTraining(0.1) +discretization = PhysicsInformedNN(chain, strategy_) @named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)]) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) -prob = NeuralPDE.discretize(pde_system, discretization) +prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100) ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] @@ -36,9 +36,7 @@ phi = 
discretization.phi
analytic_sol_func(t) = 1 / 2 * (exp(-t)) * (sin(2 * t))
u_real = [analytic_sol_func(t) for t in ts]
u_predict = [first(phi([t], res.minimizer)) for t in ts]
-@test Flux.mse(u_real, u_predict) < 0.01
-# plot(ts,u_real)
-# plot!(ts,u_predict)
+@test mse(u_real, u_predict) < 0.01

## Simple Integral Test
println("Simple Integral Test")

@parameters x
@variables u(..)
Ix = Integral(x in DomainSets.ClosedInterval(0, x))
eq = Ix(u(x) * cos(x)) ~ (x^3) / 3
bcs = [u(0.0) ~ 0.0]
domains = [x ∈ Interval(0.0, 1.00)]

-# chain = Chain(Dense(1,15,Flux.σ),Dense(15,1))
-chain = Lux.Chain(Lux.Dense(1, 15, Flux.σ), Lux.Dense(15, 1))
-strategy_ = NeuralPDE.GridTraining(0.1)
-discretization = NeuralPDE.PhysicsInformedNN(chain,
-    strategy_)
+chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
+strategy_ = GridTraining(0.1)
+discretization = PhysicsInformedNN(chain, strategy_)
@named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
-prob = NeuralPDE.discretize(pde_system, discretization)
+prob = discretize(pde_system, discretization)
res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
    maxiters = 200)
xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
phi = discretization.phi
u_predict = [first(phi([x], res.minimizer)) for x in xs]
u_real = [x^2 / cos(x) for x in xs]
-@test Flux.mse(u_real, u_predict) < 0.001
-
-# plot(xs,u_real)
-# plot!(xs,u_predict)
+@test mse(u_real, u_predict) < 0.001

#simple multidimensional integral test
println("simple multidimensional integral test")
-
@parameters x, y
@variables u(..)
Dx = Differential(x)
Dy = Differential(y)
Ix = Integral((x, y) in DomainSets.UnitSquare())
eq = Ix(u(x, y)) ~ 1 / 3
bcs = [u(0.0, 0.0) ~ 1, Dx(u(x, y)) ~ -2 * x, Dy(u(x, y)) ~ -2 * y]
domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
-chain = Chain(Dense(2, 15, Flux.σ), Dense(15, 1))
-strategy_ = NeuralPDE.GridTraining(0.1)
-discretization = NeuralPDE.PhysicsInformedNN(chain,
-    strategy_)
+chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
+strategy_ = GridTraining(0.1)
+discretization = PhysicsInformedNN(chain, strategy_)
@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-prob = NeuralPDE.discretize(pde_system, discretization)
+prob = discretize(pde_system, discretization)
res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
    maxiters = 100)
xs = 0.00:0.01:1.00
ys = 0.00:0.01:1.00
phi = discretization.phi

u_real = collect(1 - x^2 - y^2 for y in ys, x in xs);
u_predict = collect(Array(phi([x, y], res.minimizer))[1] for y in ys, x in xs);
-@test Flux.mse(u_real, u_predict) < 0.001
-
-# error_ = u_predict .- u_real
-# p1 = plot(xs,ys,u_real,linetype=:contourf,label = "analytic")
-# p2 = plot(xs,ys,u_predict,linetype=:contourf,label = "predict")
-# p3 = plot(xs,ys,error_,linetype=:contourf,label = "error")
-# plot(p1,p2,p3)
+@test mse(u_real, u_predict) < 0.001

@parameters x, y
@variables u(..)
@@ -110,13 +94,12 @@ Ix = Integral((x, y) in DomainSets.ProductDomain(UnitInterval(), ClosedInterval( eq = Ix(u(x, y)) ~ 5 / 12 bcs = [u(0.0, 0.0) ~ 0, Dy(u(x, y)) ~ 2 * y, u(x, 0) ~ x] domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)] -chain = Chain(Dense(2, 15, Flux.σ), Dense(15, 1)) -strategy_ = NeuralPDE.GridTraining(0.1) -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy_) +chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1)) +strategy_ = GridTraining(0.1) +discretization = PhysicsInformedNN(chain, strategy_) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) -prob = NeuralPDE.discretize(pde_system, discretization) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback, +prob = discretize(pde_system, discretization) +res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100) xs = 0.00:0.01:1.00 ys = 0.00:0.01:1.00 @@ -124,13 +107,7 @@ phi = discretization.phi u_real = collect(x + y^2 for y in ys, x in xs); u_predict = collect(Array(phi([x, y], res.minimizer))[1] for y in ys, x in xs); -@test Flux.mse(u_real, u_predict) < 0.01 - -# error_ = u_predict .- u_real -# p1 = plot(xs,ys,u_real,linetype=:contourf,label = "analytic") -# p2 = plot(xs,ys,u_predict,linetype=:contourf,label = "predict") -# p3 = plot(xs,ys,error_,linetype=:contourf,label = "error") -# plot(p1,p2,p3) +@test mse(u_real, u_predict) < 0.01 ## Two variables Integral Test println("Two variables Integral Test") @@ -147,13 +124,12 @@ eqs = [Ix(u(x) * w(x)) ~ log(abs(x)), bcs = [u(1.0) ~ 1.0, w(1.0) ~ 1.0] domains = [x ∈ Interval(1.0, 2.0)] -chains = [Lux.Chain(Lux.Dense(1, 15, Flux.σ), Lux.Dense(15, 1)) for _ in 1:2] -strategy_ = NeuralPDE.GridTraining(0.1) -discretization = NeuralPDE.PhysicsInformedNN(chains, - strategy_) +chains = [Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1)) for _ in 1:2] +strategy_ = GridTraining(0.1) +discretization = PhysicsInformedNN(chains, strategy_) @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x), w(x)]) -prob = NeuralPDE.discretize(pde_system, discretization) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback, +prob = discretize(pde_system, discretization) +res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 200) xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] phi = discretization.phi @@ -162,13 +138,8 @@ u_predict = [(phi[1]([x], res.u.depvar.u))[1] for x in xs] w_predict = [(phi[2]([x], res.u.depvar.w))[1] for x in xs] u_real = [x for x in xs] w_real = [1 / x^2 for x in xs] -@test Flux.mse(u_real, u_predict) < 0.001 -@test Flux.mse(w_real, w_predict) < 0.001 - -# plot(xs,u_real) -# plot!(xs,u_predict) -# plot(xs,w_real) -# plot!(xs,w_predict) +@test mse(u_real, u_predict) < 0.001 +@test mse(w_real, w_predict) < 0.001 ## Infinity Integral Test println("Infinity Integral Test") @@ -179,20 +150,17 @@ Iinf = Integral(x in ClosedInterval(1, Inf)) eqs = [I(u(x)) ~ Iinf(u(x)) - 1 / x] bcs = [u(1) ~ 1] domains = [x ∈ Interval(1.0, 2.0)] -chain = Lux.Chain(Lux.Dense(1, 10, Flux.σ), Lux.Dense(10, 1)) -discretization = NeuralPDE.PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1)) +chain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 1)) +discretization = PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1)) @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)]) -sym_prob = SciMLBase.symbolic_discretize(pde_system, discretization) -prob = SciMLBase.discretize(pde_system, discretization) -res = 
Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback, +prob = discretize(pde_system, discretization) +res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 200) xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] phi = discretization.phi u_predict = [first(phi([x], res.minimizer)) for x in xs] u_real = [1 / x^2 for x in xs] @test u_real≈u_predict rtol=10^-2 -# plot(xs,u_real) -# plot!(xs,u_predict) # Infinity Integral equation Test println("Infinity Integral equation Test") @@ -202,18 +170,14 @@ I = Integral(x in ClosedInterval(x, Inf)) eq = I(u(x)) ~ 1 / x domains = [x ∈ Interval(1.0, 2.0)] bcs = [u(1) ~ 1] -chain = Lux.Chain(Lux.Dense(1, 12, Flux.tanh), Lux.Dense(12, 1)) -discretization = NeuralPDE.PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1)) +chain = Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 1)) +discretization = PhysicsInformedNN(chain, GridTraining(0.1)) @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) -sym_prob = SciMLBase.symbolic_discretize(pde_system, discretization) -prob = SciMLBase.discretize(pde_system, discretization) -prob.f(prob.u0, nothing) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback, +prob = discretize(pde_system, discretization) +res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 300) xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] phi = discretization.phi u_predict = [first(phi([x], res.minimizer)) for x in xs] u_real = [1 / x^2 for x in xs] @test u_real≈u_predict rtol=10^-2 -# plot(xs,u_real) -# plot!(xs,u_predict) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index 3f7edf43c7..c39ca80d80 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -1,6 +1,6 @@ -using Test, Flux +using Test using Random, NeuralPDE -using OrdinaryDiffEq, Optimisers, Statistics +using OrdinaryDiffEq, Statistics import Lux, OptimizationOptimisers, OptimizationOptimJL Random.seed!(100) @@ -10,19 +10,9 @@ linear = (u, p, t) -> cos(2pi * t) tspan = (0.0f0, 1.0f0) u0 = 0.0f0 prob = ODEProblem(linear, u0, tspan) -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) |> Flux.f64 luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) -sol = solve(prob, NeuralPDE.NNODE(chain, opt), dt = 1 / 20.0f0, verbose = true, - abstol = 1.0f-10, maxiters = 200) - -@test_throws ArgumentError solve(prob, NeuralPDE.NNODE(chain, opt; autodiff = true), dt = 1 / 20.0f0, - verbose = true, abstol = 1.0f-10, maxiters = 200) - -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, - abstol = 1.0f-6, maxiters = 200) - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, abstol = 1.0f-10, maxiters = 200) @@ -34,12 +24,6 @@ sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, abstol = 1.0f-6, maxiters = 200) opt = OptimizationOptimJL.BFGS() -sol = solve(prob, NeuralPDE.NNODE(chain, opt), dt = 1 / 20.0f0, verbose = true, - abstol = 1.0f-10, maxiters = 200) - -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, - abstol = 1.0f-6, maxiters = 200) - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, abstol = 1.0f-10, maxiters = 200) @@ -51,19 +35,9 @@ linear = (u, p, t) -> [cos(2pi * t)] tspan = (0.0f0, 1.0f0) u0 = [0.0f0] prob = ODEProblem(linear, u0, tspan) -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) |> Flux.f64 -luxchain = Lux.Chain(Lux.Dense(1, 5, σ), Lux.Dense(5, 1)) +luxchain = 
Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) opt = OptimizationOptimJL.BFGS() -sol = solve(prob, NeuralPDE.NNODE(chain, opt), dt = 1 / 20.0f0, abstol = 1e-10, - verbose = true, maxiters = 200) - -@test_throws ArgumentError solve(prob, NeuralPDE.NNODE(chain, opt; autodiff = true), dt = 1 / 20.0f0, - abstol = 1e-10, verbose = true, maxiters = 200) - -sol = solve(prob, NeuralPDE.NNODE(chain, opt), abstol = 1.0f-6, - verbose = true, maxiters = 200) - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10, verbose = true, maxiters = 200) @@ -83,32 +57,15 @@ linear = (u, p, t) -> @. t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3) u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2] prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0)) -chain = Flux.Chain(Dense(1, 128, σ), Dense(128, 1)) |> Flux.f64 -luxchain = Lux.Chain(Lux.Dense(1, 128, σ), Lux.Dense(128, 1)) +luxchain = Lux.Chain(Lux.Dense(1, 128, Lux.σ), Lux.Dense(128, 1)) opt = OptimizationOptimisers.Adam(0.01) -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, maxiters = 400) -@test sol.errors[:l2] < 0.5 - -@test_throws AssertionError solve(prob, NeuralPDE.NNODE(chain, opt; batch = true), verbose = true, - maxiters = 400) - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, maxiters = 400) @test sol.errors[:l2] < 0.5 @test_throws AssertionError solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, maxiters = 400) -sol = solve(prob, - NeuralPDE.NNODE(chain, opt; batch = false, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, - NeuralPDE.NNODE(chain, opt; batch = true, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400) -@test sol.errors[:l2] < 0.5 - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)), @@ -121,14 +78,6 @@ sol = solve(prob, verbose = true, maxiters = 400) @test sol.errors[:l2] < 0.5 -sol = solve(prob, NeuralPDE.NNODE(chain, opt; batch = false), verbose = true, - maxiters = 400, dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, NeuralPDE.NNODE(chain, opt; batch = true), verbose = true, maxiters = 400, - dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = false), verbose = true, maxiters = 400, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 @@ -142,18 +91,9 @@ sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t) linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) |> Flux.f64 -luxchain = Lux.Chain(Lux.Dense(1, 5, σ), Lux.Dense(5, 1)) +luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) opt = OptimizationOptimisers.Adam(0.1) -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, maxiters = 400, - abstol = 1.0f-8) -@test sol.errors[:l2] < 0.5 - -@test_throws AssertionError solve(prob, NeuralPDE.NNODE(chain, opt; batch = true), verbose = true, - maxiters = 400, - abstol = 1.0f-8) - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, maxiters = 400, abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 @@ -162,18 +102,6 @@ sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, maxiters = 
400 maxiters = 400, abstol = 1.0f-8) -sol = solve(prob, - NeuralPDE.NNODE(chain, opt; batch = false, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400, - abstol = 1.0f-8) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, - NeuralPDE.NNODE(chain, opt; batch = true, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400, - abstol = 1.0f-8) -@test sol.errors[:l2] < 0.5 - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)), @@ -188,15 +116,6 @@ sol = solve(prob, abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 -sol = solve(prob, NeuralPDE.NNODE(chain, opt; batch = false), verbose = true, - maxiters = 400, - abstol = 1.0f-8, dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, NeuralPDE.NNODE(chain, opt; batch = true), verbose = true, maxiters = 400, - abstol = 1.0f-8, dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = false), verbose = true, maxiters = 400, abstol = 1.0f-8, dt = 1 / 5.0f0) @@ -214,9 +133,9 @@ u0 = [0.0f0, -1.0f0 / 2pi] linear_analytic = (u0, p, t) -> [sin(2pi * t) / 2pi, -cos(2pi * t) / 2pi] odefunction = ODEFunction(linear, analytic = linear_analytic) prob = ODEProblem(odefunction, u0, tspan) -chain = Flux.Chain(Dense(1, 10, σ), Dense(10, 2)) |> Flux.f64 +luxchain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 2)) opt = OptimizationOptimisers.Adam(0.1) -alg = NeuralPDE.NNODE(chain, opt; autodiff = false) +alg = NeuralPDE.NNODE(luxchain, opt; autodiff = false) sol = solve(prob, alg, verbose = true, dt = 1 / 40.0f0, @@ -237,7 +156,7 @@ N = 12 chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) -opt = Optimisers.Adam(0.01) +opt = OptimizationOptimisers.Adam(0.01) weights = [0.7, 0.2, 0.1] points = 200 alg = NeuralPDE.NNODE(chain, opt, autodiff = false, @@ -258,20 +177,6 @@ opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) # Analytical solution u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x) -# GridTraining (Flux Chain) -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) - -(u_, t_) = (u_analytical(ts), ts) -function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) -end - -alg1 = NeuralPDE.NNODE(chain, opt, strategy = GridTraining(0.01), - additional_loss = additional_loss) - -sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-8, maxiters = 500) -@test sol1.errors[:l2] < 0.5 - # GridTraining (Lux Chain) luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) @@ -286,19 +191,6 @@ alg1 = NeuralPDE.NNODE(luxchain, opt, strategy = GridTraining(0.01), sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-8, maxiters = 500) @test sol1.errors[:l2] < 0.5 -# QuadratureTraining (Flux Chain) -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) - -(u_, t_) = (u_analytical(ts), ts) -function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) -end - -alg1 = NeuralPDE.NNODE(chain, opt, additional_loss = additional_loss) - -sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-10, maxiters = 200) -@test sol1.errors[:l2] < 0.5 - # QuadratureTraining (Lux Chain) luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) @@ -312,20 +204,6 @@ alg1 = NeuralPDE.NNODE(luxchain, opt, additional_loss = additional_loss) sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-10, maxiters = 200) @test sol1.errors[:l2] < 0.5 -# StochasticTraining(Flux Chain) -chain = Flux.Chain(Dense(1, 
5, σ), Dense(5, 1)) |> Flux.f64 - -(u_, t_) = (u_analytical(ts), ts) -function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) -end - -alg1 = NeuralPDE.NNODE(chain, opt, strategy = StochasticTraining(1000), - additional_loss = additional_loss) - -sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-8, maxiters = 500) -@test sol1.errors[:l2] < 0.5 - # StochasticTraining (Lux Chain) luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl index 82761f60bd..4c58987aea 100644 --- a/test/NNODE_tstops_test.jl +++ b/test/NNODE_tstops_test.jl @@ -1,4 +1,4 @@ -using OrdinaryDiffEq, Lux, OptimizationOptimisers, Test, Statistics, Optimisers, NeuralPDE +using OrdinaryDiffEq, Lux, OptimizationOptimisers, Test, Statistics, NeuralPDE function fu(u, p, t) [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]] @@ -22,7 +22,7 @@ N = 12 chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) -opt = Optimisers.Adam(0.01) +opt = OptimizationOptimisers.Adam(0.01) threshold = 0.2 #bad choices for weights, samples and dx so that the algorithm will fail without the added points diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl index 6ad64fef27..1ed22ea8ba 100644 --- a/test/NNPDE_tests.jl +++ b/test/NNPDE_tests.jl @@ -1,4 +1,4 @@ -using Flux, NeuralPDE, Test +using NeuralPDE, Test using Optimization, OptimizationOptimJL, OptimizationOptimisers using Integrals, Cubature using QuasiMonteCarlo @@ -33,7 +33,7 @@ function test_ode(strategy_) domains = [θ ∈ Interval(0.0, 1.0)] # Neural network - chain = Lux.Chain(Lux.Dense(1, 12, Flux.σ), Lux.Dense(12, 1)) + chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1)) discretization = NeuralPDE.PhysicsInformedNN(chain, strategy_) @@ -54,100 +54,22 @@ function test_ode(strategy_) u_predict = [first(phi(t, res.minimizer)) for t in ts] @test u_predict≈u_real atol=0.1 - # using Plots - # t_plot = collect(ts) - # plot(t_plot ,u_real) - # plot!(t_plot ,u_predict) end -#TODO There is little meaning in these tests without checking the correctness of the prediction. -#TODO I propose to simply remove them. -function test_heterogeneous_equation(strategy_) - println("Simple Heterogeneous input PDE, strategy: $(nameof(typeof(strategy_)))") - @parameters x y - @variables p(..) q(..) r(..) s(..) 
- Dx = Differential(x) - Dy = Differential(y) - - # 2D PDE - eq = p(x) + q(y) + Dx(r(x, y)) + Dy(s(y, x)) ~ 0 - # eq = Dx(p(x)) + Dy(q(y)) + Dx(r(x, y)) + Dy(s(y, x)) + p(x) + q(y) + r(x, y) + s(y, x) ~ 0 - - # Initial and boundary conditions - bcs = [p(1) ~ 0.0f0, q(-1) ~ 0.0f0, - r(x, -1) ~ 0.0f0, r(1, y) ~ 0.0f0, - s(y, 1) ~ 0.0f0, s(-1, x) ~ 0.0f0] - # bcs = [s(y, 1) ~ 0.0f0] - # Space and time domains - domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] - - # chain_ = Lux.Chain(Lux.Dense(2,12,Flux.σ),Lux.Dense(12,12,Flux.σ),Lux.Dense(12,1)) - numhid = 3 - luxchain = [[Lux.Chain(Lux.Dense(1, numhid, Flux.σ), - Lux.Dense(numhid, numhid, Flux.σ), Lux.Dense(numhid, 1)) - for i in 1:2] - [Lux.Chain(Lux.Dense(2, numhid, Flux.σ), - Lux.Dense(numhid, numhid, Flux.σ), Lux.Dense(numhid, 1)) - for i in 1:2]] - discretization = NeuralPDE.PhysicsInformedNN(luxchain, - strategy_) - - @named pde_system = PDESystem(eq, bcs, domains, [x, y], [p(x), q(y), r(x, y), s(y, x)]) - prob = SciMLBase.discretize(pde_system, discretization) - res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 100) -end - -## Heterogeneous system -function test_heterogeneous_system(strategy_) - println("Heterogeneus input PDE with derivatives, strategy: $(nameof(typeof(strategy_)))") - @parameters x y - @variables p(..) q(..) - Dx = Differential(x) - Dy = Differential(y) - - # 2D PDE - #TODO Dx(q(y)) = 0 - #TODO so p(x) = 0, q = const is has only trivial solution - eq = p(x) + Dx(q(y)) ~ 0 - - # Initial and boundary conditions - bcs = [p(1) ~ 0.0f0, q(-1) ~ 0.0f0] - - # Space and time domains - domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(-1.0, 0.0)] - - # chain_ = Lux.Chain(Lux.Dense(2,12,Flux.σ),Lux.Dense(12,12,Flux.σ),Lux.Dense(12,1)) - numhid = 3 - luxchain = [[Lux.Chain(Lux.Dense(1, numhid, Flux.σ), - Lux.Dense(numhid, numhid, Flux.σ), Lux.Dense(numhid, 1)) - for i in 1:2] - [Lux.Chain(Lux.Dense(2, numhid, Flux.σ), - Lux.Dense(numhid, numhid, Flux.σ), Lux.Dense(numhid, 1)) - for i in 1:2]] - discretization = NeuralPDE.PhysicsInformedNN(luxchain, - strategy_) - - @named pde_system = PDESystem(eq, bcs, domains, [x, y], [p(x), q(y)]) - prob = SciMLBase.discretize(pde_system, discretization) - res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 100) -end - -grid_strategy = NeuralPDE.GridTraining(0.1) -quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e3, abstol = 1e-3, - maxiters = 50, batch = 100) -stochastic_strategy = NeuralPDE.StochasticTraining(100; bcs_points = 50) -quasirandom_strategy = NeuralPDE.QuasiRandomTraining(100; - sampling_alg = LatinHypercubeSample(), - resampling = false, - minibatch = 100) -quasirandom_strategy_resampling = NeuralPDE.QuasiRandomTraining(100; - bcs_points = 50, - sampling_alg = LatticeRuleSample(), - resampling = true, - minibatch = 0) +grid_strategy = GridTraining(0.1) +quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(), + reltol = 1e3, abstol = 1e-3, + maxiters = 50, batch = 100) +stochastic_strategy = StochasticTraining(100; bcs_points = 50) +quasirandom_strategy = QuasiRandomTraining(100; + sampling_alg = LatinHypercubeSample(), + resampling = false, + minibatch = 100) +quasirandom_strategy_resampling = QuasiRandomTraining(100; + bcs_points = 50, + sampling_alg = LatticeRuleSample(), + resampling = true, + minibatch = 0) strategies = [ grid_strategy, @@ -235,19 +157,9 @@ end end h_predict = [phi[3]([z], res.u.depvar.h)[1] for z in zs] p_predict = [phi[4]([x, z], 
res.u.depvar.p)[1] for x in xs for z in zs] predict = [u_predict, v_predict, h_predict, p_predict] - for i in 1:4 @test predict[i]≈real_[i] rtol=10^-2 end - - # x_plot = collect(xs) - # y_plot = collect(ys) - # i=1 - # z=0 - # u_real = collect(analytic_sol_func_[1](x,y,z) for y in ys, x in xs); - # u_predict = collect(phi[1]([x,y,z],minimizers[1])[1] for y in ys, x in xs); - # plot(x_plot,y_plot,u_real) - # plot!(x_plot,y_plot,u_predict) end ## Example 2, 2D Poisson equation @@ -265,17 +177,12 @@ function test_2d_poisson_equation(chain_, strategy_) bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y), u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)] # Space and time domains - domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] - - discretization = NeuralPDE.PhysicsInformedNN(chain_, - strategy_) + domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)] + discretization = PhysicsInformedNN(chain_, strategy_) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) - prob = NeuralPDE.discretize(pde_system, discretization) - sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) - res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 500, - cb = callback) + prob = discretize(pde_system, discretization) + res = solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 500, cb = callback) phi = discretization.phi xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] @@ -285,35 +192,23 @@ function test_2d_poisson_equation(chain_, strategy_) (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], (length(xs), length(ys))) - diff_u = abs.(u_predict .- u_real) - @test u_predict≈u_real atol=2.0 - - # p1 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic"); - # p2 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict"); - # p3 = plot(xs, ys, diff_u,linetype=:contourf,title = "error"); - # plot(p1,p2,p3) end @testset "Example 2, 2D Poisson equation" begin grid_strategy = GridTraining(0.1) - - chain = Lux.Chain(Lux.Dense(2, 12, Flux.σ), Lux.Dense(12, 12, Flux.σ), Lux.Dense(12, 1)) - fluxchain = Chain(Dense(2, 12, Flux.σ), Dense(12, 12, Flux.σ), Dense(12, 1)) |> f64 - chains = [fluxchain, chain] - for chain in chains - test_2d_poisson_equation(chain, grid_strategy) - end + chain = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), Lux.Dense(12, 1)) + test_2d_poisson_equation(chain, grid_strategy) for strategy_ in strategies - chain_ = Lux.Chain(Lux.Dense(2, 12, Flux.σ), Lux.Dense(12, 12, Flux.σ), + chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), Lux.Dense(12, 1)) test_2d_poisson_equation(chain_, strategy_) end algs = [CubatureJLp()] #CubatureJLh(), for alg in algs - chain_ = Lux.Chain(Lux.Dense(2, 12, Flux.σ), Lux.Dense(12, 12, Flux.σ), + chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), Lux.Dense(12, 1)) strategy_ = NeuralPDE.QuadratureTraining(quadrature_alg = alg, reltol = 1e-4, abstol = 1e-3, maxiters = 30, batch = 10) @@ -348,16 +243,15 @@ end chain = [[Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh), Lux.Dense(12, 1)) for _ in 1:3] [Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)) for _ in 1:2]] - quasirandom_strategy = NeuralPDE.QuasiRandomTraining(100; #points - sampling_alg = LatinHypercubeSample()) + quasirandom_strategy = QuasiRandomTraining(100; sampling_alg = LatinHypercubeSample()) - discretization = NeuralPDE.PhysicsInformedNN(chain, quasirandom_strategy) + discretization = 
PhysicsInformedNN(chain, quasirandom_strategy) @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)]) - prob = NeuralPDE.discretize(pde_system, discretization) - sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) + sym_prob = symbolic_discretize(pde_system, discretization) pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions @@ -379,11 +273,8 @@ end u_predict = [first(phi(x, res.u.depvar.u)) for x in xs] @test u_predict≈u_real atol=10^-4 - - # x_plot = collect(xs) - # plot(x_plot ,u_real) - # plot!(x_plot ,u_predict) end + ## Example 4, system of pde @testset "Example 4, system of pde" begin @parameters x, y @@ -406,16 +297,16 @@ end chain1 = Lux.Chain(Lux.Dense(2, 15, Lux.tanh), Lux.Dense(15, 1)) chain2 = Lux.Chain(Lux.Dense(2, 15, Lux.tanh), Lux.Dense(15, 1)) - quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e-3, abstol = 1e-3, - maxiters = 50, batch = 100) + quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(), + reltol = 1e-3, abstol = 1e-3, + maxiters = 50, batch = 100) chain = [chain1, chain2] - discretization = NeuralPDE.PhysicsInformedNN(chain, quadrature_strategy) + discretization = PhysicsInformedNN(chain, quadrature_strategy) @named pde_system = PDESystem(eqs, bcs, domains, [x, y], [u1(x, y), u2(x, y)]) - prob = NeuralPDE.discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 1000) phi = discretization.phi @@ -430,10 +321,6 @@ end @test u_predict[1]≈u_real[1] atol=0.1 @test u_predict[2]≈u_real[2] atol=0.1 - - # p1 =plot(xs, ys, u_predict, st=:surface); - # p2 = plot(xs, ys, u_real, st=:surface); - # plot(p1,p2) end ## Example 5, 2d wave equation, neumann boundary condition @@ -465,12 +352,12 @@ end phi = NeuralPDE.Phi(chain) derivative = NeuralPDE.numeric_derivative - quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e-3, abstol = 1e-3, - maxiters = 50, batch = 100) + quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(), + reltol = 1e-3, abstol = 1e-3, + maxiters = 50, batch = 100) - discretization = NeuralPDE.PhysicsInformedNN(chain, quadrature_strategy) - prob = NeuralPDE.discretize(pde_system, discretization) + discretization = PhysicsInformedNN(chain, quadrature_strategy) + prob = discretize(pde_system, discretization) cb_ = function (p, l) println("loss: ", l) @@ -490,15 +377,9 @@ end (length(xs), length(ts))) u_real = reshape([analytic_sol_func(x, t) for x in xs for t in ts], (length(xs), length(ts))) - @test u_predict≈u_real atol=0.1 - - # diff_u = abs.(u_predict .- u_real) - # p1 = plot(xs, ts, u_real, linetype=:contourf,title = "analytic"); - # p2 =plot(xs, ts, u_predict, linetype=:contourf,title = "predict"); - # p3 = plot(xs, ts, diff_u,linetype=:contourf,title = "error"); - # plot(p1,p2,p3) end + ## Example 6, pde with mixed derivative @testset "Example 6, pde with mixed derivative" begin @parameters x y @@ -518,16 +399,16 @@ end # Space and time domains domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)] - quadrature_strategy = NeuralPDE.QuadratureTraining() + quadrature_strategy = QuadratureTraining() # Neural network inner = 20 chain = Lux.Chain(Lux.Dense(2, inner, Lux.tanh), Lux.Dense(inner, inner, Lux.tanh), Lux.Dense(inner, 1)) - discretization = 
NeuralPDE.PhysicsInformedNN(chain, quadrature_strategy) + discretization = PhysicsInformedNN(chain, quadrature_strategy) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) - prob = NeuralPDE.discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 1500) @show res.original @@ -541,12 +422,5 @@ end (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], (length(xs), length(ys))) - diff_u = abs.(u_predict .- u_real) - @test u_predict≈u_real rtol=0.1 - - # p1 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic"); - # p2 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict"); - # p3 = plot(xs, ys, diff_u,linetype=:contourf,title = "error"); - # plot(p1,p2,p3) end diff --git a/test/NNPDE_tests_gpu.jl b/test/NNPDE_tests_gpu.jl deleted file mode 100644 index 4a2896a84d..0000000000 --- a/test/NNPDE_tests_gpu.jl +++ /dev/null @@ -1,252 +0,0 @@ -using Flux, OptimizationOptimisers -using Test, NeuralPDE -using Optimization -using CUDA, QuasiMonteCarlo -import ModelingToolkit: Interval, infimum, supremum - -using Random -Random.seed!(100) - -callback = function (p, l) - println("Current loss is: $l") - return false -end -CUDA.allowscalar(false) -#const gpuones = cu(ones(1)) - -## ODE -println("ode") -@parameters θ -@variables u(..) -Dθ = Differential(θ) - -# 1D ODE -eq = Dθ(u(θ)) ~ θ^3 + 2.0f0 * θ + (θ^2) * ((1.0f0 + 3 * (θ^2)) / (1.0f0 + θ + (θ^3))) - - u(θ) * (θ + ((1.0f0 + 3.0f0 * (θ^2)) / (1.0f0 + θ + θ^3))) - -# Initial and boundary conditions -bcs = [u(0.f0) ~ 1.0f0] - -# Space and time domains -domains = [θ ∈ Interval(0.0f0, 1.0f0)] -# Discretization -dt = 0.1f0 -# Neural network -inner = 20 -chain = Chain(Dense(1, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, 1)) |> gpu - -strategy = NeuralPDE.GridTraining(dt) -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy) - -@named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)]) -prob = NeuralPDE.discretize(pde_system, discretization) -symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000) -phi = discretization.phi - -analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 -ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1] -u_real = [analytic_sol_func(t) for t in ts] -u_predict = [first(Array(phi([t], res.minimizer))) for t in ts] - -@test u_predict≈u_real atol=0.2 - -# t_plot = collect(ts) -# plot(t_plot ,u_real) -# plot!(t_plot ,u_predict) - -## 1D PDE Dirichlet boundary conditions -println("1D PDE Dirichlet boundary conditions") -@parameters t x -@variables u(..) 
-Dt = Differential(t) -Dxx = Differential(x)^2 - -eq = Dt(u(t, x)) ~ Dxx(u(t, x)) -bcs = [u(0, x) ~ cos(x), - u(t, 0) ~ exp(-t), - u(t, 1) ~ exp(-t) * cos(1)] - -domains = [t ∈ Interval(0.0, 1.0), - x ∈ Interval(0.0, 1.0)] - -@named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) - -inner = 30 -chain = Flux.Chain(Dense(2, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, 1)) |> gpu |> f64 - -strategy = NeuralPDE.StochasticTraining(500) -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy) -prob = NeuralPDE.discretize(pdesys, discretization) -symprob = NeuralPDE.symbolic_discretize(pdesys, discretization) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 1000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 1000) -phi = discretization.phi - -u_exact = (t, x) -> exp.(-t) * cos.(x) -ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] -u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x in xs], - (length(ts), length(xs))) -u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) -diff_u = abs.(u_predict .- u_real) - -@test u_predict≈u_real atol=1.0 - -# p1 = plot(ts, xs, u_real, linetype=:contourf,title = "analytic"); -# p2 = plot(ts, xs, u_predict, linetype=:contourf,title = "predict"); -# p3 = plot(ts, xs, diff_u,linetype=:contourf,title = "error"); -# plot(p1,p2,p3) - -## 1D PDE Neumann boundary conditions and Float64 accuracy -println("1D PDE Neumann boundary conditions and Float64 accuracy") -@parameters t x -@variables u(..) 
-Dt = Differential(t) -Dx = Differential(x) -Dxx = Differential(x)^2 - -# 1D PDE and boundary conditions -eq = Dt(u(t, x)) ~ Dxx(u(t, x)) -bcs = [u(0, x) ~ cos(x), - Dx(u(t, 0)) ~ 0.0, - Dx(u(t, 1)) ~ -exp(-t) * sin(1.0)] - -# Space and time domains -domains = [t ∈ Interval(0.0, 1.0), - x ∈ Interval(0.0, 1.0)] - -# PDE system -@named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) - -inner = 20 -chain = Flux.Chain(Dense(2, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, 1)) |> gpu |> f64 - -strategy = NeuralPDE.QuasiRandomTraining(500; #points - sampling_alg = SobolSample(), - resampling = false, - minibatch = 30) - -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy) -prob = NeuralPDE.discretize(pdesys, discretization) -symprob = NeuralPDE.symbolic_discretize(pdesys, discretization) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 2000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000) -phi = discretization.phi - -u_exact = (t, x) -> exp(-t) * cos(x) -ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] -u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x in xs], - (length(ts), length(xs))) -u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) -diff_u = abs.(u_predict .- u_real) - -@test u_predict≈u_real atol=1.0 - -# p1 = plot(ts, xs, u_real, linetype=:contourf,title = "analytic"); -# p2 = plot(ts, xs, u_predict, linetype=:contourf,title = "predict"); -# p3 = plot(ts, xs, diff_u,linetype=:contourf,title = "error"); -# plot(p1,p2,p3) - -## 2D PDE -println("2D PDE") -@parameters t x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 -Dt = Differential(t) -t_min = 0.0 -t_max = 2.0 -x_min = 0.0 -x_max = 2.0 -y_min = 0.0 -y_max = 2.0 - -# 3D PDE -eq = Dt(u(t, x, y)) ~ Dxx(u(t, x, y)) + Dyy(u(t, x, y)) - -analytic_sol_func(t, x, y) = exp(x + y) * cos(x + y + 4t) -# Initial and boundary conditions -bcs = [u(t_min, x, y) ~ analytic_sol_func(t_min, x, y), - u(t, x_min, y) ~ analytic_sol_func(t, x_min, y), - u(t, x_max, y) ~ analytic_sol_func(t, x_max, y), - u(t, x, y_min) ~ analytic_sol_func(t, x, y_min), - u(t, x, y_max) ~ analytic_sol_func(t, x, y_max)] - -# Space and time domains -domains = [t ∈ Interval(t_min, t_max), - x ∈ Interval(x_min, x_max), - y ∈ Interval(y_min, y_max)] - -# Neural network -inner = 25 -chain = Flux.Chain(Dense(3, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, inner, Flux.σ), - Dense(inner, 1)) |> gpu |> f64 - -strategy = NeuralPDE.GridTraining(0.05) -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy) - -@named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)]) -prob = NeuralPDE.discretize(pde_system, discretization) -symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2500) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 2500) -@show res.original - -phi = discretization.phi -ts, xs, ys = [infimum(d.domain):0.1:supremum(d.domain) for d in domains] -u_real = [analytic_sol_func(t, x, y) for t in ts for x in xs for y in ys] -u_predict = [first(Array(phi([t, x, y], res.minimizer))) for t in ts for x in xs - for y in ys] - -@test u_predict≈u_real rtol=0.2 - -# using Plots -# using Printf -# -# function plot_(res) -# # Animate -# anim = @animate for (i, t) in enumerate(0:0.05:t_max) -# @info "Animating frame $i..." 
-# u_real = reshape([analytic_sol_func(t,x,y) for x in xs for y in ys], (length(xs),length(ys))) -# u_predict = reshape([Array(phi([t, x, y], res.minimizer))[1] for x in xs for y in ys], length(xs), length(ys)) -# u_error = abs.(u_predict .- u_real) -# title = @sprintf("predict t = %.3f", t) -# p1 = plot(xs, ys, u_predict,st=:surface, label="", title=title) -# title = @sprintf("real") -# p2 = plot(xs, ys, u_real,st=:surface, label="", title=title) -# title = @sprintf("error") -# p3 = plot(xs, ys, u_error, st=:contourf,label="", title=title) -# plot(p1,p2,p3) -# end -# gif(anim,"3pde.gif", fps=10) -# end -# -# plot_(res) diff --git a/test/NNPDE_tests_gpu_Lux.jl b/test/NNPDE_tests_gpu_Lux.jl index 6dfbe3aac1..2dca784943 100644 --- a/test/NNPDE_tests_gpu_Lux.jl +++ b/test/NNPDE_tests_gpu_Lux.jl @@ -33,22 +33,21 @@ domains = [θ ∈ Interval(0.0f0, 1.0f0)] dt = 0.1f0 # Neural network inner = 20 -chain = Chain(Dense(1, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, 1)) - -strategy = NeuralPDE.GridTraining(dt) +chain = Lux.Chain(Lux.Dense(1, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) + +strategy = GridTraining(dt) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy; - init_params = ps) +discretization = PhysicsInformedNN(chain, + strategy; + init_params = ps) @named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)]) -prob = NeuralPDE.discretize(pde_system, discretization) -symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) +prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000) phi = discretization.phi @@ -56,13 +55,8 @@ analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1] u_real = [analytic_sol_func(t) for t in ts] u_predict = [first(Array(phi([t], res.minimizer))) for t in ts] - @test u_predict≈u_real atol=0.2 -# t_plot = collect(ts) -# plot(t_plot ,u_real) -# plot!(t_plot ,u_predict) - ## 1D PDE Dirichlet boundary conditions println("1D PDE Dirichlet boundary conditions") @parameters t x @@ -81,22 +75,18 @@ domains = [t ∈ Interval(0.0, 1.0), @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) inner = 30 -chain = Lux.Chain(Dense(2, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, 1)) - -strategy = NeuralPDE.StochasticTraining(500) +chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) + +strategy = StochasticTraining(500) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy; - init_params = ps) -prob = NeuralPDE.discretize(pdesys, discretization) -symprob = NeuralPDE.symbolic_discretize(pdesys, discretization) - +discretization = PhysicsInformedNN(chain, strategy; init_params = ps) +prob = discretize(pdesys, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); 
maxiters = 1000) prob = remake(prob, u0 = res.minimizer) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 1000) @@ -108,14 +98,8 @@ u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x (length(ts), length(xs))) u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) diff_u = abs.(u_predict .- u_real) - @test u_predict≈u_real atol=1.0 -# p1 = plot(ts, xs, u_real, linetype=:contourf,title = "analytic"); -# p2 = plot(ts, xs, u_predict, linetype=:contourf,title = "predict"); -# p3 = plot(ts, xs, diff_u,linetype=:contourf,title = "error"); -# plot(p1,p2,p3) - ## 1D PDE Neumann boundary conditions and Float64 accuracy println("1D PDE Neumann boundary conditions and Float64 accuracy") @parameters t x @@ -138,23 +122,16 @@ domains = [t ∈ Interval(0.0, 1.0), @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) inner = 20 -chain = Lux.Chain(Dense(2, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, 1)) +chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) -strategy = NeuralPDE.QuasiRandomTraining(500; #points - sampling_alg = SobolSample(), - resampling = false, - minibatch = 30) +strategy = QuasiRandomTraining(500; sampling_alg = SobolSample(), resampling = false, minibatch = 30) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy; - init_params = ps) -prob = NeuralPDE.discretize(pdesys, discretization) -symprob = NeuralPDE.symbolic_discretize(pdesys, discretization) - +discretization = PhysicsInformedNN(chain, strategy; init_params = ps) +prob = discretize(pdesys, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 2000) prob = remake(prob, u0 = res.minimizer) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000) @@ -166,15 +143,8 @@ u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x (length(ts), length(xs))) u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) diff_u = abs.(u_predict .- u_real) - @test u_predict≈u_real atol=1.0 -# p1 = plot(ts, xs, u_real, linetype=:contourf,title = "analytic"); -# p2 = plot(ts, xs, u_predict, linetype=:contourf,title = "predict"); -# p3 = plot(ts, xs, diff_u,linetype=:contourf,title = "error"); -# plot(p1,p2,p3) - -## 2D PDE println("2D PDE") @parameters t x y @variables u(..) 
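The Lux GPU tests in this file replace the deleted Flux idiom `chain |> gpu |> f64` by moving the flattened parameter tree to the device rather than the model itself. A minimal sketch of that pattern; `gpud` is assumed to be a device function obtained from `gpu_device()` in this file's setup (not shown in these hunks), and LuxCUDA must be loaded for a CUDA device to be found:

using NeuralPDE, Lux, LuxCUDA, ComponentArrays, Random
gpud = gpu_device()  # assumed device getter from the Lux device utilities
chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 1))
# CPU setup -> flatten to a ComponentArray -> move to GPU -> promote to Float64
ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64
discretization = PhysicsInformedNN(chain, GridTraining(0.1); init_params = ps)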
@@ -188,7 +158,6 @@ x_max = 2.0 y_min = 0.0 y_max = 2.0 -# 3D PDE eq = Dt(u(t, x, y)) ~ Dxx(u(t, x, y)) + Dyy(u(t, x, y)) analytic_sol_func(t, x, y) = exp(x + y) * cos(x + y + 4t) @@ -206,22 +175,17 @@ domains = [t ∈ Interval(t_min, t_max), # Neural network inner = 25 -chain = Lux.Chain(Dense(3, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, 1)) +chain = Lux.Chain(Lux.Dense(3, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) -strategy = NeuralPDE.GridTraining(0.05) +strategy = GridTraining(0.05) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 -discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy; - init_params = ps) - +discretization = PhysicsInformedNN(chain, strategy; init_params = ps) @named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)]) -prob = NeuralPDE.discretize(pde_system, discretization) -symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) - +prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2500) prob = remake(prob, u0 = res.minimizer) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 2500) @@ -231,29 +195,6 @@ phi = discretization.phi ts, xs, ys = [infimum(d.domain):0.1:supremum(d.domain) for d in domains] u_real = [analytic_sol_func(t, x, y) for t in ts for x in xs for y in ys] u_predict = [first(Array(phi([t, x, y], res.minimizer))) for t in ts for x in xs - for y in ys] + for y in ys] @test u_predict≈u_real rtol=0.2 - -# using Plots -# using Printf -# -# function plot_(res) -# # Animate -# anim = @animate for (i, t) in enumerate(0:0.05:t_max) -# @info "Animating frame $i..." 
-# u_real = reshape([analytic_sol_func(t,x,y) for x in xs for y in ys], (length(xs),length(ys))) -# u_predict = reshape([Array(phi([t, x, y], res.minimizer))[1] for x in xs for y in ys], length(xs), length(ys)) -# u_error = abs.(u_predict .- u_real) -# title = @sprintf("predict t = %.3f", t) -# p1 = plot(xs, ys, u_predict,st=:surface, label="", title=title) -# title = @sprintf("real") -# p2 = plot(xs, ys, u_real,st=:surface, label="", title=title) -# title = @sprintf("error") -# p3 = plot(xs, ys, u_error, st=:contourf,label="", title=title) -# plot(p1,p2,p3) -# end -# gif(anim,"3pde.gif", fps=10) -# end -# -# plot_(res) diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index bd49783ec4..ad21395e3c 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -70,11 +70,6 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite total_diff = sum(diff_u) total_u = sum(abs.(u_real)) total_diff_rel = total_diff / total_u - - #p1 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic"); - #p2 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict"); - #p3 = plot(xs, ys, diff_u,linetype=:contourf,title = "error"); - #(plot=plot(p1,p2,p3), error=total_diff, total_diff_rel=total_diff_rel) (error = total_diff, total_diff_rel = total_diff_rel) end @@ -94,7 +89,3 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @test error_results_no_logs[1][:total_diff_rel] < 0.4 @test error_results_no_logs[2][:total_diff_rel] < 0.4 @test error_results_no_logs[3][:total_diff_rel] < 0.4 - -#plots_diffs[1][:plot] -#plots_diffs[2][:plot] -#plots_diffs[3][:plot] diff --git a/test/additional_loss_tests.jl b/test/additional_loss_tests.jl index 4cbf8150bc..e91478f834 100644 --- a/test/additional_loss_tests.jl +++ b/test/additional_loss_tests.jl @@ -122,9 +122,6 @@ u_predict = [first(phi(x, res.u)) for x in xs] @test u_predict≈u_real rtol=1e-3 -# plot(xs ,u_real, label = "analytic") -# plot!(xs ,u_predict, label = "predict") - ## Example 8, Lorenz System (Parameter Estimation) println("Example 8, Lorenz System") @@ -228,16 +225,6 @@ p_ = res.minimizer[(end - 2):end] @test sum(abs2, p_[2] - 28.00) < 0.1 @test sum(abs2, p_[3] - (8 / 3)) < 0.1 -#Plotting the system -# init_params = discretization.init_params -# acum = [0;accumulate(+, length.(init_params))] -# sep = [acum[i]+1 : acum[i+1] for i in 1:length(acum)-1] -# minimizers = [res.minimizer[s] for s in sep] -# ts = [infimum(d.domain):dt/10:supremum(d.domain) for d in domains][1] -# u_predict = [[discretization.phi[i]([t],minimizers[i])[1] for t in ts] for i in 1:3] -# plot(sol) -# plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"]) - ## approximation from data println("Approximation of function from data and additional_loss") @@ -282,10 +269,3 @@ prob = remake(prob, u0 = res.minimizer) res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500) @test phi(xs, res.u)≈aproxf_(xs) rtol=0.01 - -# xs_ = xs' -# plot(xs_,data') -# plot!(xs_, phi(xs,res.u)') - -# func(x,y) = -20.0 * exp(-0.2 * sqrt(0.5 * (x^2 + y^2))) - exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + e + 20 -# func(x,y) = -abs(sin(x) * cos(y) * exp(abs(1 - (sqrt(x^2 + y^2)/pi)))) diff --git a/test/direct_function_tests.jl b/test/direct_function_tests.jl index 8871e29eef..663537c01f 100644 --- a/test/direct_function_tests.jl +++ b/test/direct_function_tests.jl @@ -1,4 +1,4 @@ -using Flux, NeuralPDE, Test +using NeuralPDE, Test using Optimization, OptimizationOptimJL, OptimizationOptimisers 
using QuasiMonteCarlo import ModelingToolkit: Interval, infimum, supremum @@ -37,7 +37,6 @@ strategy = NeuralPDE.GridTraining(0.01) discretization = NeuralPDE.PhysicsInformedNN(chain, strategy) @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.05), maxiters = 1000) prob = remake(prob, u0 = res.minimizer) @@ -46,9 +45,6 @@ res = Optimization.solve(prob, OptimizationOptimJL.BFGS(initial_stepnorm = 0.01) @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01 -# plot(xs,func(xs)) -# plot!(xs, discretization.phi(xs',res.u)') - ## Approximation of function 1D 2 println("Approximation of function 1D 2") @@ -68,12 +64,11 @@ chain = Lux.Chain(Lux.Dense(1, hidden, Lux.sin), Lux.Dense(hidden, hidden, Lux.sin), Lux.Dense(hidden, 1)) -strategy = NeuralPDE.GridTraining(0.01) +strategy = GridTraining(0.01) -discretization = NeuralPDE.PhysicsInformedNN(chain, strategy) +discretization = PhysicsInformedNN(chain, strategy) @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) -prob = NeuralPDE.discretize(pde_system, discretization) +prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) prob = remake(prob, u0 = res.minimizer) @@ -85,9 +80,6 @@ func_s = func(xs) @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01 -# plot(xs,func(xs)) -# plot!(xs, discretization.phi(xs',res.u)') - ## Approximation of function 2D println("Approximation of function 2D") @@ -111,10 +103,10 @@ chain = Lux.Chain(Lux.Dense(2, hidden, Lux.tanh), Lux.Dense(hidden, hidden, Lux.tanh), Lux.Dense(hidden, 1)) -strategy = NeuralPDE.GridTraining(d) -discretization = NeuralPDE.PhysicsInformedNN(chain, strategy) +strategy = GridTraining(d) +discretization = PhysicsInformedNN(chain, strategy) @named pde_system = PDESystem(eq, bc, domain, [x, y], [u(x, y)]) -prob = NeuralPDE.discretize(pde_system, discretization) +prob = discretize(pde_system, discretization) symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) symprob.loss_functions.full_loss_function(symprob.flat_init_params, nothing) @@ -133,8 +125,3 @@ u_real = reshape([func(x, y) for x in xs for y in ys], (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) @test u_predict≈u_real rtol=0.05 - -# p1 = plot(xs, ys, u_real, st=:surface,title = "analytic"); -# p2 = plot(xs, ys, u_predict, st=:surface,title = "predict"); -# p3 = plot(xs, ys, diff_u,st=:surface,title = "error"); -# plot(p1,p2,p3) diff --git a/test/forward_tests.jl b/test/forward_tests.jl index a8fc17fa1a..48d18b9189 100644 --- a/test/forward_tests.jl +++ b/test/forward_tests.jl @@ -2,7 +2,7 @@ using Test, NeuralPDE using SciMLBase using DomainSets import ModelingToolkit: Interval -import Lux, Random, Zygote, Flux +import Lux, Random, Zygote using ComponentArrays @testset "ODE" begin @@ -18,10 +18,10 @@ using ComponentArrays init_params = Float64[] chain([1], Float64[], st) - strategy_ = NeuralPDE.GridTraining(0.1) - discretization = NeuralPDE.PhysicsInformedNN(chain, strategy_; init_params = Float64[]) + strategy_ = GridTraining(0.1) + discretization = PhysicsInformedNN(chain, strategy_; init_params = Float64[]) @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) - prob = NeuralPDE.discretize(pde_system, discretization) + prob = discretize(pde_system, 
discretization) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) eqs = pde_system.eqs @@ -46,9 +46,9 @@ using ComponentArrays end @testset "derivatives" begin - chain = Flux.Chain(Flux.Dense(2, 16, Lux.σ), Flux.Dense(16, 16, Lux.σ), - Flux.Dense(16, 1)) |> Flux.f64 - init_params = Flux.destructure(chain)[1] + chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 16, Lux.σ), + Lux.Dense(16, 1)) + init_params = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray .|> Float64 eltypeθ = eltype(init_params) phi = NeuralPDE.Phi(chain) @@ -96,15 +96,13 @@ end domains = [x ∈ Interval(1.0, 2.0)] chain = Lux.Chain(x -> exp.(x) ./ (exp.(2 .* x) .+ 3)) init_params, st = Lux.setup(Random.default_rng(), chain) - init_params = Float64[] - chain([1], init_params, st) - strategy_ = NeuralPDE.GridTraining(0.1) - discretization = NeuralPDE.PhysicsInformedNN(chain, strategy_; + strategy_ = GridTraining(0.1) + discretization = PhysicsInformedNN(chain, strategy_; init_params = init_params) @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) - sym_prob = SciMLBase.symbolic_discretize(pde_system, discretization) - prob = NeuralPDE.discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) inner_loss = sym_prob.loss_functions.datafree_pde_loss_functions[1] exact_u = π / (3 * sqrt(3)) @test inner_loss(ones(1, 1), init_params)[1]≈exact_u rtol=1e-5 @@ -119,11 +117,11 @@ end chain = Lux.Chain(x -> x .* exp.(-x .^ 2)) chain([1], init_params, st) - discretization = NeuralPDE.PhysicsInformedNN(chain, strategy_; - init_params = init_params) + discretization = PhysicsInformedNN(chain, strategy_; + init_params = init_params) @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)]) - sym_prob = SciMLBase.symbolic_discretize(pde_system, discretization) - prob = SciMLBase.discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) inner_loss = sym_prob.loss_functions.datafree_pde_loss_functions[1] exact_u = 0 @test inner_loss(ones(1, 1), init_params)[1]≈exact_u rtol=1e-9 diff --git a/test/neural_adapter_tests.jl b/test/neural_adapter_tests.jl index 16dedff24a..d99793408c 100644 --- a/test/neural_adapter_tests.jl +++ b/test/neural_adapter_tests.jl @@ -1,4 +1,3 @@ -using Flux using Test, NeuralPDE using Optimization, OptimizationOptimJL import ModelingToolkit: Interval, infimum, supremum @@ -33,23 +32,22 @@ domains = [x ∈ Interval(0.0, 1.0), quadrature_strategy = NeuralPDE.QuadratureTraining(reltol = 1e-2, abstol = 1e-2, maxiters = 50, batch = 100) inner = 8 -af = Flux.tanh -chain1 = Chain(Dense(2, inner, af), - Dense(inner, inner, af), - Dense(inner, 1)) |> f64 -init_params = Flux.destructure(chain1)[1] +af = Lux.tanh +chain1 = Lux.Chain(Lux.Dense(2, inner, af), + Lux.Dense(inner, inner, af), + Lux.Dense(inner, 1)) +init_params = Lux.setup(Random.default_rng(), chain1)[1] |> ComponentArray .|> Float64 discretization = NeuralPDE.PhysicsInformedNN(chain1, quadrature_strategy; init_params = init_params) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 2000) phi = discretization.phi inner_ = 10 -af = Flux.tanh +af = Lux.tanh chain2 = Lux.Chain(Lux.Dense(2, inner_, af), 
Lux.Dense(inner_, inner_, af), Lux.Dense(inner_, inner_, af), @@ -63,39 +61,39 @@ function loss(cord, θ) ch2 .- phi(cord, res.minimizer) end -grid_strategy = NeuralPDE.GridTraining(0.05) -quadrature_strategy = NeuralPDE.QuadratureTraining(reltol = 1e-5, abstol = 1e-5, +grid_strategy = GridTraining(0.05) +quadrature_strategy = QuadratureTraining(reltol = 1e-5, abstol = 1e-5, maxiters = 50, batch = 100) -stochastic_strategy = NeuralPDE.StochasticTraining(400) -quasirandom_strategy = NeuralPDE.QuasiRandomTraining(400, resampling = false, +stochastic_strategy = StochasticTraining(400) +quasirandom_strategy = QuasiRandomTraining(400, resampling = false, minibatch = 200) -quasirandom_strategy_resampling = NeuralPDE.QuasiRandomTraining(250) +quasirandom_strategy_resampling = QuasiRandomTraining(250) strategies1 = [grid_strategy, quadrature_strategy] reses_1 = map(strategies1) do strategy_ println("Neural adapter Poisson equation, strategy: $(nameof(typeof(strategy_)))") prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy_) - res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(0.01); maxiters = 8000) + res_ = solve(prob_, OptimizationOptimisers.Adam(0.01); maxiters = 8000) prob_ = remake(prob_, u0 = res_.minimizer) - res_ = Optimization.solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 200) + res_ = solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 200) end strategies2 = [stochastic_strategy, quasirandom_strategy]# quasirandom_strategy_resampling] reses_2 = map(strategies2) do strategy_ println("Neural adapter Poisson equation, strategy: $(nameof(typeof(strategy_)))") prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy_) - res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(0.01); maxiters = 8000) + res_ = solve(prob_, OptimizationOptimisers.Adam(0.01); maxiters = 8000) prob_ = remake(prob_, u0 = res_.minimizer) - res_ = Optimization.solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 200) + res_ = solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 200) end reses_ = [reses_1; reses_2] -discretizations = map(res_ -> NeuralPDE.PhysicsInformedNN(chain2, +discretizations = map(res_ -> PhysicsInformedNN(chain2, grid_strategy; init_params = res_.minimizer), reses_) -probs = map(discret -> NeuralPDE.discretize(pde_system, discret), discretizations) +probs = map(discret -> discretize(pde_system, discret), discretizations) phis = map(discret -> discret.phi, discretizations) xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] @@ -118,17 +116,6 @@ map(u_predicts[2:end]) do upred @test_broken upred≈u_real atol=1e-2 end -#using Plots -# i=3 -# diff_u = abs.(u_predict .- u_real) -# diff_u_ = abs.(u_predicts[i] .- u_real) -# p2 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict"); -# p1 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic"); -# p5 = plot(xs, ys, diff_u,linetype=:contourf,title = "error"); -# p3 = plot(xs, ys, u_predicts[i],linetype=:contourf,title = "predict_"); -# p6 = plot(xs, ys, diff_u_,linetype=:contourf,title = "error_"); -# plot(p2,p1,p5,p3,p6) - ## Example, 2D Poisson equation, domain decomposition println("Example, 2D Poisson equation, domain decomposition") @parameters x y @@ -146,13 +133,11 @@ x_0 = 0.0 x_end = 1.0 x_domain = Interval(x_0, x_end) y_domain = Interval(0.0, 1.0) -domains = [x ∈ x_domain, - y ∈ y_domain] - +domains = [x ∈ x_domain, y ∈ y_domain] count_decomp = 10 # Neural network -af = Flux.tanh +af = Lux.tanh inner = 12 chains = [Lux.Chain(Lux.Dense(2, inner, af), 
Lux.Dense(inner, inner, af), Lux.Dense(inner, 1)) for _ in 1:count_decomp] @@ -200,13 +185,9 @@ for i in 1:count_decomp bcs_ = create_bcs(domains_[1].domain, phi_bound) @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)]) push!(pde_system_map, pde_system_) - strategy = NeuralPDE.GridTraining([0.1 / count_decomp, 0.1]) - - discretization = NeuralPDE.PhysicsInformedNN(chains[i], strategy; - init_params = init_params[i]) - - prob = NeuralPDE.discretize(pde_system_, discretization) - symprob = NeuralPDE.symbolic_discretize(pde_system_, discretization) + strategy = GridTraining([0.1 / count_decomp, 0.1]) + discretization = PhysicsInformedNN(chains[i], strategy; init_params = init_params[i]) + prob = discretize(pde_system_, discretization) res_ = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1500) @show res_.minimum phi = discretization.phi @@ -214,18 +195,6 @@ for i in 1:count_decomp push!(phis, phi) end -# function plot_(i) -# xs, ys = [infimum(d.domain):dx:supremum(d.domain) for (dx,d) in zip([0.001,0.01], domains_map[i])] -# u_predict = reshape([first(phis[i]([x,y],reses[i].minimizer)) for x in xs for y in ys],(length(xs),length(ys))) -# u_real = reshape([analytic_sol_func(x,y) for x in xs for y in ys], (length(xs),length(ys))) -# diff_u = abs.(u_predict .- u_real) -# p1 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic"); -# p2 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict"); -# p3 = plot(xs, ys, diff_u,linetype=:contourf,title = "error"); -# plot(p1,p2,p3) -# end -# ps =[plot_(i) for i in 1:count_decomp] - function compose_result(dx) u_predict_array = Float64[] diff_u_array = Float64[] @@ -256,7 +225,7 @@ dx = 0.01 u_predict, diff_u = compose_result(dx) inner_ = 18 -af = Flux.tanh +af = Lux.tanh chain2 = Lux.Chain(Lux.Dense(2, inner_, af), Lux.Dense(inner_, inner_, af), Lux.Dense(inner_, inner_, af), @@ -277,12 +246,12 @@ losses = map(1:count_decomp) do i end prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map, - NeuralPDE.GridTraining([0.1 / count_decomp, 0.1])) -res_ = Optimization.solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 2000) + GridTraining([0.1 / count_decomp, 0.1])) +res_ = solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 2000) @show res_.minimum prob_ = NeuralPDE.neural_adapter(losses, res_.minimizer, pde_system_map, - NeuralPDE.GridTraining(0.01)) -res_ = Optimization.solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 1000) + GridTraining(0.01)) +res_ = solve(prob_, OptimizationOptimJL.BFGS(); maxiters = 1000) @show res_.minimum phi_ = NeuralPDE.Phi(chain2) @@ -296,10 +265,3 @@ diff_u_ = u_predict_ .- u_real @test u_predict≈u_real rtol=0.1 @test u_predict_≈u_real rtol=0.1 - -# p1 = plot(xs, ys, u_predict, linetype=:contourf,title = "predict 1"); -# p2 = plot(xs, ys, u_predict_,linetype=:contourf,title = "predict 2"); -# p3 = plot(xs, ys, u_real, linetype=:contourf,title = "analytic"); -# p4 = plot(xs, ys, diff_u,linetype=:contourf,title = "error 1"); -# p5 = plot(xs, ys, diff_u_,linetype=:contourf,title = "error 2"); -# plot(p1,p2,p3,p4,p5) diff --git a/test/runtests.jl b/test/runtests.jl index 5d6ac6909e..bda0cfa812 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,10 +5,6 @@ const GROUP = get(ENV, "GROUP", "All") const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR") -const is_TRAVIS = haskey(ENV, "TRAVIS") - -const is_CI = haskey(ENV, "CI") - function dev_subpkg(subpkg) subpkg_path = joinpath(dirname(@__DIR__), "lib", subpkg) Pkg.develop(PackageSpec(path = subpkg_path)) 
@@ -62,7 +58,6 @@ end Pkg.test(PackageSpec(name = "NeuralPDELogging", path = subpkg_path)) end if !is_APPVEYOR && GROUP == "GPU" - @safetestset "NNPDE_gpu" begin include("NNPDE_tests_gpu.jl") end @safetestset "NNPDE_gpu_Lux" begin include("NNPDE_tests_gpu_Lux.jl") end end end \ No newline at end of file From 805e376d23806b23d67c65a8883f286bcb21cf7a Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Tue, 30 Jan 2024 14:33:48 +0000 Subject: [PATCH 04/16] refactor: added support for converting Flux to Lux in NNODE and cleaned up docstrings --- src/ode_solve.jl | 101 +++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 56 deletions(-) diff --git a/src/ode_solve.jl b/src/ode_solve.jl index 64ccb16acc..4035e540bd 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -1,11 +1,7 @@ abstract type NeuralPDEAlgorithm <: DiffEqBase.AbstractODEAlgorithm end """ -```julia -NNODE(chain, opt=OptimizationPolyalgorithms.PolyOpt(), init_params = nothing; - autodiff=false, batch=0,additional_loss=nothing, - kwargs...) -``` + NNODE(chain, opt, init_params = nothing; autodiff = false, batch = 0, additional_loss = nothing, kwargs...) Algorithm for solving ordinary differential equations using a neural network. This is a specialization of the physics-informed neural network which is used as a solver for a standard `ODEProblem`. @@ -18,52 +14,49 @@ of the physics-informed neural network which is used as a solver for a standard ## Positional Arguments -* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer`. -* `opt`: The optimizer to train the neural network. Defaults to `OptimizationPolyalgorithms.PolyOpt()` -* `init_params`: The initial parameter of the neural network. By default, this is `nothing` - which thus uses the random initialization provided by the neural network library. +* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer` or `Flux.Chain`. + `Flux.Chain` will be converted to `Lux` using `Lux.transform`. +* `opt`: The optimizer to train the neural network. +* `init_params`: The initial parameter of the neural network. By default, this is `nothing` + which thus uses the random initialization provided by the neural network library. ## Keyword Arguments * `additional_loss`: A function additional_loss(phi, θ) where phi are the neural network trial solutions, θ are the weights of the neural network(s). - -## Example - -```julia -u0 = [1.0, 1.0] - ts=[t for t in 1:100] - (u_, t_) = (analytical_func(ts), ts) - function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) - end - alg = NeuralPDE.NNODE(chain, opt, additional_loss = additional_loss) -``` - * `autodiff`: The switch between automatic and numerical differentiation for the PDE operators. The reverse mode of the loss function is always automatic differentiation (via Zygote), this is only for the derivative in the loss function (the derivative with respect to time). * `batch`: The batch size to use for the internal quadrature. Defaults to `0`, which - means the application of the neural network is done at individual time points one - at a time. `batch>0` means the neural network is applied at a row vector of values - `t` simultaneously, i.e. it's the batch size for the neural network evaluations. - This requires a neural network compatible with batched data. + means the application of the neural network is done at individual time points one + at a time. 
`batch>0` means the neural network is applied at a row vector of values
+    `t` simultaneously, i.e. it's the batch size for the neural network evaluations.
+    This requires a neural network compatible with batched data.
 * `strategy`: The training strategy used to choose the points for the evaluations.
-    Default of `nothing` means that `QuadratureTraining` with QuadGK is used if no
-    `dt` is given, and `GridTraining` is used with `dt` if given.
+    Default of `nothing` means that `QuadratureTraining` with QuadGK is used if no
+    `dt` is given, and `GridTraining` is used with `dt` if given.
 * `kwargs`: Extra keyword arguments are splatted to the Optimization.jl `solve` call.
 
-## Example
+## Examples
+
+```julia
+u0 = [1.0, 1.0]
+ts = [t for t in 1:100]
+(u_, t_) = (analytical_func(ts), ts)
+function additional_loss(phi, θ)
+    return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_)
+end
+alg = NNODE(chain, opt, additional_loss = additional_loss)
+```
 
 ```julia
 f(u,p,t) = cos(2pi*t)
-tspan = (0.0f0, 1.0f0)
-u0 = 0.0f0
-prob = ODEProblem(linear, u0 ,tspan)
+tspan = (0.0, 1.0)
+u0 = 0.0
+prob = ODEProblem(f, u0, tspan)
-chain = Lux.Chain(Lux.Dense(1,5,σ), Lux.Dense(5,1))
+chain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1))
 opt = OptimizationOptimisers.Adam(0.1)
-sol = solve(prob, NeuralPDE.NNODE(chain,opt), dt=1/20f0, verbose = true,
-            abstol=1e-10, maxiters = 200)
+sol = solve(prob, NNODE(chain, opt), verbose = true, abstol = 1e-10, maxiters = 200)
 ```
 
 ## Solution Notes
@@ -94,16 +87,14 @@ end
 function NNODE(chain, opt, init_params = nothing;
         strategy = nothing,
         autodiff = false, batch = nothing, additional_loss = nothing, kwargs...)
+    !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain))
     NNODE(chain, opt, init_params, autodiff, batch, strategy, additional_loss, kwargs)
 end
 
 """
-```julia
-ODEPhi(chain::Lux.AbstractExplicitLayer, t, u0, st)
-```
+    ODEPhi(chain::Lux.AbstractExplicitLayer, t, u0, st)
 
-Internal, used as a constructor used for representing the ODE solution as a
-neural network in a form that respects boundary conditions, i.e.
+Internal struct, used for representing the ODE solution as a neural network in a form that respects boundary conditions, i.e.
 `phi(t) = u0 + t*NN(t)`.
 """
 mutable struct ODEPhi{C, T, U, S}
@@ -156,8 +147,9 @@ function (f::ODEPhi{C, T, U})(t::AbstractVector,
 end
 
 """
-Computes u' using either forward-mode automatic differentiation or
-numerical differentiation.
+    ode_dfdx(phi, t, θ, autodiff)
+
+Computes u' using either forward-mode automatic differentiation or numerical differentiation.
 """
 function ode_dfdx end
 
@@ -188,7 +180,12 @@ function ode_dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool)
 end
 
 """
-Simple L2 inner loss at a time `t` with parameters θ
+    inner_loss(phi, f, autodiff, t, θ, p)
+
+Simple L2 inner loss at a time `t` with parameters `θ` of the neural network.
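+For a vector of time points `t`, this is computed as
+`sum(abs2, dxdtguess .- fs) / length(t)`, i.e. the residual
+`|φ'(tᵢ; θ) - f(φ(tᵢ; θ), p, tᵢ)|²` averaged over the sampled points, where `φ` is the `ODEPhi` trial solution.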
""" function inner_loss end @@ -200,7 +194,7 @@ end function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, θ, p) where {C, T, U <: Number} out = phi(t, θ) - fs = reduce(hcat, [f(out[i], p, t[i]) for i in 1:size(out, 2)]) + fs = reduce(hcat, [f(out[i], p, t[i]) for i in axes(out, 2)]) dxdtguess = Array(ode_dfdx(phi, t, θ, autodiff)) sum(abs2, dxdtguess .- fs) / length(t) end @@ -220,7 +214,9 @@ function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, end """ -Representation of the loss function, parametric on the training strategy `strategy` + generate_loss(strategy, phi, f, autodiff, tspan, p, batch) + +Representation of the loss function, parametric on the training strategy `strategy`. """ function generate_loss(strategy::QuadratureTraining, phi, f, autodiff::Bool, tspan, p, batch) @@ -328,12 +324,12 @@ end function (f::NNODEInterpolation)(t::Vector, idxs::Nothing, ::Type{Val{0}}, p, continuity) out = f.phi(t, f.θ) - SciMLBase.RecursiveArrayTools.DiffEqArray([out[:, i] for i in 1:size(out, 2)], t) + SciMLBase.RecursiveArrayTools.DiffEqArray([out[:, i] for i in axes(out, 2)], t) end function (f::NNODEInterpolation)(t::Vector, idxs, ::Type{Val{0}}, p, continuity) out = f.phi(t, f.θ) - SciMLBase.RecursiveArrayTools.DiffEqArray([out[idxs, i] for i in 1:size(out, 2)], t) + SciMLBase.RecursiveArrayTools.DiffEqArray([out[idxs, i] for i in axes(out, 2)], t) end SciMLBase.interp_summary(::NNODEInterpolation) = "Trained neural network interpolation" @@ -365,15 +361,10 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, #train points generation init_params = alg.init_params - if chain isa Lux.AbstractExplicitLayer - phi, init_params = generate_phi_θ(chain, t0, u0, init_params) - else - error("Only Lux.AbstractExplicitLayer neural networks are supported") - end + !(chain isa Lux.AbstractExplicitLayer) && error("Only Lux.AbstractExplicitLayer neural networks are supported") + phi, init_params = generate_phi_θ(chain, t0, u0, init_params) - if isinplace(prob) - throw(error("The NNODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) - end + isinplace(prob) && throw(error("The NNODE solver only supports out-of-place ODE definitions, i.e. 
du=f(u,p,t).")) try phi(t0, init_params) @@ -428,12 +419,10 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, else return L2_loss + tstops_loss end - total_original_loss = L2_loss * num_original_points total_tstops_loss = tstops_loss * num_original_points total_points = num_original_points + num_tstops_points L2_loss = (total_original_loss + total_tstops_loss) / total_points - end return L2_loss end From ec72d767d91d98d73e1cf8ceac4b52d2247a5e1a Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Tue, 30 Jan 2024 14:41:34 +0000 Subject: [PATCH 05/16] test: cleaned up NNODE tests and refactored them into testsets --- test/NNODE_tests.jl | 413 ++++++++++++++++++++------------------ test/NNODE_tstops_test.jl | 75 ++++--- 2 files changed, 256 insertions(+), 232 deletions(-) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index c39ca80d80..254507c0e8 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -2,218 +2,233 @@ using Test using Random, NeuralPDE using OrdinaryDiffEq, Statistics import Lux, OptimizationOptimisers, OptimizationOptimJL +using Flux Random.seed!(100) -# Run a solve on scalars -linear = (u, p, t) -> cos(2pi * t) -tspan = (0.0f0, 1.0f0) -u0 = 0.0f0 -prob = ODEProblem(linear, u0, tspan) -luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) -opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, - abstol = 1.0f-10, maxiters = 200) - -@test_throws ArgumentError solve(prob, NeuralPDE.NNODE(luxchain, opt; autodiff = true), - dt = 1 / 20.0f0, - verbose = true, abstol = 1.0f-10, maxiters = 200) - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, - abstol = 1.0f-6, maxiters = 200) - -opt = OptimizationOptimJL.BFGS() -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, - abstol = 1.0f-10, maxiters = 200) - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, - abstol = 1.0f-6, maxiters = 200) - -# Run a solve on vectors -linear = (u, p, t) -> [cos(2pi * t)] -tspan = (0.0f0, 1.0f0) -u0 = [0.0f0] -prob = ODEProblem(linear, u0, tspan) -luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) - -opt = OptimizationOptimJL.BFGS() -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10, - verbose = true, maxiters = 200) - -@test_throws ArgumentError solve(prob, NeuralPDE.NNODE(luxchain, opt; autodiff = true), - dt = 1 / 20.0f0, - abstol = 1e-10, verbose = true, maxiters = 200) - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), abstol = 1.0f-6, - verbose = true, maxiters = 200) - -@test sol(0.5) isa Vector -@test sol(0.5; idxs = 1) isa Number -@test sol.k isa SciMLBase.OptimizationSolution - -#Example 1 -linear = (u, p, t) -> @. 
t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) - - u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) -linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2] -prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0)) -luxchain = Lux.Chain(Lux.Dense(1, 128, Lux.σ), Lux.Dense(128, 1)) -opt = OptimizationOptimisers.Adam(0.01) - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, maxiters = 400) -@test sol.errors[:l2] < 0.5 - -@test_throws AssertionError solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, - maxiters = 400) - -sol = solve(prob, - NeuralPDE.NNODE(luxchain, opt; batch = false, - strategy = StochasticTraining(100)), - verbose = true, maxiters = 400) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, - NeuralPDE.NNODE(luxchain, opt; batch = true, - strategy = StochasticTraining(100)), - verbose = true, maxiters = 400) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = false), verbose = true, - maxiters = 400, dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, - maxiters = 400, - dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - -#Example 2 -linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t) -linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) -prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) -luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) - -opt = OptimizationOptimisers.Adam(0.1) -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt), verbose = true, maxiters = 400, - abstol = 1.0f-8) -@test sol.errors[:l2] < 0.5 - -@test_throws AssertionError solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, - maxiters = 400, - abstol = 1.0f-8) - -sol = solve(prob, - NeuralPDE.NNODE(luxchain, opt; batch = false, - strategy = StochasticTraining(100)), - verbose = true, maxiters = 400, - abstol = 1.0f-8) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, - NeuralPDE.NNODE(luxchain, opt; batch = true, - strategy = StochasticTraining(100)), - verbose = true, maxiters = 400, - abstol = 1.0f-8) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = false), verbose = true, - maxiters = 400, - abstol = 1.0f-8, dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - -sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, - maxiters = 400, - abstol = 1.0f-8, dt = 1 / 5.0f0) -@test sol.errors[:l2] < 0.5 - -#Example 3 ODEs system -linear = (u, p, t) -> [cos(2pi * t), sin(2pi * t)] -tspan = (0.0f0, 1.0f0) -u0 = [0.0f0, -1.0f0 / 2pi] -linear_analytic = (u0, p, t) -> [sin(2pi * t) / 2pi, -cos(2pi * t) / 2pi] -odefunction = ODEFunction(linear, analytic = linear_analytic) -prob = ODEProblem(odefunction, u0, tspan) -luxchain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 2)) -opt = OptimizationOptimisers.Adam(0.1) -alg = NeuralPDE.NNODE(luxchain, opt; autodiff = false) - -sol = solve(prob, - alg, verbose = true, dt = 1 / 40.0f0, - maxiters = 2000, abstol = 1.0f-7) -@test sol.errors[:l2] < 0.5 - -# WeightedIntervalTraining(Lux Chain) -function f(u, p, t) - [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]] -end +@testset "Scalar" begin + # Run a solve on scalars + linear = (u, p, t) -> cos(2pi * t) + tspan = (0.0f0, 1.0f0) + u0 = 0.0f0 + prob = ODEProblem(linear, u0, tspan) + luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + opt = 
OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) -p = [1.5, 1.0, 3.0, 1.0] -u0 = [1.0, 1.0] -prob_oop = ODEProblem{false}(f, u0, (0.0, 3.0), p) -true_sol = solve(prob_oop, Tsit5(), saveat = 0.01) -func = Lux.σ -N = 12 -chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), - Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) - -opt = OptimizationOptimisers.Adam(0.01) -weights = [0.7, 0.2, 0.1] -points = 200 -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, - strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) -sol = solve(prob_oop, alg, verbose = true, maxiters = 100000, saveat = 0.01) - -@test abs(mean(sol) - mean(true_sol)) < 0.2 - -# Checking if additional_loss feature works for NNODE -linear = (u, p, t) -> cos(2pi * t) -linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t) -tspan = (0.0f0, 1.0f0) -dt = (tspan[2] - tspan[1]) / 99 -ts = collect(tspan[1]:dt:tspan[2]) -prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) -opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) - -# Analytical solution -u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x) - -# GridTraining (Lux Chain) -luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) - -(u_, t_) = (u_analytical(ts), ts) -function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) -end + sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, + abstol = 1.0f-10, maxiters = 200) -alg1 = NeuralPDE.NNODE(luxchain, opt, strategy = GridTraining(0.01), - additional_loss = additional_loss) + @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true), + dt = 1 / 20.0f0, + verbose = true, abstol = 1.0f-10, maxiters = 200) -sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-8, maxiters = 500) -@test sol1.errors[:l2] < 0.5 + sol = solve(prob, NNODE(luxchain, opt), verbose = true, + abstol = 1.0f-6, maxiters = 200) -# QuadratureTraining (Lux Chain) -luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + opt = OptimizationOptimJL.BFGS() + sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, + abstol = 1.0f-10, maxiters = 200) -(u_, t_) = (u_analytical(ts), ts) -function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) + sol = solve(prob, NNODE(luxchain, opt), verbose = true, + abstol = 1.0f-6, maxiters = 200) end -alg1 = NeuralPDE.NNODE(luxchain, opt, additional_loss = additional_loss) +@testset "Vector" begin + # Run a solve on vectors + linear = (u, p, t) -> [cos(2pi * t)] + tspan = (0.0f0, 1.0f0) + u0 = [0.0f0] + prob = ODEProblem(linear, u0, tspan) + luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + + opt = OptimizationOptimJL.BFGS() + sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10, + verbose = true, maxiters = 200) -sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-10, maxiters = 200) -@test sol1.errors[:l2] < 0.5 + @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true), + dt = 1 / 20.0f0, + abstol = 1e-10, verbose = true, maxiters = 200) -# StochasticTraining (Lux Chain) -luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + sol = solve(prob, NNODE(luxchain, opt), abstol = 1.0f-6, + verbose = true, maxiters = 200) -(u_, t_) = (u_analytical(ts), ts) -function additional_loss(phi, θ) - return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) + @test sol(0.5) isa Vector + @test sol(0.5; idxs = 1) isa Number + @test sol.k isa 
SciMLBase.OptimizationSolution end -alg1 = NeuralPDE.NNODE(luxchain, opt, strategy = StochasticTraining(1000), - additional_loss = additional_loss) +@testset "Example 1" begin + linear = (u, p, t) -> @. t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) - + u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) + linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2] + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0)) + luxchain = Lux.Chain(Lux.Dense(1, 128, Lux.σ), Lux.Dense(128, 1)) + opt = OptimizationOptimisers.Adam(0.01) + + sol = solve(prob, NNODE(luxchain, opt), verbose = true, maxiters = 400) + @test sol.errors[:l2] < 0.5 + + @test_throws AssertionError solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, + maxiters = 400) + + sol = solve(prob, + NNODE(luxchain, opt; batch = false, + strategy = StochasticTraining(100)), + verbose = true, maxiters = 400) + @test sol.errors[:l2] < 0.5 + + sol = solve(prob, + NNODE(luxchain, opt; batch = true, + strategy = StochasticTraining(100)), + verbose = true, maxiters = 400) + @test sol.errors[:l2] < 0.5 + + sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = true, + maxiters = 400, dt = 1 / 5.0f0) + @test sol.errors[:l2] < 0.5 + + sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, + maxiters = 400, + dt = 1 / 5.0f0) + @test sol.errors[:l2] < 0.5 +end + +@testset "Example 2" begin + linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t) + linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) + luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + + opt = OptimizationOptimisers.Adam(0.1) + sol = solve(prob, NNODE(luxchain, opt), verbose = true, maxiters = 400, + abstol = 1.0f-8) + @test sol.errors[:l2] < 0.5 + + @test_throws AssertionError solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, + maxiters = 400, + abstol = 1.0f-8) + + sol = solve(prob, + NNODE(luxchain, opt; batch = false, + strategy = StochasticTraining(100)), + verbose = true, maxiters = 400, + abstol = 1.0f-8) + @test sol.errors[:l2] < 0.5 + + sol = solve(prob, + NNODE(luxchain, opt; batch = true, + strategy = StochasticTraining(100)), + verbose = true, maxiters = 400, + abstol = 1.0f-8) + @test sol.errors[:l2] < 0.5 + + sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = true, + maxiters = 400, + abstol = 1.0f-8, dt = 1 / 5.0f0) + @test sol.errors[:l2] < 0.5 + + sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, + maxiters = 400, + abstol = 1.0f-8, dt = 1 / 5.0f0) + @test sol.errors[:l2] < 0.5 +end -sol1 = solve(prob, alg1, verbose = true, abstol = 1.0f-8, maxiters = 500) -@test sol1.errors[:l2] < 0.5 +@testset "Example 3" begin + linear = (u, p, t) -> [cos(2pi * t), sin(2pi * t)] + tspan = (0.0f0, 1.0f0) + u0 = [0.0f0, -1.0f0 / 2pi] + linear_analytic = (u0, p, t) -> [sin(2pi * t) / 2pi, -cos(2pi * t) / 2pi] + odefunction = ODEFunction(linear, analytic = linear_analytic) + prob = ODEProblem(odefunction, u0, tspan) + luxchain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 2)) + opt = OptimizationOptimisers.Adam(0.1) + alg = NNODE(luxchain, opt; autodiff = false) + + sol = solve(prob, + alg, verbose = true, dt = 1 / 40.0f0, + maxiters = 2000, abstol = 1.0f-7) + @test sol.errors[:l2] < 0.5 +end + +@testset "Training Strategies" begin + @testset "WeightedIntervalTraining" begin + function f(u, p, t) + [p[1] * u[1] - p[2] * u[1] * u[2], 
-p[3] * u[2] + p[4] * u[1] * u[2]] + end + p = [1.5, 1.0, 3.0, 1.0] + u0 = [1.0, 1.0] + prob_oop = ODEProblem{false}(f, u0, (0.0, 3.0), p) + true_sol = solve(prob_oop, Tsit5(), saveat = 0.01) + func = Lux.σ + N = 12 + chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), + Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) + opt = OptimizationOptimisers.Adam(0.01) + weights = [0.7, 0.2, 0.1] + points = 200 + alg = NNODE(chain, opt, autodiff = false, + strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) + sol = solve(prob_oop, alg, verbose = true, maxiters = 100000, saveat = 0.01) + @test abs(mean(sol) - mean(true_sol)) < 0.2 + end + + linear = (u, p, t) -> cos(2pi * t) + linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t) + tspan = (0.0, 1.0) + dt = (tspan[2] - tspan[1]) / 99 + ts = collect(tspan[1]:dt:tspan[2]) + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0, (0.0, 1.0)) + opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) + u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x) + + @testset "GridTraining" begin + luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + (u_, t_) = (u_analytical(ts), ts) + function additional_loss(phi, θ) + return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) + end + alg1 = NNODE(luxchain, opt, strategy = GridTraining(0.01), + additional_loss = additional_loss) + sol1 = solve(prob, alg1, verbose = true, abstol = 1e-8, maxiters = 500) + @test sol1.errors[:l2] < 0.5 + end + + @testset "QuadratureTraining" begin + luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + (u_, t_) = (u_analytical(ts), ts) + function additional_loss(phi, θ) + return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) + end + alg1 = NNODE(luxchain, opt, additional_loss = additional_loss) + sol1 = solve(prob, alg1, verbose = true, abstol = 1e-10, maxiters = 200) + @test sol1.errors[:l2] < 0.5 + end + + @testset "StochasticTraining" begin + luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) + (u_, t_) = (u_analytical(ts), ts) + function additional_loss(phi, θ) + return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) + end + alg1 = NNODE(luxchain, opt, strategy = StochasticTraining(1000), + additional_loss = additional_loss) + sol1 = solve(prob, alg1, verbose = true, abstol = 1e-8, maxiters = 500) + @test sol1.errors[:l2] < 0.5 + end +end + +@testset "Translating from Flux" begin + linear = (u, p, t) -> cos(2pi * t) + linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t) + tspan = (0.0, 1.0) + dt = (tspan[2] - tspan[1]) / 99 + ts = collect(tspan[1]:dt:tspan[2]) + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0, (0.0, 1.0)) + opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) + u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x) + fluxchain = Flux.Chain(Flux.Dense(1, 5, Flux.σ), Flux.Dense(5, 1)) + alg1 = NNODE(fluxchain, opt) + @test alg1.chain isa Lux.AbstractExplicitLayer + sol1 = solve(prob, alg1, verbose = true, abstol = 1e-10, maxiters = 200) + @test sol1.errors[:l2] < 0.5 +end diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl index 4c58987aea..c0f8422a09 100644 --- a/test/NNODE_tstops_test.jl +++ b/test/NNODE_tstops_test.jl @@ -30,38 +30,47 @@ weights = [0.3, 0.3, 0.4] points = 3 dx = 1.0 -#Grid Training without added points (difference between solutions should be high) -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.GridTraining(dx)) -sol = solve(prob_oop, alg, verbose=true, maxiters = 
maxiters, saveat = saveat) - -@test abs(mean(sol) - mean(true_sol)) > threshold - -#Grid Training with added points (difference between solutions should be low) -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.GridTraining(dx)) -sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) - -@test abs(mean(sol) - mean(true_sol)) < threshold - -#WeightedIntervalTraining without added points (difference between solutions should be high) -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) -sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) - -@test abs(mean(sol) - mean(true_sol)) > threshold - -#WeightedIntervalTraining with added points (difference between solutions should be low) -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) -sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) - -@test abs(mean(sol) - mean(true_sol)) < threshold - -#StochasticTraining without added points (difference between solutions should be high) -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.StochasticTraining(points)) -sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) - -@test abs(mean(sol) - mean(true_sol)) > threshold +@testset "GridTraining" begin + @testset "Without added points" begin + # (difference between solutions should be high) + alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx)) + sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + @test abs(mean(sol) - mean(true_sol)) > threshold + end + @testset "With added points" begin + # (difference between solutions should be low) + alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx)) + sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + @test abs(mean(sol) - mean(true_sol)) < threshold + end +end -#StochasticTraining with added points (difference between solutions should be low) -alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.StochasticTraining(points)) -sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) +@testset "WeightedIntervalTraining" begin + @testset "Without added points" begin + # (difference between solutions should be high) + alg = NNODE(chain, opt, autodiff = false, strategy = WeightedIntervalTraining(weights, points)) + sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + @test abs(mean(sol) - mean(true_sol)) > threshold + end + @testset "With added points" begin + # (difference between solutions should be low) + alg = NNODE(chain, opt, autodiff = false, strategy = WeightedIntervalTraining(weights, points)) + sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + @test abs(mean(sol) - mean(true_sol)) < threshold + end +end -@test abs(mean(sol) - mean(true_sol)) < threshold +@testset "StochasticTraining" begin + @testset "Without added points" begin + # (difference between solutions should be high) + alg = NNODE(chain, opt, autodiff = false, strategy = StochasticTraining(points)) + sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + @test abs(mean(sol) - mean(true_sol)) > threshold + end + @testset "With added points" begin + # 
(difference between solutions should be low) + alg = NNODE(chain, opt, autodiff = false, strategy = StochasticTraining(points)) + sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + @test abs(mean(sol) - mean(true_sol)) < threshold + end +end From 5332a78df70fedcc513e26cc2d93a6b7a5b75a9a Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Tue, 30 Jan 2024 14:41:55 +0000 Subject: [PATCH 06/16] build: add Flux as test dep --- Project.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index bfd0eeb542..fd683ede3b 100644 --- a/Project.toml +++ b/Project.toml @@ -52,6 +52,7 @@ DiffEqNoiseProcess = "5.1" Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" DomainSets = "0.6, 0.7" +Flux = "0.14" ForwardDiff = "0.10" Functors = "0.4" Integrals = "4" @@ -78,6 +79,7 @@ julia = "1.6" [extras] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" @@ -88,4 +90,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [targets] -test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "cuDNN", "LuxCUDA"] +test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "cuDNN", "LuxCUDA", "Flux"] From 0b5910e1d14f65308ee1fc5c8b86586ad40b76dc Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 06:30:02 +0000 Subject: [PATCH 07/16] refactor: have auto translate from Flux to Lux for PhysicsInformedNN, BNNODE, BayesianPINN, ahmc_bayesian_pinn_ode --- src/BPINN_ode.jl | 19 +++++++++-------- src/PDE_BPINN.jl | 37 +++++++++++++++------------------ src/advancedHMC_MCMC.jl | 9 +++------ src/pinn_types.jl | 45 ++++++++++++++++++++--------------------- 4 files changed, 50 insertions(+), 60 deletions(-) diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl index 06c4fa4feb..3919c6ad3b 100644 --- a/src/BPINN_ode.jl +++ b/src/BPINN_ode.jl @@ -1,15 +1,13 @@ # HIGH level API for BPINN ODE solver """ -```julia -BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, - priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], - phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0, - MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing, - Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric), - Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, - progress = false, verbose = false) -``` + BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, + priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], + phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0, + MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric), + Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, + progress = false, verbose = false) Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization of the physics-informed neural network which is used as a solver for a standard `ODEProblem`. 
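Both `BNNODE` here and `ahmc_bayesian_pinn_ode` now guard their `chain` argument and convert it with `Lux.transform` when it is not already a `Lux.AbstractExplicitLayer` (see the hunks below). A minimal sketch of what that conversion means for callers, assuming both Flux and Lux are loaded (the layer-by-layer conversion details are Lux's):

```julia
using Flux, Lux

# a plain Flux network, as users of the old API would have built
fluxchain = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1))

# what the constructors now do internally before any Lux-only code runs
luxchain = Lux.transform(fluxchain)
@assert luxchain isa Lux.AbstractExplicitLayer
```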
@@ -116,6 +114,7 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, targetacceptancerate = 0.8), Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, progress = false, verbose = false) + !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) BNNODE(chain, Kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd, dataset, physdt, MCMCkwargs, @@ -267,4 +266,4 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem, end BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params, t) -end \ No newline at end of file +end diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl index 65bbcd884a..02eb939bf6 100644 --- a/src/PDE_BPINN.jl +++ b/src/PDE_BPINN.jl @@ -244,17 +244,16 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ) end """ -```julia -ahmc_bayesian_pinn_pde(pde_system, discretization; - draw_samples = 1000, - bcstd = [0.01], l2std = [0.05], - phystd = [0.05], priorsNNw = (0.0, 2.0), - param = [], nchains = 1, Kernel = HMC(0.1, 30), - Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), - Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0], - numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false) -``` + ahmc_bayesian_pinn_pde(pde_system, discretization; + draw_samples = 1000, + bcstd = [0.01], l2std = [0.05], + phystd = [0.05], priorsNNw = (0.0, 2.0), + param = [], nchains = 1, Kernel = HMC(0.1, 30), + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0], + numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false) + ## NOTES * Dataset is required for accurate Parameter estimation + solving equations. * Returned solution is a BPINNsolution consisting of Ensemble solution, estimated PDE and NN parameters @@ -272,22 +271,18 @@ ahmc_bayesian_pinn_pde(pde_system, discretization; * `phystd`: Vector of standard deviations of BPINN prediction against Chosen Underlying PDE equations. * `priorsNNw`: Tuple of (mean, std) for BPINN Network parameters. Weights and Biases of BPINN are Normal Distributions by default. * `param`: Vector of chosen PDE's parameter's Distributions in case of Inverse problems. -* `nchains`: number of chains you want to sample - -# AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. -* `Kernel`: Choice of MCMC Sampling Algorithm object HMC/NUTS/HMCDA (AdvancedHMC.jl implemenations ). +* `nchains`: number of chains you want to sample. +* `Kernel`: Choice of MCMC Sampling Algorithm object HMC/NUTS/HMCDA (AdvancedHMC.jl implementations). * `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/ - Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default) + Note: Target percentage(in decimal) of iterations in which the proposals are accepted (0.8 by default). * `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/ * `saveats`: Grid spacing for each independant variable for evaluation of ensemble solution, estimated parameters. * `numensemble`: Number of last samples to take for creation of ensemble solution, estimated parameters. * `progress`: controls whether to show the progress meter or not. -* `verbose`: controls the verbosity. 
(Sample call args in AHMC) +* `verbose`: controls the verbosity. (Sample call args in AHMC). -""" - -""" -priors: pdf for W,b + pdf for PDE params +## Warnings +* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. """ function ahmc_bayesian_pinn_pde(pde_system, discretization; draw_samples = 1000, diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index aa1839557b..2e8ba4bd8b 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -423,8 +423,6 @@ Incase you are only solving the Equations for solution, do not provide dataset * `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems. * `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical) * `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default) - -# AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. * `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA) * `Integratorkwargs`: `Integrator`, `jitter_rate`, `tempering_rate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/ * `Adaptorkwargs`: `Adaptor`, `Metric`, `targetacceptancerate`. Refer: https://turinglang.org/AdvancedHMC.jl/stable/ @@ -439,10 +437,8 @@ Incase you are only solving the Equations for solution, do not provide dataset * `progress`: controls whether to show the progress meter or not. * `verbose`: controls the verbosity. (Sample call args in AHMC) -""" - -""" -priors: pdf for W,b + pdf for ODE params +## Warnings +* AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. """ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing], @@ -457,6 +453,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; MCMCkwargs = (n_leapfrog = 30,), progress = false, verbose = false) + !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) # NN parameter prior mean and variance(PriorsNN must be a tuple) if isinplace(prob) throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) diff --git a/src/pinn_types.jl b/src/pinn_types.jl index e1c13fd949..5751089330 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -27,19 +27,17 @@ function logscalar(logger, s::R, name::AbstractString, step::Integer) where {R < end """ -```julia -PhysicsInformedNN(chain, - strategy; - init_params = nothing, - phi = nothing, - param_estim = false, - additional_loss = nothing, - adaptive_loss = nothing, - logger = nothing, - log_options = LogOptions(), - iteration = nothing, - kwargs...) where {iip} -``` + PhysicsInformedNN(chain, + strategy; + init_params = nothing, + phi = nothing, + param_estim = false, + additional_loss = nothing, + adaptive_loss = nothing, + logger = nothing, + log_options = LogOptions(), + iteration = nothing, + kwargs...) A `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a `PDESystem` into an `OptimizationProblem` using the Physics-Informed Neural Networks (PINN) @@ -47,11 +45,12 @@ methodology. ## Positional Arguments -* `chain`: a vector of Lux.jl chains with a d-dimensional input and a - 1-dimensional output corresponding to each of the dependent variables. Note that this - specification respects the order of the dependent variables as specified in the PDESystem. 
+* `chain`: a vector of Lux/Flux chains with a d-dimensional input and a + 1-dimensional output corresponding to each of the dependent variables. Note that this + specification respects the order of the dependent variables as specified in the PDESystem. + Flux chains will be converted to Lux internally using `Lux.transform`. * `strategy`: determines which training strategy will be used. See the Training Strategy - documentation for more details. + documentation for more details. ## Keyword Arguments @@ -105,7 +104,7 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN iteration = nothing, kwargs...) multioutput = chain isa AbstractArray - + !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) if phi === nothing if multioutput _phi = Phi.(chain) @@ -113,6 +112,7 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN _phi = Phi(chain) end else + !(phi.f isa Lux.AbstractExplicitLayer) && throw(ArgumentError("Only Lux Chains are supported")) _phi = phi end @@ -149,8 +149,7 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN end """ -```julia -BayesianPINN(chain, + BayesianPINN(chain, strategy; init_params = nothing, phi = nothing, @@ -161,8 +160,7 @@ BayesianPINN(chain, log_options = LogOptions(), iteration = nothing, dataset = nothing, - kwargs...) where {iip} -``` + kwargs...) A `discretize` algorithm for the ModelingToolkit PDESystem interface, which transforms a `PDESystem` into a likelihood function used for HMC based Posterior Sampling Algorithms [AdvancedHMC.jl](https://turinglang.org/AdvancedHMC.jl/stable/) @@ -234,7 +232,7 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN dataset = nothing, kwargs...) multioutput = chain isa AbstractArray - + !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) if phi === nothing if multioutput _phi = Phi.(chain) @@ -242,6 +240,7 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN _phi = Phi(chain) end else + !(phi.f isa Lux.AbstractExplicitLayer) && throw(ArgumentError("Only Lux Chains are supported")) _phi = phi end From cf62a2c1dbb1cd33fac35c532b6739dbe3633fd1 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 06:33:22 +0000 Subject: [PATCH 08/16] test: refactor tests into testsets and add tests for autotranslating --- test/BPINN_PDE_tests.jl | 359 ++++++++++++++------------ test/BPINN_PDEinvsol_tests.jl | 288 ++++++++++----------- test/BPINN_Tests.jl | 436 +++++++++++++++++--------------- test/IDE_tests.jl | 310 +++++++++++------------ test/NNPDE_tests.jl | 54 ++-- test/NNPDE_tests_gpu_Lux.jl | 364 +++++++++++++-------------- test/adaptive_loss_tests.jl | 18 +- test/additional_loss_tests.jl | 458 ++++++++++++++++------------------ test/direct_function_tests.jl | 217 ++++++++-------- 9 files changed, 1256 insertions(+), 1248 deletions(-) diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl index a63450e371..c3ed4d913e 100644 --- a/test/BPINN_PDE_tests.jl +++ b/test/BPINN_PDE_tests.jl @@ -7,163 +7,202 @@ using ComponentArrays Random.seed!(100) -# Cospit example -@parameters t -@variables u(..) 
- -Dt = Differential(t) - -eqs = Dt(u(t)) - cos(2 * π * t) ~ 0 -bcs = [u(0) ~ 0.0] -domains = [t ∈ Interval(0.0, 2.0)] - -chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) -initl, st = Lux.setup(Random.default_rng(), chainl) - -@named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)]) - -# non adaptive case -discretization = NeuralPDE.BayesianPINN([chainl], GridTraining([0.01])) - -sol1 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.02], - phystd = [0.01], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0]) - -analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π) -ts = vec(sol1.timepoints[1]) -u_real = [analytic_sol_func(0.0, t) for t in ts] -u_predict = pmean(sol1.ensemblesol[1]) -@test u_predict≈u_real atol=0.5 -@test mean(u_predict .- u_real) < 0.1 - -## Example 1, 1D ode -@parameters θ -@variables u(..) -Dθ = Differential(θ) - -# 1D ODE -eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) - - u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3))) - -# Initial and boundary conditions -bcs = [u(0.0) ~ 1.0] - -# Space and time domains -domains = [θ ∈ Interval(0.0, 1.0)] - -# Neural network -chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1)) - -discretization = BayesianPINN([chain], GridTraining([0.01])) - -@named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) - -sol1 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 500, - bcstd = [0.1], - phystd = [0.05], - priorsNNw = (0.0, 10.0), - saveats = [1 / 100.0]) - -analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 -ts = sol1.timepoints[1] -u_real = vec([analytic_sol_func(t) for t in ts]) -u_predict = pmean(sol1.ensemblesol[1]) -@test u_predict≈u_real atol=0.8 - -# example 3 (3 degree ODE) -@parameters x -@variables u(..), Dxu(..), Dxxu(..), O1(..), O2(..) -Dxxx = Differential(x)^3 -Dx = Differential(x) - -# ODE -eq = Dx(Dxxu(x)) ~ cos(pi * x) - -# Initial and boundary conditions -ep = (cbrt(eps(eltype(Float64))))^2 / 6 - -bcs = [u(0.0) ~ 0.0, - u(1.0) ~ cos(pi), - Dxu(1.0) ~ 1.0, - Dxu(x) ~ Dx(u(x)) + ep * O1(x), - Dxxu(x) ~ Dx(Dxu(x)) + ep * O2(x)] - -# Space and time domains -domains = [x ∈ Interval(0.0, 1.0)] - -# Neural network -chain = [ - Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), - Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), - Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), - Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)), - Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1))] - -discretization = BayesianPINN(chain, GridTraining(0.01)) - -@named pde_system = PDESystem(eq, bcs, domains, [x], - [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)]) - -sol1 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 200, - bcstd = [0.01, 0.01, 0.01, 0.01, 0.01], - phystd = [0.005], - priorsNNw = (0.0, 10.0), - saveats = [1 / 100.0]) - -analytic_sol_func(x) = (π * x * (-x + (π^2) * (2 * x - 3) + 1) - sin(π * x)) / (π^3) - -u_predict = pmean(sol1.ensemblesol[1]) -xs = vec(sol1.timepoints[1]) -u_real = [analytic_sol_func(x) for x in xs] -@test u_predict≈u_real atol=0.5 - -# 2D Poissons equation -@parameters x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 - -# 2D PDE -eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) - -# Boundary conditions -bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0, - u(x, 0) ~ 0.0, u(x, 1) ~ 0.0] - -# Space and time domains -domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] - -# Neural network -dim = 2 # number of dimensions -chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1)) - -# Discretization -dx = 0.05 -discretization = BayesianPINN([chain], GridTraining(dx)) - -@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) - -sol1 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 200, - bcstd = [0.003, 0.003, 0.003, 0.003], - phystd = [0.003], - priorsNNw = (0.0, 10.0), - saveats = [1 / 100.0, 1 / 100.0]) - -xs = sol1.timepoints[1] -analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) - -u_predict = pmean(sol1.ensemblesol[1]) -u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])] -diff_u = abs.(u_predict .- u_real) -@test u_predict≈u_real atol=1.5 +@testset "Example 1: 2D Periodic System" begin + # Cos(pi*t) example + @parameters t + @variables u(..) + Dt = Differential(t) + eqs = Dt(u(t)) - cos(2 * π * t) ~ 0 + bcs = [u(0) ~ 0.0] + domains = [t ∈ Interval(0.0, 2.0)] + chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) + initl, st = Lux.setup(Random.default_rng(), chainl) + @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)]) + + # non adaptive case + discretization = BayesianPINN([chainl], GridTraining([0.01])) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 1500, + bcstd = [0.02], + phystd = [0.01], + priorsNNw = (0.0, 1.0), + saveats = [1 / 50.0]) + + analytic_sol_func(u0, t) = u0 + sin(2 * π * t) / (2 * π) + ts = vec(sol1.timepoints[1]) + u_real = [analytic_sol_func(0.0, t) for t in ts] + u_predict = pmean(sol1.ensemblesol[1]) + @test u_predict≈u_real atol=0.5 + @test mean(u_predict .- u_real) < 0.1 +end + +@testset "Example 2: 1D ODE" begin + @parameters θ + @variables u(..) + Dθ = Differential(θ) + + # 1D ODE + eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) - + u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3))) + + # Initial and boundary conditions + bcs = [u(0.0) ~ 1.0] + + # Space and time domains + domains = [θ ∈ Interval(0.0, 1.0)] + + # Neural network + chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1)) + + discretization = BayesianPINN([chain], GridTraining([0.01])) + + @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 500, + bcstd = [0.1], + phystd = [0.05], + priorsNNw = (0.0, 10.0), + saveats = [1 / 100.0]) + + analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 + ts = sol1.timepoints[1] + u_real = vec([analytic_sol_func(t) for t in ts]) + u_predict = pmean(sol1.ensemblesol[1]) + @test u_predict≈u_real atol=0.8 +end + +@testset "Example 3: 3rd Degree ODE" begin + @parameters x + @variables u(..), Dxu(..), Dxxu(..), O1(..), O2(..) 
+ Dxxx = Differential(x)^3 + Dx = Differential(x) + + # ODE + eq = Dx(Dxxu(x)) ~ cos(pi * x) + + # Initial and boundary conditions + ep = (cbrt(eps(eltype(Float64))))^2 / 6 + + bcs = [u(0.0) ~ 0.0, + u(1.0) ~ cos(pi), + Dxu(1.0) ~ 1.0, + Dxu(x) ~ Dx(u(x)) + ep * O1(x), + Dxxu(x) ~ Dx(Dxu(x)) + ep * O2(x)] + + # Space and time domains + domains = [x ∈ Interval(0.0, 1.0)] + + # Neural network + chain = [ + Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), + Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), + Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), + Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)), + Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1))] + + discretization = BayesianPINN(chain, GridTraining(0.01)) + + @named pde_system = PDESystem(eq, bcs, domains, [x], + [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)]) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 200, + bcstd = [0.01, 0.01, 0.01, 0.01, 0.01], + phystd = [0.005], + priorsNNw = (0.0, 10.0), + saveats = [1 / 100.0]) + + analytic_sol_func(x) = (π * x * (-x + (π^2) * (2 * x - 3) + 1) - sin(π * x)) / (π^3) + + u_predict = pmean(sol1.ensemblesol[1]) + xs = vec(sol1.timepoints[1]) + u_real = [analytic_sol_func(x) for x in xs] + @test u_predict≈u_real atol=0.5 +end + +@testset "Example 4: 2D Poissons equation" begin + @parameters x y + @variables u(..) + Dxx = Differential(x)^2 + Dyy = Differential(y)^2 + + # 2D PDE + eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) + + # Boundary conditions + bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0.0, + u(x, 0) ~ 0.0, u(x, 1) ~ 0.0] + + # Space and time domains + domains = [x ∈ Interval(0.0, 1.0), + y ∈ Interval(0.0, 1.0)] + + # Neural network + dim = 2 # number of dimensions + chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1)) + + # Discretization + dx = 0.05 + discretization = BayesianPINN([chain], GridTraining(dx)) + + @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 200, + bcstd = [0.003, 0.003, 0.003, 0.003], + phystd = [0.003], + priorsNNw = (0.0, 10.0), + saveats = [1 / 100.0, 1 / 100.0]) + + xs = sol1.timepoints[1] + analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) + + u_predict = pmean(sol1.ensemblesol[1]) + u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])] + diff_u = abs.(u_predict .- u_real) + @test u_predict≈u_real atol=1.5 +end + +@testset "Translating from Flux" begin + @parameters θ + @variables u(..) 
+ Dθ = Differential(θ) + + # 1D ODE + eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) - + u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3))) + + # Initial and boundary conditions + bcs = [u(0.0) ~ 1.0] + + # Space and time domains + domains = [θ ∈ Interval(0.0, 1.0)] + + # Neural network + chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1)) + + discretization = BayesianPINN([chain], GridTraining([0.01])) + @test discretization.chain isa Lux.AbstractExplicitLayer + + @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 500, + bcstd = [0.1], + phystd = [0.05], + priorsNNw = (0.0, 10.0), + saveats = [1 / 100.0]) + + analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 + ts = sol1.timepoints[1] + u_real = vec([analytic_sol_func(t) for t in ts]) + u_predict = pmean(sol1.ensemblesol[1]) + @test u_predict≈u_real atol=0.8 +end diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl index e8e72d8797..61c64de27e 100644 --- a/test/BPINN_PDEinvsol_tests.jl +++ b/test/BPINN_PDEinvsol_tests.jl @@ -7,149 +7,149 @@ using ComponentArrays Random.seed!(100) -# Cos(pit) periodic curve (Parameter Estimation) -println("Example 1, 2d Periodic System") -@parameters t, p -@variables u(..) - -Dt = Differential(t) -eqs = Dt(u(t)) - cos(p * t) ~ 0 -bcs = [u(0) ~ 0.0] -domains = [t ∈ Interval(0.0, 2.0)] - -chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) -initl, st = Lux.setup(Random.default_rng(), chainl) - -@named pde_system = PDESystem(eqs, - bcs, - domains, - [t], - [u(t)], - [p], - defaults = Dict([p => 4.0])) - -analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π) -timepoints = collect(0.0:(1 / 100.0):2.0) -u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints] -u = u .+ (u .* 0.2) .* randn(size(u)) -dataset = [hcat(u, timepoints)] - -# checking all training strategies -discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true, - dataset = [dataset, nothing]) - -ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.05], - phystd = [0.01], l2std = [0.01], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0], - param = [LogNormal(6.0, 0.5)]) - -discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true, - dataset = [dataset, nothing]) - -ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.05], - phystd = [0.01], l2std = [0.01], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0], - param = [LogNormal(6.0, 0.5)]) - -discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true, - dataset = [dataset, nothing]) - -ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.05], - phystd = [0.01], l2std = [0.01], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0], - param = [LogNormal(6.0, 0.5)]) - -discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true, - dataset = [dataset, nothing]) - -sol1 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.05], - phystd = [0.01], l2std = [0.01], - priorsNNw = (0.0, 1.0), - saveats = [1 / 50.0], - param = [LogNormal(6.0, 0.5)]) - -param = 2 * π -ts = vec(sol1.timepoints[1]) -u_real = [analytic_sol_func1(0.0, t) for t in ts] -u_predict = pmean(sol1.ensemblesol[1]) - -@test u_predict≈u_real atol=1.5 -@test mean(u_predict .- u_real) < 0.1 -@test sol1.estimated_de_params[1]≈param 
atol=param * 0.3 - -## Example Lorenz System (Parameter Estimation) -println("Example 2, Lorenz System") -@parameters t, σ_ -@variables x(..), y(..), z(..) -Dt = Differential(t) -eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)), - Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t), - Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)] - -bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0] -domains = [t ∈ Interval(0.0, 1.0)] - -input_ = length(domains) -n = 7 -chain = [ - Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), - Lux.Dense(n, 1)), - Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), - Lux.Dense(n, 1)), - Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), - Lux.Dense(n, 1)), -] - -#Generate Data -function lorenz!(du, u, p, t) - du[1] = 10.0 * (u[2] - u[1]) - du[2] = u[1] * (28.0 - u[3]) - u[2] - du[3] = u[1] * u[2] - (8 / 3) * u[3] +@testset "Example 1: 2D Periodic System with parameter estimation" begin + # Cos(pi*t) periodic curve + @parameters t, p + @variables u(..) + + Dt = Differential(t) + eqs = Dt(u(t)) - cos(p * t) ~ 0 + bcs = [u(0) ~ 0.0] + domains = [t ∈ Interval(0.0, 2.0)] + + chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) + initl, st = Lux.setup(Random.default_rng(), chainl) + + @named pde_system = PDESystem(eqs, + bcs, + domains, + [t], + [u(t)], + [p], + defaults = Dict([p => 4.0])) + + analytic_sol_func1(u0, t) = u0 + sin(2 * π * t) / (2 * π) + timepoints = collect(0.0:(1 / 100.0):2.0) + u = [analytic_sol_func1(0.0, timepoint) for timepoint in timepoints] + u = u .+ (u .* 0.2) .* randn(size(u)) + dataset = [hcat(u, timepoints)] + + # checking all training strategies + discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true, + dataset = [dataset, nothing]) + + ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 1500, + bcstd = [0.05], + phystd = [0.01], l2std = [0.01], + priorsNNw = (0.0, 1.0), + saveats = [1 / 50.0], + param = [LogNormal(6.0, 0.5)]) + + discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true, + dataset = [dataset, nothing]) + + ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 1500, + bcstd = [0.05], + phystd = [0.01], l2std = [0.01], + priorsNNw = (0.0, 1.0), + saveats = [1 / 50.0], + param = [LogNormal(6.0, 0.5)]) + + discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true, + dataset = [dataset, nothing]) + + ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 1500, + bcstd = [0.05], + phystd = [0.01], l2std = [0.01], + priorsNNw = (0.0, 1.0), + saveats = [1 / 50.0], + param = [LogNormal(6.0, 0.5)]) + + discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true, + dataset = [dataset, nothing]) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 1500, + bcstd = [0.05], + phystd = [0.01], l2std = [0.01], + priorsNNw = (0.0, 1.0), + saveats = [1 / 50.0], + param = [LogNormal(6.0, 0.5)]) + + param = 2 * π + ts = vec(sol1.timepoints[1]) + u_real = [analytic_sol_func1(0.0, t) for t in ts] + u_predict = pmean(sol1.ensemblesol[1]) + + @test u_predict≈u_real atol=1.5 + @test mean(u_predict .- u_real) < 0.1 + @test sol1.estimated_de_params[1]≈param atol=param * 0.3 end -u0 = [1.0; 0.0; 0.0] -tspan = (0.0, 1.0) -prob = ODEProblem(lorenz!, u0, tspan) -sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05) -ts = sol.t -us = hcat(sol.u...) 
-us = us .+ ((0.05 .* randn(size(us))) .* us) -ts_ = hcat(sol(ts).t...)[1, :] -dataset = [hcat(us[i, :], ts_) for i in 1:3] - -discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true, - dataset = [dataset, nothing]) - -@named pde_system = PDESystem(eqs, bcs, domains, - [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]])) - -sol1 = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 50, - bcstd = [0.3, 0.3, 0.3], - phystd = [0.1, 0.1, 0.1], - l2std = [1, 1, 1], - priorsNNw = (0.0, 1.0), - saveats = [0.01], - param = [Normal(12.0, 2)]) - -idealp = 10.0 -p_ = sol1.estimated_de_params[1] - -@test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1] -# @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2] +@testset "Example 2: Lorenz System with parameter estimation" begin + @parameters t, σ_ + @variables x(..), y(..), z(..) + Dt = Differential(t) + eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)), + Dt(y(t)) ~ x(t) * (28.0 - z(t)) - y(t), + Dt(z(t)) ~ x(t) * y(t) - 8 / 3 * z(t)] + + bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0] + domains = [t ∈ Interval(0.0, 1.0)] + + input_ = length(domains) + n = 7 + chain = [ + Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), + Lux.Dense(n, 1)), + Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), + Lux.Dense(n, 1)), + Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), + Lux.Dense(n, 1)), + ] + + #Generate Data + function lorenz!(du, u, p, t) + du[1] = 10.0 * (u[2] - u[1]) + du[2] = u[1] * (28.0 - u[3]) - u[2] + du[3] = u[1] * u[2] - (8 / 3) * u[3] + end + + u0 = [1.0; 0.0; 0.0] + tspan = (0.0, 1.0) + prob = ODEProblem(lorenz!, u0, tspan) + sol = solve(prob, Tsit5(), dt = 0.01, saveat = 0.05) + ts = sol.t + us = hcat(sol.u...) 
+ us = us .+ ((0.05 .* randn(size(us))) .* us) + ts_ = hcat(sol(ts).t...)[1, :] + dataset = [hcat(us[i, :], ts_) for i in 1:3] + + discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true, + dataset = [dataset, nothing]) + + @named pde_system = PDESystem(eqs, bcs, domains, + [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]])) + + sol1 = ahmc_bayesian_pinn_pde(pde_system, + discretization; + draw_samples = 50, + bcstd = [0.3, 0.3, 0.3], + phystd = [0.1, 0.1, 0.1], + l2std = [1, 1, 1], + priorsNNw = (0.0, 1.0), + saveats = [0.01], + param = [Normal(12.0, 2)]) + + idealp = 10.0 + p_ = sol1.estimated_de_params[1] + @test sum(abs, pmean(p_) - 10.00) < 0.3 * idealp[1] + # @test sum(abs, pmean(p_[2]) - (8 / 3)) < 0.3 * idealp[2] +end diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl index fd43937ded..6277133158 100644 --- a/test/BPINN_Tests.jl +++ b/test/BPINN_Tests.jl @@ -9,206 +9,236 @@ using NeuralPDE, MonteCarloMeasurements # on latest Julia version it performs much better for below tests Random.seed!(100) -## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION) -linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) -linear = (u, p, t) -> cos(2 * π * t) -tspan = (0.0, 2.0) -u0 = 0.0 -prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan) -p = prob.p - -# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode() -ta = range(tspan[1], tspan[2], length = 300) -u = [linear_analytic(u0, nothing, ti) for ti in ta] -x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u))) -time = vec(collect(Float64, ta)) -physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] - -# testing points for solve() call must match saveat(1/50.0) arg -ta0 = range(tspan[1], tspan[2], length = 101) -u1 = [linear_analytic(u0, nothing, ti) for ti in ta0] -x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1))) -time1 = vec(collect(Float64, ta0)) -physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] - -chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) -θinit, st = Lux.setup(Random.default_rng(), chainlux) - -fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux, draw_samples = 2500) - -alg = BNNODE(chainlux, draw_samples = 2500) -sol1lux = solve(prob, alg) - -# testing points -t = time -# Mean of last 500 sampled parameter's curves[Ensemble predictions] -θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:2500] -luxar = [chainlux(t', θ[i], st)[1] for i in 1:500] -luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] -meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean - -# --------------------- ahmc_bayesian_pinn_ode() call -@test mean(abs.(x̂ .- meanscurve)) < 0.05 -@test mean(abs.(physsol1 .- meanscurve)) < 0.005 - -#--------------------- solve() call -@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05 -@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05 - -## PROBLEM-1 (WITH PARAMETER ESTIMATION) -linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p) -linear = (u, p, t) -> cos(p * t) -tspan = (0.0, 2.0) -u0 = 0.0 -p = 2 * pi -prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p) - -# Numerical and Analytical Solutions -sol1 = solve(prob, Tsit5(); saveat = 0.01) -u = sol1.u -time = sol1.t - -# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!) 
-ta = range(tspan[1], tspan[2], length = 100) -u = [linear_analytic(u0, p, ti) for ti in ta] -x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u))) -time = vec(collect(Float64, ta)) -dataset = [x̂, time] -physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] - -# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates) -ta0 = range(tspan[1], tspan[2], length = 101) -u1 = [linear_analytic(u0, p, ti) for ti in ta0] -x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1))) -time1 = vec(collect(Float64, ta0)) -physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] - -chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) -θinit, st = Lux.setup(Random.default_rng(), chainlux1) - -fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux1, - dataset = dataset, - draw_samples = 2500, - physdt = 1 / 50.0, - priorsNNw = (0.0, 3.0), - param = [LogNormal(9, 0.5)]) - -alg = BNNODE(chainlux1, dataset = dataset, - draw_samples = 2500, - physdt = 1 / 50.0, - priorsNNw = (0.0, - 3.0), - param = [ - LogNormal(9, - 0.5), - ]) - -sol2lux = solve(prob, alg) - -# testing points -t = time -# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] -θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit) for i in 2000:2500] -luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500] -luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] -meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean - -# --------------------- ahmc_bayesian_pinn_ode() call -@test mean(abs.(physsol1 .- meanscurve)) < 0.15 - -# ESTIMATED ODE PARAMETERS (NN1 AND NN2) -@test abs(p - mean([fhsamples[i][23] for i in 2000:2500])) < abs(0.35 * p) - -#-------------------------- solve() call -@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2 - -# ESTIMATED ODE PARAMETERS (NN1 AND NN2) -@test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p) - -## PROBLEM-2 -linear = (u, p, t) -> u / p + exp(t / p) * cos(t) -tspan = (0.0, 10.0) -u0 = 0.0 -p = -5.0 -prob = ODEProblem(linear, u0, tspan, p) -linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t)) - -# SOLUTION AND CREATE DATASET -sol = solve(prob, Tsit5(); saveat = 0.1) -u = sol.u -time = sol.t -x̂ = u .+ (u .* 0.2) .* randn(size(u)) -dataset = [x̂, time] -t = sol.t -physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] - -ta0 = range(tspan[1], tspan[2], length = 501) -u1 = [linear_analytic(u0, p, ti) for ti in ta0] -time1 = vec(collect(Float64, ta0)) -physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] - -chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) -θinit, st = Lux.setup(Random.default_rng(), chainlux12) - -fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12, - draw_samples = 1500, - l2std = [0.03], - phystd = [0.03], - priorsNNw = (0.0, - 10.0)) - -fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12, - dataset = dataset, - draw_samples = 1500, - l2std = [0.03], - phystd = [0.03], - priorsNNw = (0.0, - 10.0), - param = [ - Normal(-7, - 4), - ]) - -alg = BNNODE(chainlux12, - dataset = dataset, - draw_samples = 1500, - l2std = [0.03], - phystd = [0.03], - priorsNNw = (0.0, - 10.0), - param = [ - Normal(-7, - 4), - ]) - -sol3lux_pestim = solve(prob, alg) - -# testing timepoints -t = sol.t -#------------------------------ ahmc_bayesian_pinn_ode() 
call -# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions] -θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500] -luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] -luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] -meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean - -θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500] -luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] -luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] -meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean - -@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1 -@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1 -@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2 -@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2 - -# estimated parameters(lux chain) -param1 = mean(i[62] for i in fhsampleslux22[1000:1500]) -@test abs(param1 - p) < abs(0.3 * p) - -#-------------------------- solve() call -# (lux chain) -@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15 -# estimated parameters(lux chain) -param1 = sol3lux_pestim.estimated_de_params[1] -@test abs(param1 - p) < abs(0.45 * p) +@testset "Example 1 - without parameter estimation" begin + linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) + linear = (u, p, t) -> cos(2 * π * t) + tspan = (0.0, 2.0) + u0 = 0.0 + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan) + p = prob.p + + # Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode() + ta = range(tspan[1], tspan[2], length = 300) + u = [linear_analytic(u0, nothing, ti) for ti in ta] + x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u))) + time = vec(collect(Float64, ta)) + physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] + + # testing points for solve() call must match saveat(1/50.0) arg + ta0 = range(tspan[1], tspan[2], length = 101) + u1 = [linear_analytic(u0, nothing, ti) for ti in ta0] + x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1))) + time1 = vec(collect(Float64, ta0)) + physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + + chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) + θinit, st = Lux.setup(Random.default_rng(), chainlux) + + fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux, draw_samples = 2500) + + alg = BNNODE(chainlux, draw_samples = 2500) + sol1lux = solve(prob, alg) + + # testing points + t = time + # Mean of last 500 sampled parameter's curves[Ensemble predictions] + θ = [vector_to_parameters(fhsamples[i], θinit) for i in 2000:2500] + luxar = [chainlux(t', θ[i], st)[1] for i in 1:500] + luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] + meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + + # --------------------- ahmc_bayesian_pinn_ode() call + @test mean(abs.(x̂ .- meanscurve)) < 0.05 + @test mean(abs.(physsol1 .- meanscurve)) < 0.005 + + #--------------------- solve() call + @test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05 + @test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05 +end + +@testset "Example 2 - with parameter estimation" begin + linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p) + linear = (u, p, t) -> cos(p * t) + tspan = (0.0, 2.0) + u0 = 0.0 + p = 2 * pi + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p) + + # Numerical and Analytical Solutions + sol1 = solve(prob, Tsit5(); saveat = 0.01) + u = sol1.u + time = sol1.t + + # BPINN AND TRAINING DATASET 
CREATION(dataset must be defined only inside problem timespan!) + ta = range(tspan[1], tspan[2], length = 100) + u = [linear_analytic(u0, p, ti) for ti in ta] + x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u))) + time = vec(collect(Float64, ta)) + dataset = [x̂, time] + physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] + + # testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates) + ta0 = range(tspan[1], tspan[2], length = 101) + u1 = [linear_analytic(u0, p, ti) for ti in ta0] + x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1))) + time1 = vec(collect(Float64, ta0)) + physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + + chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) + θinit, st = Lux.setup(Random.default_rng(), chainlux1) + + fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux1, + dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0, + priorsNNw = (0.0, 3.0), + param = [LogNormal(9, 0.5)]) + + alg = BNNODE(chainlux1, dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0, + priorsNNw = (0.0, + 3.0), + param = [ + LogNormal(9, + 0.5), + ]) + + sol2lux = solve(prob, alg) + + # testing points + t = time + # Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] + θ = [vector_to_parameters(fhsamples[i][1:(end - 1)], θinit) for i in 2000:2500] + luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500] + luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] + meanscurve = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + + # --------------------- ahmc_bayesian_pinn_ode() call + @test mean(abs.(physsol1 .- meanscurve)) < 0.15 + + # ESTIMATED ODE PARAMETERS (NN1 AND NN2) + @test abs(p - mean([fhsamples[i][23] for i in 2000:2500])) < abs(0.35 * p) + + #-------------------------- solve() call + @test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2 + + # ESTIMATED ODE PARAMETERS (NN1 AND NN2) + @test abs(p - sol2lux.estimated_de_params[1]) < abs(0.15 * p) +end + +@testset "Example 3" begin + linear = (u, p, t) -> u / p + exp(t / p) * cos(t) + tspan = (0.0, 10.0) + u0 = 0.0 + p = -5.0 + prob = ODEProblem(linear, u0, tspan, p) + linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t)) + + # SOLUTION AND CREATE DATASET + sol = solve(prob, Tsit5(); saveat = 0.1) + u = sol.u + time = sol.t + x̂ = u .+ (u .* 0.2) .* randn(size(u)) + dataset = [x̂, time] + t = sol.t + physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] + + ta0 = range(tspan[1], tspan[2], length = 501) + u1 = [linear_analytic(u0, p, ti) for ti in ta0] + time1 = vec(collect(Float64, ta0)) + physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + + chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) + θinit, st = Lux.setup(Random.default_rng(), chainlux12) + + fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0)) + + fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ]) + + alg = BNNODE(chainlux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 
4), + ]) + + sol3lux_pestim = solve(prob, alg) + + # testing timepoints + t = sol.t + #------------------------------ ahmc_bayesian_pinn_ode() call + # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions] + θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500] + luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] + luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] + meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + + θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500] + luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] + luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] + meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + + @test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1 + @test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1 + @test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2 + @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2 + + # estimated parameters(lux chain) + param1 = mean(i[62] for i in fhsampleslux22[1000:1500]) + @test abs(param1 - p) < abs(0.3 * p) + + #-------------------------- solve() call + # (lux chain) + @test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15 + # estimated parameters(lux chain) + param1 = sol3lux_pestim.estimated_de_params[1] + @test abs(param1 - p) < abs(0.45 * p) +end + +@testset "Translating from Flux" begin + linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) + linear = (u, p, t) -> cos(2 * π * t) + tspan = (0.0, 2.0) + u0 = 0.0 + prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan) + p = prob.p + + # Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode() + ta = range(tspan[1], tspan[2], length = 300) + u = [linear_analytic(u0, nothing, ti) for ti in ta] + x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u))) + time = vec(collect(Float64, ta)) + physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] + + # testing points for solve() call must match saveat(1/50.0) arg + ta0 = range(tspan[1], tspan[2], length = 101) + u1 = [linear_analytic(u0, nothing, ti) for ti in ta0] + x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1))) + time1 = vec(collect(Float64, ta0)) + physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 + fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainflux, draw_samples = 2500) + alg = BNNODE(chainflux, draw_samples = 2500) + @test alg.chain isa Lux.AbstractExplicitLayer +end diff --git a/test/IDE_tests.jl b/test/IDE_tests.jl index 0033a658d4..d9aa9eb3cf 100644 --- a/test/IDE_tests.jl +++ b/test/IDE_tests.jl @@ -1,7 +1,7 @@ using Test, NeuralPDE using Optimization, OptimizationOptimJL import ModelingToolkit: Interval -using DomainSets +using DomainSets, Flux import Lux using Random @@ -12,172 +12,158 @@ callback = function (p, l) return false end -mse(x, y) = sum(abs2, x .- y) - -#Integration Tests -println("Integral Tests") -@parameters t -@variables i(..) 
-Di = Differential(t)
-Ii = Integral(t in DomainSets.ClosedInterval(0, t))
-eq = Di(i(t)) + 2 * i(t) + 5 * Ii(i(t)) ~ 1
-bcs = [i(0.0) ~ 0.0]
-domains = [t ∈ Interval(0.0, 2.0)]
-chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
-strategy_ = GridTraining(0.1)
-discretization = PhysicsInformedNN(chain, strategy_)
-@named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)])
-prob = discretize(pde_system, discretization)
-res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
-    maxiters = 100)
-ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
-phi = discretization.phi
-
-analytic_sol_func(t) = 1 / 2 * (exp(-t)) * (sin(2 * t))
-u_real = [analytic_sol_func(t) for t in ts]
-u_predict = [first(phi([t], res.minimizer)) for t in ts]
-@test mse(u_real, u_predict) < 0.01
-
-## Simple Integral Test
-println("Simple Integral Test")
+@testset "Example 1 - 1D" begin
+ @parameters t
+ @variables i(..)
+ Di = Differential(t)
+ Ii = Integral(t in DomainSets.ClosedInterval(0, t))
+ eq = Di(i(t)) + 2 * i(t) + 5 * Ii(i(t)) ~ 1
+ bcs = [i(0.0) ~ 0.0]
+ domains = [t ∈ Interval(0.0, 2.0)]
+ chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
+ strategy_ = GridTraining(0.1)
+ discretization = PhysicsInformedNN(chain, strategy_)
+ @named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)])
+ prob = discretize(pde_system, discretization)
+ res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100)
+ ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
+ phi = discretization.phi
+ analytic_sol_func(t) = 1 / 2 * (exp(-t)) * (sin(2 * t))
+ u_real = [analytic_sol_func(t) for t in ts]
+ u_predict = [first(phi([t], res.minimizer)) for t in ts]
+ @test Flux.mse(u_real, u_predict) < 0.01
+end

-@parameters x
-@variables u(..)
-Ix = Integral(x in DomainSets.ClosedInterval(0, x))
-# eq = Ix(u(x)) ~ (x^3)/3
-eq = Ix(u(x) * cos(x)) ~ (x^3) / 3
+@testset "Example 2 - 1D" begin
+ @parameters x
+ @variables u(..)
+ Ix = Integral(x in DomainSets.ClosedInterval(0, x))
-bcs = [u(0.0) ~ 0.0]
-domains = [x ∈ Interval(0.0, 1.00)]
-chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
-strategy_ = GridTraining(0.1)
-discretization = PhysicsInformedNN(chain, strategy_)
-@named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
-prob = discretize(pde_system, discretization)
-res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
-    maxiters = 200)
-xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
-phi = discretization.phi
-u_predict = [first(phi([x], res.minimizer)) for x in xs]
-u_real = [x^2 / cos(x) for x in xs]
-@test mse(u_real, u_predict) < 0.001
-
-#simple multidimensitonal integral test
-println("simple multidimensitonal integral test")
-@parameters x, y
-@variables u(..)
-Dx = Differential(x)
-Dy = Differential(y)
-Ix = Integral((x, y) in DomainSets.UnitSquare())
-eq = Ix(u(x, y)) ~ 1 / 3
-bcs = [u(0.0, 0.0) ~ 1, Dx(u(x, y)) ~ -2 * x, Dy(u(x, y)) ~ -2 * y]
-domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
-chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
-strategy_ = GridTraining(0.1)
-discretization = PhysicsInformedNN(chain, strategy_)
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-prob = discretize(pde_system, discretization)
-res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
-    maxiters = 100)
-xs = 0.00:0.01:1.00
-ys = 0.00:0.01:1.00
-phi = discretization.phi
-u_real = collect(1 - x^2 - y^2 for y in ys, x in xs);
-u_predict = collect(Array(phi([x, y], res.minimizer))[1] for y in ys, x in xs);
-@test mse(u_real, u_predict) < 0.001
-
-@parameters x, y
-@variables u(..)
-Dx = Differential(x)
-Dy = Differential(y)
-Ix = Integral((x, y) in DomainSets.ProductDomain(UnitInterval(), ClosedInterval(0, x)))
-eq = Ix(u(x, y)) ~ 5 / 12
-bcs = [u(0.0, 0.0) ~ 0, Dy(u(x, y)) ~ 2 * y, u(x, 0) ~ x]
-domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
-chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
-strategy_ = GridTraining(0.1)
-discretization = PhysicsInformedNN(chain, strategy_)
-@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
-prob = discretize(pde_system, discretization)
-res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
-    maxiters = 100)
-xs = 0.00:0.01:1.00
-ys = 0.00:0.01:1.00
-phi = discretization.phi
-
-u_real = collect(x + y^2 for y in ys, x in xs);
-u_predict = collect(Array(phi([x, y], res.minimizer))[1] for y in ys, x in xs);
-@test mse(u_real, u_predict) < 0.01
-
-## Two variables Integral Test
-println("Two variables Integral Test")
-
-@parameters x
-@variables u(..) w(..)
-Dx = Differential(x)
-Ix = Integral(x in DomainSets.ClosedInterval(1, x))
-
-eqs = [Ix(u(x) * w(x)) ~ log(abs(x)),
-    Dx(w(x)) ~ -2 / (x^3),
-    u(x) ~ x]
+ eq = Ix(u(x) * cos(x)) ~ (x^3) / 3
+
+ bcs = [u(0.0) ~ 0.0]
+ domains = [x ∈ Interval(0.0, 1.00)]
+ chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1))
+ strategy_ = GridTraining(0.1)
+ discretization = PhysicsInformedNN(chain, strategy_)
+ @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)])
+ prob = discretize(pde_system, discretization)
+ res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback,
+     maxiters = 200)
+ xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1]
+ phi = discretization.phi
+ u_predict = [first(phi([x], res.minimizer)) for x in xs]
+ u_real = [x^2 / cos(x) for x in xs]
+ @test Flux.mse(u_real, u_predict) < 0.001
+end

-bcs = [u(1.0) ~ 1.0, w(1.0) ~ 1.0]
-domains = [x ∈ Interval(1.0, 2.0)]
+@testset "Example 3 - 2 Inputs, 1 Output" begin
+ @parameters x, y
+ @variables u(..)
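+ # Exact solution u(x, y) = 1 - x^2 - y^2: the derivative boundary conditions force
+ # it, and its integral over the unit square is 1 - 1/3 - 1/3 = 1/3, matching eq.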
+ Dx = Differential(x)
+ Dy = Differential(y)
+ Ix = Integral((x, y) in DomainSets.UnitSquare())
+ eq = Ix(u(x, y)) ~ 1 / 3
+ bcs = [u(0.0, 0.0) ~ 1, Dx(u(x, y)) ~ -2 * x, Dy(u(x, y)) ~ -2 * y]
+ domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
+ chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
+ strategy_ = GridTraining(0.1)
+ discretization = PhysicsInformedNN(chain, strategy_)
+ @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+ prob = discretize(pde_system, discretization)
+ res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100)
+ xs = 0.00:0.01:1.00
+ ys = 0.00:0.01:1.00
+ phi = discretization.phi
+ u_real = collect(1 - x^2 - y^2 for y in ys, x in xs);
+ u_predict = collect(Array(phi([x, y], res.minimizer))[1] for y in ys, x in xs);
+ @test Flux.mse(u_real, u_predict) < 0.001
+end

+@testset "Example 4 - 2 Inputs, 1 Output" begin
+ @parameters x, y
+ @variables u(..)
+ Dx = Differential(x)
+ Dy = Differential(y)
+ Ix = Integral((x, y) in DomainSets.ProductDomain(UnitInterval(), ClosedInterval(0, x)))
+ eq = Ix(u(x, y)) ~ 5 / 12
+ bcs = [u(0.0, 0.0) ~ 0, Dy(u(x, y)) ~ 2 * y, u(x, 0) ~ x]
+ domains = [x ∈ Interval(0.0, 1.00), y ∈ Interval(0.0, 1.00)]
+ chain = Lux.Chain(Lux.Dense(2, 15, Lux.σ), Lux.Dense(15, 1))
+ strategy_ = GridTraining(0.1)
+ discretization = PhysicsInformedNN(chain, strategy_)
+ @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])
+ prob = discretize(pde_system, discretization)
+ res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 100)
+ xs = 0.00:0.01:1.00
+ ys = 0.00:0.01:1.00
+ phi = discretization.phi
+ u_real = collect(x + y^2 for y in ys, x in xs);
+ u_predict = collect(Array(phi([x, y], res.minimizer))[1] for y in ys, x in xs);
+ @test Flux.mse(u_real, u_predict) < 0.01
+end

+@testset "Example 5 - 1 Input, 2 Outputs" begin
+ @parameters x
+ @variables u(..) w(..)
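+ # Coupled system with exact solution u(x) = x and w(x) = 1/x^2, so that
+ # ∫₁ˣ u(t)w(t) dt = log(x) on the domain [1, 2].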
+ Dx = Differential(x) + Ix = Integral(x in DomainSets.ClosedInterval(1, x)) + eqs = [Ix(u(x) * w(x)) ~ log(abs(x)), Dx(w(x)) ~ -2 / (x^3), u(x) ~ x] + bcs = [u(1.0) ~ 1.0, w(1.0) ~ 1.0] + domains = [x ∈ Interval(1.0, 2.0)] + chains = [Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1)) for _ in 1:2] + strategy_ = GridTraining(0.1) + discretization = PhysicsInformedNN(chains, strategy_) + @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x), w(x)]) + prob = discretize(pde_system, discretization) + res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 200) + xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] + phi = discretization.phi + u_predict = [(phi[1]([x], res.u.depvar.u))[1] for x in xs] + w_predict = [(phi[2]([x], res.u.depvar.w))[1] for x in xs] + u_real = [x for x in xs] + w_real = [1 / x^2 for x in xs] + @test Flux.mse(u_real, u_predict) < 0.001 + @test Flux.mse(w_real, w_predict) < 0.001 +end -## Infinity Integral Test -println("Infinity Integral Test") -@parameters x -@variables u(..) -I = Integral(x in ClosedInterval(1, x)) -Iinf = Integral(x in ClosedInterval(1, Inf)) -eqs = [I(u(x)) ~ Iinf(u(x)) - 1 / x] -bcs = [u(1) ~ 1] -domains = [x ∈ Interval(1.0, 2.0)] -chain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 1)) -discretization = PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1)) -@named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)]) -prob = discretize(pde_system, discretization) -res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, - maxiters = 200) -xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] -phi = discretization.phi -u_predict = [first(phi([x], res.minimizer)) for x in xs] -u_real = [1 / x^2 for x in xs] -@test u_real≈u_predict rtol=10^-2 +@testset "Example 6: Infinity" begin + @parameters x + @variables u(..) + I = Integral(x in ClosedInterval(1, x)) + Iinf = Integral(x in ClosedInterval(1, Inf)) + eqs = [I(u(x)) ~ Iinf(u(x)) - 1 / x] + bcs = [u(1) ~ 1] + domains = [x ∈ Interval(1.0, 2.0)] + chain = Lux.Chain(Lux.Dense(1, 10, Lux.σ), Lux.Dense(10, 1)) + discretization = PhysicsInformedNN(chain, NeuralPDE.GridTraining(0.1)) + @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)]) + prob = discretize(pde_system, discretization) + res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 200) + xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] + phi = discretization.phi + u_predict = [first(phi([x], res.minimizer)) for x in xs] + u_real = [1 / x^2 for x in xs] + @test u_real≈u_predict rtol=10^-2 +end -# Infinity Integral equation Test -println("Infinity Integral equation Test") -@parameters x -@variables u(..) -I = Integral(x in ClosedInterval(x, Inf)) -eq = I(u(x)) ~ 1 / x -domains = [x ∈ Interval(1.0, 2.0)] -bcs = [u(1) ~ 1] -chain = Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 1)) -discretization = PhysicsInformedNN(chain, GridTraining(0.1)) -@named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) -prob = discretize(pde_system, discretization) -res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, - maxiters = 300) -xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] -phi = discretization.phi -u_predict = [first(phi([x], res.minimizer)) for x in xs] -u_real = [1 / x^2 for x in xs] -@test u_real≈u_predict rtol=10^-2 +@testset "Example 7: Infinity" begin + @parameters x + @variables u(..) 
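+ # Exact solution u(x) = 1/x^2, since ∫ₓ^∞ 1/t^2 dt = 1/x matches the equation below.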
+ I = Integral(x in ClosedInterval(x, Inf)) + eq = I(u(x)) ~ 1 / x + domains = [x ∈ Interval(1.0, 2.0)] + bcs = [u(1) ~ 1] + chain = Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 1)) + discretization = PhysicsInformedNN(chain, GridTraining(0.1)) + @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) + prob = discretize(pde_system, discretization) + res = solve(prob, OptimizationOptimJL.BFGS(); callback = callback, maxiters = 300) + xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] + phi = discretization.phi + u_predict = [first(phi([x], res.minimizer)) for x in xs] + u_real = [1 / x^2 for x in xs] + @test u_real≈u_predict rtol=10^-2 +end diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl index 1ed22ea8ba..e47ab36d16 100644 --- a/test/NNPDE_tests.jl +++ b/test/NNPDE_tests.jl @@ -6,6 +6,7 @@ import ModelingToolkit: Interval, infimum, supremum using DomainSets import Lux using LineSearches +using Flux using Random Random.seed!(100) @@ -15,7 +16,6 @@ callback = function (p, l) return false end -## Example 1, 1D ode function test_ode(strategy_) println("Example 1, 1D ode: strategy: $(nameof(typeof(strategy_)))") @parameters θ @@ -35,11 +35,9 @@ function test_ode(strategy_) # Neural network chain = Lux.Chain(Lux.Dense(1, 12, Lux.σ), Lux.Dense(12, 1)) - discretization = NeuralPDE.PhysicsInformedNN(chain, - strategy_) - + discretization = PhysicsInformedNN(chain, strategy_) @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) - prob = NeuralPDE.discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 1000) prob = remake(prob, u0 = res.minimizer) @@ -47,12 +45,10 @@ function test_ode(strategy_) prob = remake(prob, u0 = res.minimizer) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 500) phi = discretization.phi - analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] u_real = [analytic_sol_func(t) for t in ts] u_predict = [first(phi(t, res.minimizer)) for t in ts] - @test u_predict≈u_real atol=0.1 end @@ -78,13 +74,13 @@ strategies = [ quasirandom_strategy_resampling, quadrature_strategy, ] -@testset "Test ODE/Heterogeneous" begin -map(strategies) do strategy_ - test_ode(strategy_) -end end +@testset "Test ODE/Heterogeneous" begin + map(strategies) do strategy_ + test_ode(strategy_) + end +end -## Heterogeneous system @testset "Example 1: Heterogeneous system" begin @parameters x, y, z @variables u(..), v(..), h(..), p(..) @@ -162,7 +158,6 @@ end end end end -## Example 2, 2D Poisson equation function test_2d_poisson_equation(chain_, strategy_) println("Example 2, 2D Poisson equation, chain: $(nameof(typeof(chain_))), strategy: $(nameof(typeof(strategy_)))") @parameters x y @@ -205,7 +200,6 @@ end Lux.Dense(12, 1)) test_2d_poisson_equation(chain_, strategy_) end - algs = [CubatureJLp()] #CubatureJLh(), for alg in algs chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), @@ -216,7 +210,6 @@ end end end -## Example 3, 3rd-order @testset "Example 3, 3rd-order ode" begin @parameters x @variables u(..), Dxu(..), Dxxu(..), O1(..), O2(..) @@ -275,7 +268,6 @@ end @test u_predict≈u_real atol=10^-4 end -## Example 4, system of pde @testset "Example 4, system of pde" begin @parameters x, y @variables u1(..), u2(..) 
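
Every "Translating from Flux" testset in this patch reduces to the same check: a Flux.Chain handed to a discretizer must come back out as a Lux.AbstractExplicitLayer. A rough standalone sketch of what that translation amounts to, assuming the Flux-to-Lux converter Lux shipped at the time (Lux.transform; the exact helper NeuralPDE calls internally may differ):

using Flux, Lux, Random

fchain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1))
lchain = Lux.transform(fchain)   # assumed converter; newer Lux versions use FromFluxAdaptor
@assert lchain isa Lux.AbstractExplicitLayer
ps, st = Lux.setup(Random.default_rng(), lchain)   # Lux keeps parameters and state external
y, _ = lchain(rand(Float32, 1, 10), ps, st)        # Lux call convention: (x, ps, st) -> (y, st)

The testsets themselves go further where it is cheap to do so, training or sampling with the converted network and comparing against the analytic solution.
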
@@ -323,7 +315,6 @@ end @test u_predict[2]≈u_real[2] atol=0.1 end -## Example 5, 2d wave equation, neumann boundary condition @testset "Example 5, 2d wave equation, neumann boundary condition" begin #here we use low level api for build solution @parameters x, t @@ -380,7 +371,6 @@ end @test u_predict≈u_real atol=0.1 end -## Example 6, pde with mixed derivative @testset "Example 6, pde with mixed derivative" begin @parameters x y @variables u(..) @@ -424,3 +414,31 @@ end (length(xs), length(ys))) @test u_predict≈u_real rtol=0.1 end + +@testset "Translating from Flux" begin + @parameters θ + @variables u(..) + Dθ = Differential(θ) + eq = Dθ(u(θ)) ~ θ^3 + 2 * θ + (θ^2) * ((1 + 3 * (θ^2)) / (1 + θ + (θ^3))) - + u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3))) + bcs = [u(0.0) ~ 1.0] + domains = [θ ∈ Interval(0.0, 1.0)] + + chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1)) + discretization = PhysicsInformedNN(chain, QuadratureTraining()) + @test discretization.chain isa Lux.AbstractExplicitLayer + + @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) + prob = discretize(pde_system, discretization) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 1000) + prob = remake(prob, u0 = res.minimizer) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 500) + prob = remake(prob, u0 = res.minimizer) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 500) + phi = discretization.phi + analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 + ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] + u_real = [analytic_sol_func(t) for t in ts] + u_predict = [first(phi(t, res.minimizer)) for t in ts] + @test u_predict≈u_real atol=0.1 +end \ No newline at end of file diff --git a/test/NNPDE_tests_gpu_Lux.jl b/test/NNPDE_tests_gpu_Lux.jl index 2dca784943..b3123a30cd 100644 --- a/test/NNPDE_tests_gpu_Lux.jl +++ b/test/NNPDE_tests_gpu_Lux.jl @@ -14,187 +14,183 @@ end CUDA.allowscalar(false) const gpud = gpu_device() -## ODE -println("ode") -@parameters θ -@variables u(..) -Dθ = Differential(θ) - -# 1D ODE -eq = Dθ(u(θ)) ~ θ^3 + 2.0f0 * θ + (θ^2) * ((1.0f0 + 3 * (θ^2)) / (1.0f0 + θ + (θ^3))) - - u(θ) * (θ + ((1.0f0 + 3.0f0 * (θ^2)) / (1.0f0 + θ + θ^3))) - -# Initial and boundary conditions -bcs = [u(0.0) ~ 1.0f0] - -# Space and time domains -domains = [θ ∈ Interval(0.0f0, 1.0f0)] -# Discretization -dt = 0.1f0 -# Neural network -inner = 20 -chain = Lux.Chain(Lux.Dense(1, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) - -strategy = GridTraining(dt) -ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud -discretization = PhysicsInformedNN(chain, - strategy; - init_params = ps) - -@named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)]) -prob = discretize(pde_system, discretization) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000) -phi = discretization.phi - -analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 -ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1] -u_real = [analytic_sol_func(t) for t in ts] -u_predict = [first(Array(phi([t], res.minimizer))) for t in ts] -@test u_predict≈u_real atol=0.2 - -## 1D PDE Dirichlet boundary conditions -println("1D PDE Dirichlet boundary conditions") -@parameters t x -@variables u(..) 
-Dt = Differential(t) -Dxx = Differential(x)^2 - -eq = Dt(u(t, x)) ~ Dxx(u(t, x)) -bcs = [u(0, x) ~ cos(x), - u(t, 0) ~ exp(-t), - u(t, 1) ~ exp(-t) * cos(1)] - -domains = [t ∈ Interval(0.0, 1.0), - x ∈ Interval(0.0, 1.0)] - -@named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) - -inner = 30 -chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) - -strategy = StochasticTraining(500) -ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 -discretization = PhysicsInformedNN(chain, strategy; init_params = ps) -prob = discretize(pdesys, discretization) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 1000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 1000) -phi = discretization.phi - -u_exact = (t, x) -> exp.(-t) * cos.(x) -ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] -u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x in xs], - (length(ts), length(xs))) -u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) -diff_u = abs.(u_predict .- u_real) -@test u_predict≈u_real atol=1.0 - -## 1D PDE Neumann boundary conditions and Float64 accuracy -println("1D PDE Neumann boundary conditions and Float64 accuracy") -@parameters t x -@variables u(..) -Dt = Differential(t) -Dx = Differential(x) -Dxx = Differential(x)^2 - -# 1D PDE and boundary conditions -eq = Dt(u(t, x)) ~ Dxx(u(t, x)) -bcs = [u(0, x) ~ cos(x), - Dx(u(t, 0)) ~ 0.0, - Dx(u(t, 1)) ~ -exp(-t) * sin(1.0)] - -# Space and time domains -domains = [t ∈ Interval(0.0, 1.0), - x ∈ Interval(0.0, 1.0)] - -# PDE system -@named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) - -inner = 20 -chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) - -strategy = QuasiRandomTraining(500; sampling_alg = SobolSample(), resampling = false, minibatch = 30) -ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 -discretization = PhysicsInformedNN(chain, strategy; init_params = ps) -prob = discretize(pdesys, discretization) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 2000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000) -phi = discretization.phi - -u_exact = (t, x) -> exp(-t) * cos(x) -ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] -u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x in xs], - (length(ts), length(xs))) -u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) -diff_u = abs.(u_predict .- u_real) -@test u_predict≈u_real atol=1.0 - -println("2D PDE") -@parameters t x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 -Dt = Differential(t) -t_min = 0.0 -t_max = 2.0 -x_min = 0.0 -x_max = 2.0 -y_min = 0.0 -y_max = 2.0 - -eq = Dt(u(t, x, y)) ~ Dxx(u(t, x, y)) + Dyy(u(t, x, y)) - -analytic_sol_func(t, x, y) = exp(x + y) * cos(x + y + 4t) -# Initial and boundary conditions -bcs = [u(t_min, x, y) ~ analytic_sol_func(t_min, x, y), - u(t, x_min, y) ~ analytic_sol_func(t, x_min, y), - u(t, x_max, y) ~ analytic_sol_func(t, x_max, y), - u(t, x, y_min) ~ analytic_sol_func(t, x, y_min), - u(t, x, y_max) ~ analytic_sol_func(t, x, y_max)] - -# Space and time domains -domains = [t ∈ Interval(t_min, t_max), - x ∈ Interval(x_min, x_max), - y ∈ Interval(y_min, y_max)] - -# Neural network -inner = 25 -chain = Lux.Chain(Lux.Dense(3, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) - -strategy = GridTraining(0.05) -ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 -discretization = PhysicsInformedNN(chain, strategy; init_params = ps) -@named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)]) -prob = discretize(pde_system, discretization) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2500) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 2500) -@show res.original - -phi = discretization.phi -ts, xs, ys = [infimum(d.domain):0.1:supremum(d.domain) for d in domains] -u_real = [analytic_sol_func(t, x, y) for t in ts for x in xs for y in ys] -u_predict = [first(Array(phi([t, x, y], res.minimizer))) for t in ts for x in xs - for y in ys] - -@test u_predict≈u_real rtol=0.2 +@testset "ODE" begin + @parameters θ + @variables u(..) + Dθ = Differential(θ) + + # 1D ODE + eq = Dθ(u(θ)) ~ θ^3 + 2.0f0 * θ + (θ^2) * ((1.0f0 + 3 * (θ^2)) / (1.0f0 + θ + (θ^3))) - + u(θ) * (θ + ((1.0f0 + 3.0f0 * (θ^2)) / (1.0f0 + θ + θ^3))) + + # Initial and boundary conditions + bcs = [u(0.0) ~ 1.0f0] + + # Space and time domains + domains = [θ ∈ Interval(0.0f0, 1.0f0)] + # Discretization + dt = 0.1f0 + # Neural network + inner = 20 + chain = Lux.Chain(Lux.Dense(1, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) + + strategy = GridTraining(dt) + ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud + discretization = PhysicsInformedNN(chain, + strategy; + init_params = ps) + + @named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)]) + prob = discretize(pde_system, discretization) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-2); maxiters = 2000) + phi = discretization.phi + analytic_sol_func(t) = exp(-(t^2) / 2) / (1 + t + t^3) + t^2 + ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1] + u_real = [analytic_sol_func(t) for t in ts] + u_predict = [first(Array(phi([t], res.minimizer))) for t in ts] + @test u_predict≈u_real atol=0.2 +end + +@testset "1D PDE Dirichlet boundary conditions" begin + @parameters t x + @variables u(..) 
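+ # Heat equation u_t = u_xx with Dirichlet data taken from the exact solution
+ # u(t, x) = exp(-t) * cos(x), which the prediction is compared against below.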
+ Dt = Differential(t) + Dxx = Differential(x)^2 + + eq = Dt(u(t, x)) ~ Dxx(u(t, x)) + bcs = [u(0, x) ~ cos(x), + u(t, 0) ~ exp(-t), + u(t, 1) ~ exp(-t) * cos(1)] + + domains = [t ∈ Interval(0.0, 1.0), + x ∈ Interval(0.0, 1.0)] + + @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) + + inner = 30 + chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) + + strategy = StochasticTraining(500) + ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 + discretization = PhysicsInformedNN(chain, strategy; init_params = ps) + prob = discretize(pdesys, discretization) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 1000) + prob = remake(prob, u0 = res.minimizer) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 1000) + phi = discretization.phi + u_exact = (t, x) -> exp.(-t) * cos.(x) + ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] + u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x in xs], + (length(ts), length(xs))) + u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) + diff_u = abs.(u_predict .- u_real) + @test u_predict≈u_real atol=1.0 +end + +@testset "1D PDE Neumann boundary conditions and Float64 accuracy" begin + @parameters t x + @variables u(..) + Dt = Differential(t) + Dx = Differential(x) + Dxx = Differential(x)^2 + + # 1D PDE and boundary conditions + eq = Dt(u(t, x)) ~ Dxx(u(t, x)) + bcs = [u(0, x) ~ cos(x), + Dx(u(t, 0)) ~ 0.0, + Dx(u(t, 1)) ~ -exp(-t) * sin(1.0)] + + # Space and time domains + domains = [t ∈ Interval(0.0, 1.0), + x ∈ Interval(0.0, 1.0)] + + # PDE system + @named pdesys = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) + + inner = 20 + chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) + + strategy = QuasiRandomTraining(500; sampling_alg = SobolSample(), resampling = false, minibatch = 30) + ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 + discretization = PhysicsInformedNN(chain, strategy; init_params = ps) + prob = discretize(pdesys, discretization) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 2000) + prob = remake(prob, u0 = res.minimizer) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2000) + phi = discretization.phi + u_exact = (t, x) -> exp(-t) * cos(x) + ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] + u_predict = reshape([first(Array(phi([t, x], res.minimizer))) for t in ts for x in xs], + (length(ts), length(xs))) + u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) + diff_u = abs.(u_predict .- u_real) + @test u_predict≈u_real atol=1.0 +end + +@testset "2D PDE" begin + @parameters t x y + @variables u(..) 
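+ # 2D heat equation u_t = u_xx + u_yy with manufactured solution
+ # exp(x + y) * cos(x + y + 4t) supplying the initial and boundary data.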
+ Dxx = Differential(x)^2 + Dyy = Differential(y)^2 + Dt = Differential(t) + t_min = 0.0 + t_max = 2.0 + x_min = 0.0 + x_max = 2.0 + y_min = 0.0 + y_max = 2.0 + + eq = Dt(u(t, x, y)) ~ Dxx(u(t, x, y)) + Dyy(u(t, x, y)) + + analytic_sol_func(t, x, y) = exp(x + y) * cos(x + y + 4t) + # Initial and boundary conditions + bcs = [u(t_min, x, y) ~ analytic_sol_func(t_min, x, y), + u(t, x_min, y) ~ analytic_sol_func(t, x_min, y), + u(t, x_max, y) ~ analytic_sol_func(t, x_max, y), + u(t, x, y_min) ~ analytic_sol_func(t, x, y_min), + u(t, x, y_max) ~ analytic_sol_func(t, x, y_max)] + + # Space and time domains + domains = [t ∈ Interval(t_min, t_max), + x ∈ Interval(x_min, x_max), + y ∈ Interval(y_min, y_max)] + + # Neural network + inner = 25 + chain = Lux.Chain(Lux.Dense(3, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) + + strategy = GridTraining(0.05) + ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 + discretization = PhysicsInformedNN(chain, strategy; init_params = ps) + @named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)]) + prob = discretize(pde_system, discretization) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); maxiters = 2500) + prob = remake(prob, u0 = res.minimizer) + res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); maxiters = 2500) + phi = discretization.phi + ts, xs, ys = [infimum(d.domain):0.1:supremum(d.domain) for d in domains] + u_real = [analytic_sol_func(t, x, y) for t in ts for x in xs for y in ys] + u_predict = [first(Array(phi([t, x, y], res.minimizer))) for t in ts for x in xs + for y in ys] + + @test u_predict≈u_real rtol=0.2 +end \ No newline at end of file diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index ad21395e3c..72c0d78ab2 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -38,17 +38,15 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite y ∈ Interval(0.0, 1.0)] iteration = [0] - discretization = NeuralPDE.PhysicsInformedNN(chain_, - strategy_; - adaptive_loss = adaptive_loss, - logger = nothing, - iteration = iteration) + discretization = PhysicsInformedNN(chain_, + strategy_; + adaptive_loss = adaptive_loss, + logger = nothing, + iteration = iteration) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) - prob = NeuralPDE.discretize(pde_system, discretization) + prob = discretize(pde_system, discretization) phi = discretization.phi - sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) - xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], @@ -61,9 +59,7 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite end return false end - res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.03); maxiters = maxiters, - callback = callback) - + res = solve(prob, OptimizationOptimisers.Adam(0.03); maxiters = maxiters, callback = callback) u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) diff --git a/test/additional_loss_tests.jl b/test/additional_loss_tests.jl index e91478f834..5d5034e748 100644 --- a/test/additional_loss_tests.jl +++ b/test/additional_loss_tests.jl @@ -8,264 +8,224 @@ using 
OrdinaryDiffEq, ComponentArrays import Lux using ComponentArrays -## Example 7, Fokker-Planck equation -println("Example 7, Fokker-Planck equation") -# the example took from this article https://arxiv.org/abs/1910.10503 -@parameters x -@variables p(..) -Dx = Differential(x) -Dxx = Differential(x)^2 - -#2D PDE -α = 0.3 -β = 0.5 -_σ = 0.5 -# Discretization -dx = 0.01 -# here we use normalization condition: dx*p(x) ~ 1, in order to get non-zero solution. -#(α - 3*β*x^2)*p(x) + (α*x - β*x^3)*Dx(p(x)) ~ (_σ^2/2)*Dxx(p(x)) -eq = [Dx((α * x - β * x^3) * p(x)) ~ (_σ^2 / 2) * Dxx(p(x))] -x_0 = -2.2 -x_end = 2.2 -# Initial and boundary conditions -bcs = [p(x_0) ~ 0.0, p(x_end) ~ 0.0] - -# Space and time domains -domains = [x ∈ Interval(-2.2, 2.2)] - -# Neural network -inn = 18 -chain = Lux.Chain(Lux.Dense(1, inn, Lux.σ), - Lux.Dense(inn, inn, Lux.σ), - Lux.Dense(inn, inn, Lux.σ), - Lux.Dense(inn, 1)) -init_params = Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain)[1])) - -lb = [x_0] -ub = [x_end] -function norm_loss_function(phi, θ, p) - function inner_f(x, θ) - dx * phi(x, θ) .- 1 +@testset "Fokker-Planck" begin + # the example took from this article https://arxiv.org/abs/1910.10503 + @parameters x + @variables p(..) + Dx = Differential(x) + Dxx = Differential(x)^2 + α = 0.3 + β = 0.5 + _σ = 0.5 + # Discretization + dx = 0.01 + # here we use normalization condition: dx*p(x) ~ 1, in order to get non-zero solution. + #(α - 3*β*x^2)*p(x) + (α*x - β*x^3)*Dx(p(x)) ~ (_σ^2/2)*Dxx(p(x)) + eq = [Dx((α * x - β * x^3) * p(x)) ~ (_σ^2 / 2) * Dxx(p(x))] + x_0 = -2.2 + x_end = 2.2 + # Initial and boundary conditions + bcs = [p(x_0) ~ 0.0, p(x_end) ~ 0.0] + + # Space and time domains + domains = [x ∈ Interval(-2.2, 2.2)] + + # Neural network + inn = 18 + chain = Lux.Chain(Lux.Dense(1, inn, Lux.σ), + Lux.Dense(inn, inn, Lux.σ), + Lux.Dense(inn, inn, Lux.σ), + Lux.Dense(inn, 1)) + init_params = Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain)[1])) + lb = [x_0] + ub = [x_end] + function norm_loss_function(phi, θ, p) + function inner_f(x, θ) + dx * phi(x, θ) .- 1 + end + prob = IntegralProblem(inner_f, lb, ub, θ) + norm2 = solve(prob, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10) + abs(norm2[1]) end - prob = IntegralProblem(inner_f, lb, ub, θ) - norm2 = solve(prob, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10) - abs(norm2[1]) -end -# norm_loss_function(phi,init_params,nothing) - -discretization = NeuralPDE.PhysicsInformedNN(chain, - NeuralPDE.GridTraining(dx); - init_params = init_params, - additional_loss = norm_loss_function) - -@named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)]) -prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) - -pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions -bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions - -phi = discretization.phi - -cb_ = function (p, l) - println("loss: ", l) - println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) - println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) - println("additional_loss: ", norm_loss_function(phi, p, nothing)) - return false -end - -res = Optimization.solve(prob, OptimizationOptimJL.LBFGS(), maxiters = 400, callback = cb_) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 2000, callback = cb_) - -C = 142.88418699042 -analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * 
x^2 - β * x^4)) -xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1] -u_real = [analytic_sol_func(x) for x in xs] -u_predict = [first(phi(x, res.u)) for x in xs] - -@test u_predict≈u_real rtol=1e-3 - -### No init_params - -discretization = NeuralPDE.PhysicsInformedNN(chain, - NeuralPDE.GridTraining(dx); - additional_loss = norm_loss_function) - -@named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)]) -prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) - -pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions -bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions - -phi = discretization.phi - -cb_ = function (p, l) - println("loss: ", l) - println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) - println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) - println("additional_loss: ", norm_loss_function(phi, p, nothing)) - return false + discretization = PhysicsInformedNN(chain, GridTraining(dx); init_params = init_params, + additional_loss = norm_loss_function) + @named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)]) + prob = discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions + bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions + phi = discretization.phi + cb_ = function (p, l) + println("loss: ", l) + println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) + println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) + println("additional_loss: ", norm_loss_function(phi, p, nothing)) + return false + end + res = solve(prob, OptimizationOptimJL.LBFGS(), maxiters = 400, callback = cb_) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 2000, callback = cb_) + C = 142.88418699042 + analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * x^2 - β * x^4)) + xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1] + u_real = [analytic_sol_func(x) for x in xs] + u_predict = [first(phi(x, res.u)) for x in xs] + @test u_predict≈u_real rtol=1e-3 + + ### No init_params + discretization = PhysicsInformedNN(chain, GridTraining(dx); additional_loss = norm_loss_function) + @named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)]) + prob = discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions + bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions + phi = discretization.phi + cb_ = function (p, l) + println("loss: ", l) + println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) + println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) + println("additional_loss: ", norm_loss_function(phi, p, nothing)) + return false + end + res = solve(prob, OptimizationOptimJL.LBFGS(), maxiters = 400, callback = cb_) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 2000, callback = cb_) + C = 142.88418699042 + analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * x^2 - β * x^4)) + xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1] + u_real = [analytic_sol_func(x) for x in xs] + u_predict = [first(phi(x, res.u)) for x in xs] + @test u_predict≈u_real rtol=1e-3 end -res = Optimization.solve(prob, 
OptimizationOptimJL.LBFGS(), maxiters = 400, callback = cb_) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 2000, callback = cb_) - -C = 142.88418699042 -analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * x^2 - β * x^4)) -xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1] -u_real = [analytic_sol_func(x) for x in xs] -u_predict = [first(phi(x, res.u)) for x in xs] - -@test u_predict≈u_real rtol=1e-3 - -## Example 8, Lorenz System (Parameter Estimation) -println("Example 8, Lorenz System") - -Random.seed!(1234) -@parameters t, σ_, β, ρ -@variables x(..), y(..), z(..) -Dt = Differential(t) -eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)), - Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t), - Dt(z(t)) ~ x(t) * y(t) - β * z(t)] - -bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0] -domains = [t ∈ Interval(0.0, 1.0)] -dt = 0.05 - -input_ = length(domains) -n = 12 -chain = [Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.σ), - Lux.Dense(n, 1)) for _ in 1:3] -#Generate Data -function lorenz!(du, u, p, t) - du[1] = 10.0 * (u[2] - u[1]) - du[2] = u[1] * (28.0 - u[3]) - u[2] - du[3] = u[1] * u[2] - (8 / 3) * u[3] -end +@testset "Lorenz System" begin + @parameters t, σ_, β, ρ + @variables x(..), y(..), z(..) + Dt = Differential(t) + eqs = [Dt(x(t)) ~ σ_ * (y(t) - x(t)), + Dt(y(t)) ~ x(t) * (ρ - z(t)) - y(t), + Dt(z(t)) ~ x(t) * y(t) - β * z(t)] + + bcs = [x(0) ~ 1.0, y(0) ~ 0.0, z(0) ~ 0.0] + domains = [t ∈ Interval(0.0, 1.0)] + dt = 0.05 + + input_ = length(domains) + n = 12 + chain = [Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.σ), + Lux.Dense(n, 1)) for _ in 1:3] + #Generate Data + function lorenz!(du, u, p, t) + du[1] = 10.0 * (u[2] - u[1]) + du[2] = u[1] * (28.0 - u[3]) - u[2] + du[3] = u[1] * u[2] - (8 / 3) * u[3] + end -u0 = [1.0; 0.0; 0.0] -tspan = (0.0, 1.0) -prob = ODEProblem(lorenz!, u0, tspan) -sol = solve(prob, Tsit5(), dt = 0.1) -ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1] + u0 = [1.0; 0.0; 0.0] + tspan = (0.0, 1.0) + prob = ODEProblem(lorenz!, u0, tspan) + sol = solve(prob, Tsit5(), dt = 0.1) + ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1] + + function getData(sol) + data = [] + us = hcat(sol(ts).u...) + ts_ = hcat(sol(ts).t...) + return [us, ts_] + end -function getData(sol) - data = [] - us = hcat(sol(ts).u...) - ts_ = hcat(sol(ts).t...) 
- return [us, ts_] -end + data = getData(sol) -data = getData(sol) + #Additional Loss Function + init_params = [Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain[i])[1])) + for i in 1:3] + names = (:x, :y, :z) + flat_init_params = ComponentArray(NamedTuple{names}(i for i in init_params)) -#Additional Loss Function -init_params = [Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain[i])[1])) - for i in 1:3] -names = (:x, :y, :z) -flat_init_params = ComponentArray(NamedTuple{names}(i for i in init_params)) + acum = [0; accumulate(+, length.(init_params))] + sep = [(acum[i] + 1):acum[i + 1] for i in 1:(length(acum) - 1)] + (u_, t_) = data + len = length(data[2]) -acum = [0; accumulate(+, length.(init_params))] -sep = [(acum[i] + 1):acum[i + 1] for i in 1:(length(acum) - 1)] -(u_, t_) = data -len = length(data[2]) + function additional_loss(phi, θ, p) + return sum(sum(abs2, phi[i](t_, getproperty(θ, names[i])) .- u_[[i], :]) / + len + for i in 1:1:3) + end -function additional_loss(phi, θ, p) - return sum(sum(abs2, phi[i](t_, getproperty(θ, names[i])) .- u_[[i], :]) / - len - for i in 1:1:3) + discretization = PhysicsInformedNN(chain, + GridTraining(dt); + init_params = flat_init_params, + param_estim = true, + additional_loss = additional_loss) + + additional_loss(discretization.phi, flat_init_params, nothing) + @named pde_system = PDESystem(eqs, bcs, domains, + [t], [x(t), y(t), z(t)], [σ_, ρ, β], + defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) + prob = discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + sym_prob.loss_functions.full_loss_function(ComponentArray(depvar = flat_init_params, + p = ones(3)), + Float64[]) + + res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000) + p_ = res.minimizer[(end - 2):end] + @test sum(abs2, p_[1] - 10.00) < 0.1 + @test sum(abs2, p_[2] - 28.00) < 0.1 + @test sum(abs2, p_[3] - (8 / 3)) < 0.1 + + ### No init_params + discretization = PhysicsInformedNN(chain, + GridTraining(dt); + param_estim = true, + additional_loss = additional_loss) + + additional_loss(discretization.phi, flat_init_params, nothing) + @named pde_system = PDESystem(eqs, bcs, domains, + [t], [x(t), y(t), z(t)], [σ_, ρ, β], + defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) + prob = discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + sym_prob.loss_functions.full_loss_function(sym_prob.flat_init_params, nothing) + res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000) + p_ = res.minimizer[(end - 2):end] + @test sum(abs2, p_[1] - 10.00) < 0.1 + @test sum(abs2, p_[2] - 28.00) < 0.1 + @test sum(abs2, p_[3] - (8 / 3)) < 0.1 end -discretization = NeuralPDE.PhysicsInformedNN(chain, - NeuralPDE.GridTraining(dt); - init_params = flat_init_params, - param_estim = true, - additional_loss = additional_loss) - -additional_loss(discretization.phi, flat_init_params, nothing) - -@named pde_system = PDESystem(eqs, bcs, domains, - [t], [x(t), y(t), z(t)], [σ_, ρ, β], - defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) -prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) -sym_prob.loss_functions.full_loss_function(ComponentArray(depvar = flat_init_params, - p = ones(3)), - Float64[]) - -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000) -p_ = res.minimizer[(end - 2):end] -@test sum(abs2, p_[1] - 10.00) < 0.1 -@test sum(abs2, p_[2] - 28.00) < 0.1 -@test 
sum(abs2, p_[3] - (8 / 3)) < 0.1 - -### No init_params - -discretization = NeuralPDE.PhysicsInformedNN(chain, - NeuralPDE.GridTraining(dt); - param_estim = true, - additional_loss = additional_loss) - -additional_loss(discretization.phi, flat_init_params, nothing) - -@named pde_system = PDESystem(eqs, bcs, domains, - [t], [x(t), y(t), z(t)], [σ_, ρ, β], - defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) -prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) -sym_prob.loss_functions.full_loss_function(sym_prob.flat_init_params, nothing) - -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000) -p_ = res.minimizer[(end - 2):end] -@test sum(abs2, p_[1] - 10.00) < 0.1 -@test sum(abs2, p_[2] - 28.00) < 0.1 -@test sum(abs2, p_[3] - (8 / 3)) < 0.1 - -## approximation from data -println("Approximation of function from data and additional_loss") - -@parameters x -@variables u(..) -eq = [u(0) ~ u(0)] -bc = [u(0) ~ u(0)] -x0 = 0 -x_end = pi -dx = pi / 10 -domain = [x ∈ Interval(x0, x_end)] - -hidden = 10 -chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.sin), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, 1)) - -strategy = NeuralPDE.GridTraining(dx) -xs = collect(x0:dx:x_end)' -aproxf_(x) = @. cos(pi * x) -data = aproxf_(xs) - -function additional_loss_(phi, θ, p) - sum(abs2, phi(xs, θ) .- data) +@testset "Approximation from data and additional_loss" begin + @parameters x + @variables u(..) + eq = [u(0) ~ u(0)] + bc = [u(0) ~ u(0)] + x0 = 0 + x_end = pi + dx = pi / 10 + domain = [x ∈ Interval(x0, x_end)] + hidden = 10 + chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh), + Lux.Dense(hidden, hidden, Lux.sin), + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, 1)) + strategy = GridTraining(dx) + xs = collect(x0:dx:x_end)' + aproxf_(x) = @. 
cos(pi * x) + data = aproxf_(xs) + function additional_loss_(phi, θ, p) + sum(abs2, phi(xs, θ) .- data) + end + discretization = PhysicsInformedNN(chain, strategy; additional_loss = additional_loss_) + @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) + prob = discretize(pde_system, discretization) + sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) + flat_init_params = sym_prob.flat_init_params + phi = discretization.phi + phi(xs, flat_init_params) + additional_loss_(phi, flat_init_params, nothing) + res = solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500) + @test phi(xs, res.u)≈aproxf_(xs) rtol=0.01 end - -discretization = NeuralPDE.PhysicsInformedNN(chain, strategy; - additional_loss = additional_loss_) - -@named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) -prob = NeuralPDE.discretize(pde_system, discretization) -sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) - -flat_init_params = sym_prob.flat_init_params -phi = discretization.phi -phi(xs, flat_init_params) -additional_loss_(phi, flat_init_params, nothing) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500) - -@test phi(xs, res.u)≈aproxf_(xs) rtol=0.01 diff --git a/test/direct_function_tests.jl b/test/direct_function_tests.jl index 663537c01f..f128372f51 100644 --- a/test/direct_function_tests.jl +++ b/test/direct_function_tests.jl @@ -8,120 +8,103 @@ import Lux Random.seed!(110) -## Approximation of function 1D -println("Approximation of function 1D") - -@parameters x -@variables u(..) - -func(x) = @. 2 + abs(x - 0.5) - -eq = [u(x) ~ func(x)] -bc = [u(0) ~ u(0)] - -x0 = 0 -x_end = 2 -dx = 0.001 -domain = [x ∈ Interval(x0, x_end)] - -xs = collect(x0:dx:x_end) -func_s = func(xs) - -hidden = 10 -chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, 1)) - -strategy = NeuralPDE.GridTraining(0.01) - -discretization = NeuralPDE.PhysicsInformedNN(chain, strategy) -@named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) -prob = NeuralPDE.discretize(pde_system, discretization) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.05), maxiters = 1000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(initial_stepnorm = 0.01), - maxiters = 500) - -@test discretization.phi(xs', res.u)≈func(xs') rtol=0.01 - -## Approximation of function 1D 2 -println("Approximation of function 1D 2") - -@parameters x -@variables u(..) -func(x) = @. 
cos(5pi * x) * x -eq = [u(x) ~ func(x)] -bc = [u(0) ~ u(0)] - -x0 = 0 -x_end = 4 -domain = [x ∈ Interval(x0, x_end)] - -hidden = 20 -chain = Lux.Chain(Lux.Dense(1, hidden, Lux.sin), - Lux.Dense(hidden, hidden, Lux.sin), - Lux.Dense(hidden, hidden, Lux.sin), - Lux.Dense(hidden, 1)) - -strategy = GridTraining(0.01) - -discretization = PhysicsInformedNN(chain, strategy) -@named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) -prob = discretize(pde_system, discretization) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1000) - -dx = 0.01 -xs = collect(x0:dx:x_end) -func_s = func(xs) - -@test discretization.phi(xs', res.u)≈func(xs') rtol=0.01 - -## Approximation of function 2D -println("Approximation of function 2D") - -@parameters x, y -@variables u(..) -func(x, y) = -cos(x) * cos(y) * exp(-((x - pi)^2 + (y - pi)^2)) -eq = [u(x, y) ~ func(x, y)] -bc = [u(0, 0) ~ u(0, 0)] - -x0 = -10 -x_end = 10 -y0 = -10 -y_end = 10 -d = 0.4 - -domain = [x ∈ Interval(x0, x_end), y ∈ Interval(y0, y_end)] - -hidden = 25 -chain = Lux.Chain(Lux.Dense(2, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, 1)) - -strategy = GridTraining(d) -discretization = PhysicsInformedNN(chain, strategy) -@named pde_system = PDESystem(eq, bc, domain, [x, y], [u(x, y)]) -prob = discretize(pde_system, discretization) -symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) -symprob.loss_functions.full_loss_function(symprob.flat_init_params, nothing) - -res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500) -phi = discretization.phi - -xs = collect(x0:0.1:x_end) -ys = collect(y0:0.1:y_end) -u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], - (length(xs), length(ys))) -u_real = reshape([func(x, y) for x in xs for y in ys], (length(xs), length(ys))) -diff_u = abs.(u_predict .- u_real) - -@test u_predict≈u_real rtol=0.05 +@testset "Approximation of function 1D" begin + @parameters x + @variables u(..) + + func(x) = @. 2 + abs(x - 0.5) + + eq = [u(x) ~ func(x)] + bc = [u(0) ~ u(0)] + + x0 = 0 + x_end = 2 + dx = 0.001 + domain = [x ∈ Interval(x0, x_end)] + + xs = collect(x0:dx:x_end) + func_s = func(xs) + + hidden = 10 + chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh), + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, 1)) + + strategy = GridTraining(0.01) + discretization = PhysicsInformedNN(chain, strategy) + @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) + prob = discretize(pde_system, discretization) + res = solve(prob, OptimizationOptimisers.Adam(0.05), maxiters = 1000) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(initial_stepnorm = 0.01), maxiters = 500) + @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01 +end + +@testset "Approximation of function 1D - 2" begin + @parameters x + @variables u(..) + func(x) = @. 
cos(5pi * x) * x + eq = [u(x) ~ func(x)] + bc = [u(0) ~ u(0)] + + x0 = 0 + x_end = 4 + domain = [x ∈ Interval(x0, x_end)] + + hidden = 20 + chain = Lux.Chain(Lux.Dense(1, hidden, Lux.sin), + Lux.Dense(hidden, hidden, Lux.sin), + Lux.Dense(hidden, hidden, Lux.sin), + Lux.Dense(hidden, 1)) + + strategy = GridTraining(0.01) + discretization = PhysicsInformedNN(chain, strategy) + @named pde_system = PDESystem(eq, bc, domain, [x], [u(x)]) + prob = discretize(pde_system, discretization) + res = solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1000) + dx = 0.01 + xs = collect(x0:dx:x_end) + func_s = func(xs) + @test discretization.phi(xs', res.u)≈func(xs') rtol=0.01 +end + +@testset "Approximation of function 2D" begin + @parameters x, y + @variables u(..) + func(x, y) = -cos(x) * cos(y) * exp(-((x - pi)^2 + (y - pi)^2)) + eq = [u(x, y) ~ func(x, y)] + bc = [u(0, 0) ~ u(0, 0)] + x0 = -10 + x_end = 10 + y0 = -10 + y_end = 10 + d = 0.4 + domain = [x ∈ Interval(x0, x_end), y ∈ Interval(y0, y_end)] + hidden = 25 + chain = Lux.Chain(Lux.Dense(2, hidden, Lux.tanh), + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, 1)) + + strategy = GridTraining(d) + discretization = PhysicsInformedNN(chain, strategy) + @named pde_system = PDESystem(eq, bc, domain, [x, y], [u(x, y)]) + prob = discretize(pde_system, discretization) + symprob = NeuralPDE.symbolic_discretize(pde_system, discretization) + symprob.loss_functions.full_loss_function(symprob.flat_init_params, nothing) + res = solve(prob, OptimizationOptimisers.Adam(0.01), maxiters = 500) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 1000) + prob = remake(prob, u0 = res.minimizer) + res = solve(prob, OptimizationOptimJL.BFGS(), maxiters = 500) + phi = discretization.phi + xs = collect(x0:0.1:x_end) + ys = collect(y0:0.1:y_end) + u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], + (length(xs), length(ys))) + u_real = reshape([func(x, y) for x in xs for y in ys], (length(xs), length(ys))) + diff_u = abs.(u_predict .- u_real) + @test u_predict≈u_real rtol=0.05 +end From db865ba1e79bfb6b25efd40850d10419fd3f0b8c Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 07:26:53 +0000 Subject: [PATCH 09/16] fixup! refactor: have auto translate from Flux to Lux for PhysicsInformedNN, BNNODE, BayesianPINN, ahmc_bayesian_pinn_ode --- src/pinn_types.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/pinn_types.jl b/src/pinn_types.jl index 5751089330..925070a9e0 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -104,7 +104,11 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN iteration = nothing, kwargs...) multioutput = chain isa AbstractArray - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + if multioutput + !all(i -> i isa Lux.AbstractExplicitLayer, chain) && (chain = Lux.transform.(chain)) + else + !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + end if phi === nothing if multioutput _phi = Phi.(chain) @@ -232,7 +236,11 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN dataset = nothing, kwargs...) 
multioutput = chain isa AbstractArray - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + if multioutput + !all(i -> i isa Lux.AbstractExplicitLayer, chain) && (chain = Lux.transform.(chain)) + else + !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + end if phi === nothing if multioutput _phi = Phi.(chain) @@ -240,7 +248,11 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN _phi = Phi(chain) end else - !(phi.f isa Lux.AbstractExplicitLayer) && throw(ArgumentError("Only Lux Chains are supported")) + if multioutput + all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) || throw(ArgumentError("Only Lux Chains are supported")) + else + (phi.f isa Lux.AbstractExplicitLayer) || throw(ArgumentError("Only Lux Chains are supported")) + end _phi = phi end From fa71c1819ed26640e2844016bcfa83bfd2d5b2f1 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 07:27:13 +0000 Subject: [PATCH 10/16] fixup! test: refactor tests into testsets and add tests for autotranslating --- test/BPINN_PDE_tests.jl | 1 + test/BPINN_Tests.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl index c3ed4d913e..e8f4406103 100644 --- a/test/BPINN_PDE_tests.jl +++ b/test/BPINN_PDE_tests.jl @@ -4,6 +4,7 @@ using ForwardDiff, Distributions, OrdinaryDiffEq using AdvancedHMC, Statistics, Random, Functors using NeuralPDE, MonteCarloMeasurements using ComponentArrays +using Flux Random.seed!(100) diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl index 6277133158..6821a8d35e 100644 --- a/test/BPINN_Tests.jl +++ b/test/BPINN_Tests.jl @@ -4,6 +4,7 @@ using ForwardDiff, Distributions, OrdinaryDiffEq using OptimizationOptimisers, AdvancedHMC, Lux using Statistics, Random, Functors, ComponentArrays using NeuralPDE, MonteCarloMeasurements +using Flux # note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1 # on latest Julia version it performs much better for below tests From 528d2d11920c8c04658e8d480adef3c92d8ccd92 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 08:38:48 +0000 Subject: [PATCH 11/16] refactor: type differential_vars in generate_loss for DAEs as it causes overwrite --- src/dae_solve.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dae_solve.jl b/src/dae_solve.jl index 446f20852d..f2c861da02 100644 --- a/src/dae_solve.jl +++ b/src/dae_solve.jl @@ -46,7 +46,7 @@ function NNDAE(chain, opt, init_params = nothing; strategy = nothing, autodiff = NNDAE(chain, opt, init_params, autodiff, strategy, kwargs) end -function dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool, differential_vars) +function dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool, differential_vars::AbstractVector) if autodiff autodiff && throw(ArgumentError("autodiff not supported for DAE problem.")) else @@ -64,7 +64,7 @@ function dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool, differential_v end function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, θ, - p, differential_vars) where {C, T, U} + p, differential_vars::AbstractVector) where {C, T, U} out = Array(phi(t, θ)) dphi = Array(dfdx(phi, t, θ, autodiff, differential_vars)) arrt = Array(t) @@ -73,7 +73,7 @@ function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, end function generate_loss(strategy::GridTraining, phi, f, autodiff::Bool, tspan, p, - 
differential_vars) + differential_vars::AbstractVector) ts = tspan[1]:(strategy.dx):tspan[2] autodiff && throw(ArgumentError("autodiff not supported for GridTraining.")) function loss(θ, _) From b1a7fa1566b8737cb6f84c1ff2da061111a46558 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 10:59:13 +0000 Subject: [PATCH 12/16] test: further refactor tests and fix bugs --- test/BPINN_PDE_tests.jl | 5 +- test/NNDAE_tests.jl | 110 +++++++++++++++++----------------- test/additional_loss_tests.jl | 4 +- 3 files changed, 60 insertions(+), 59 deletions(-) diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl index e8f4406103..6dd3637f5a 100644 --- a/test/BPINN_PDE_tests.jl +++ b/test/BPINN_PDE_tests.jl @@ -148,7 +148,7 @@ end chain = Lux.Chain(Lux.Dense(dim, 9, Lux.σ), Lux.Dense(9, 9, Lux.σ), Lux.Dense(9, 1)) # Discretization - dx = 0.05 + dx = 0.04 discretization = BayesianPINN([chain], GridTraining(dx)) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) @@ -166,7 +166,6 @@ end u_predict = pmean(sol1.ensemblesol[1]) u_real = [analytic_sol_func(xs[:, i][1], xs[:, i][2]) for i in 1:length(xs[1, :])] - diff_u = abs.(u_predict .- u_real) @test u_predict≈u_real atol=1.5 end @@ -189,7 +188,7 @@ end chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1)) discretization = BayesianPINN([chain], GridTraining([0.01])) - @test discretization.chain isa Lux.AbstractExplicitLayer + @test discretization.chain[1] isa Lux.AbstractExplicitLayer @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) diff --git a/test/NNDAE_tests.jl b/test/NNDAE_tests.jl index f32cab0ee3..f0f2699127 100644 --- a/test/NNDAE_tests.jl +++ b/test/NNDAE_tests.jl @@ -1,62 +1,64 @@ using Test, Flux using Random, NeuralPDE -using OrdinaryDiffEq, Optimisers, Statistics +using OrdinaryDiffEq, Statistics import Lux, OptimizationOptimisers, OptimizationOptimJL Random.seed!(100) -#Example 1 -function example1(du, u, p, t) - du[1] = cos(2pi * t) - du[2] = u[2] + cos(2pi * t) - nothing +@testset "Example 1" begin + function example1(du, u, p, t) + du[1] = cos(2pi * t) + du[2] = u[2] + cos(2pi * t) + nothing + end + u₀ = [1.0, -1.0] + du₀ = [0.0, 0.0] + M = [1.0 0 + 0 0] + f = ODEFunction(example1, mass_matrix = M) + tspan = (0.0f0, 1.0f0) + + prob_mm = ODEProblem(f, u₀, tspan) + ground_sol = solve(prob_mm, Rodas5(), reltol = 1e-8, abstol = 1e-8) + + example = (du, u, p, t) -> [cos(2pi * t) - du[1], u[2] + cos(2pi * t) - du[2]] + differential_vars = [true, false] + prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars) + chain = Lux.Chain(Lux.Dense(1, 15, cos), Lux.Dense(15, 15, sin), Lux.Dense(15, 2)) + opt = OptimizationOptimisers.Adam(0.1) + alg = NeuralPDE.NNDAE(chain, opt; autodiff = false) + + sol = solve(prob, + alg, verbose = false, dt = 1 / 100.0f0, + maxiters = 3000, abstol = 1.0f-10) + @test ground_sol(0:(1 / 100):1)≈sol atol=0.4 end -u₀ = [1.0, -1.0] -du₀ = [0.0, 0.0] -M = [1.0 0 - 0 0] -f = ODEFunction(example1, mass_matrix = M) -tspan = (0.0f0, 1.0f0) - -prob_mm = ODEProblem(f, u₀, tspan) -ground_sol = solve(prob_mm, Rodas5(), reltol = 1e-8, abstol = 1e-8) - -example = (du, u, p, t) -> [cos(2pi * t) - du[1], u[2] + cos(2pi * t) - du[2]] -differential_vars = [true, false] -prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars) -chain = Flux.Chain(Dense(1, 15, cos), Dense(15, 15, sin), Dense(15, 2)) -opt = OptimizationOptimisers.Adam(0.1) -alg = NeuralPDE.NNDAE(chain, opt; autodiff = false) - -sol = solve(prob, - alg, verbose = 
false, dt = 1 / 100.0f0, - maxiters = 3000, abstol = 1.0f-10) -@test ground_sol(0:(1 / 100):1)≈sol atol=0.4 - -#Example 2 -function example2(du, u, p, t) - du[1] = u[1] - t - du[2] = u[2] - t - nothing + +@testset "Example 2" begin + function example2(du, u, p, t) + du[1] = u[1] - t + du[2] = u[2] - t + nothing + end + M = [0.0 0 + 0 1] + u₀ = [0.0, 0.0] + du₀ = [0.0, 0.0] + tspan = (0.0f0, pi / 2.0f0) + f = ODEFunction(example2, mass_matrix = M) + prob_mm = ODEProblem(f, u₀, tspan) + ground_sol = solve(prob_mm, Rodas5(), reltol = 1e-8, abstol = 1e-8) + + example = (du, u, p, t) -> [u[1] - t - du[1], u[2] - t - du[2]] + differential_vars = [false, true] + prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars) + chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 2)) + opt = OptimizationOptimisers.Adam(0.1) + alg = NNDAE(chain, OptimizationOptimisers.Adam(0.1); autodiff = false) + + sol = solve(prob, + alg, verbose = false, dt = 1 / 100.0f0, + maxiters = 3000, abstol = 1.0f-10) + + @test ground_sol(0:(1 / 100):(pi / 2))≈sol atol=0.4 end -M = [0.0 0 - 0 1] -u₀ = [0.0, 0.0] -du₀ = [0.0, 0.0] -tspan = (0.0f0, pi / 2.0f0) -f = ODEFunction(example2, mass_matrix = M) -prob_mm = ODEProblem(f, u₀, tspan) -ground_sol = solve(prob_mm, Rodas5(), reltol = 1e-8, abstol = 1e-8) - -example = (du, u, p, t) -> [u[1] - t - du[1], u[2] - t - du[2]] -differential_vars = [false, true] -prob = DAEProblem(example, du₀, u₀, tspan; differential_vars = differential_vars) -chain = Flux.Chain(Dense(1, 15, σ), Dense(15, 2)) -opt = OptimizationOptimisers.Adam(0.1) -alg = NNDAE(chain, OptimizationOptimisers.Adam(0.1); autodiff = false) - -sol = solve(prob, - alg, verbose = false, dt = 1 / 100.0f0, - maxiters = 3000, abstol = 1.0f-10) - -@test ground_sol(0:(1 / 100):(pi / 2))≈sol atol=0.4 diff --git a/test/additional_loss_tests.jl b/test/additional_loss_tests.jl index 5d5034e748..79364234a0 100644 --- a/test/additional_loss_tests.jl +++ b/test/additional_loss_tests.jl @@ -43,8 +43,8 @@ using ComponentArrays function inner_f(x, θ) dx * phi(x, θ) .- 1 end - prob = IntegralProblem(inner_f, lb, ub, θ) - norm2 = solve(prob, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10) + prob1 = IntegralProblem(inner_f, lb, ub, θ) + norm2 = solve(prob1, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10) abs(norm2[1]) end discretization = PhysicsInformedNN(chain, GridTraining(dx); init_params = init_params, From 1c02aa8e6dc0684847c5a5b8fa41ae505c527204 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 14:19:31 +0000 Subject: [PATCH 13/16] refactor: add auto translation for DAE problems --- src/dae_solve.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dae_solve.jl b/src/dae_solve.jl index f2c861da02..755a3abbe0 100644 --- a/src/dae_solve.jl +++ b/src/dae_solve.jl @@ -43,6 +43,7 @@ end function NNDAE(chain, opt, init_params = nothing; strategy = nothing, autodiff = false, kwargs...) 
+ !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) NNDAE(chain, opt, init_params, autodiff, strategy, kwargs) end From 78bc964e0ea2cd4ca7ed39c645e07fe2213434d8 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 31 Jan 2024 14:20:37 +0000 Subject: [PATCH 14/16] refactor: fix min max adaptive loss --- src/adaptive_losses.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 6bfb192194..d6f84c7bbe 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -237,13 +237,15 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss::MiniMaxAdaptiveLoss, pde_loss_functions, bc_loss_functions) pde_max_optimiser = adaloss.pde_max_optimiser + pde_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup(pde_max_optimiser, adaloss.pde_loss_weights) bc_max_optimiser = adaloss.bc_max_optimiser + bc_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup(bc_max_optimiser, adaloss.bc_loss_weights) iteration = pinnrep.iteration function run_minimax_adaptive_loss(θ, pde_losses, bc_losses) if iteration[1] % adaloss.reweight_every == 0 - OptimizationOptimisers.Optimisers.update(pde_max_optimiser, adaloss.pde_loss_weights, -pde_losses) - OptimizationOptimisers.Optimisers.update(bc_max_optimiser, adaloss.bc_loss_weights, -bc_losses) + OptimizationOptimisers.Optimisers.update!(pde_max_optimiser_setup, adaloss.pde_loss_weights, -pde_losses) + OptimizationOptimisers.Optimisers.update!(bc_max_optimiser_setup, adaloss.bc_loss_weights, -bc_losses) logvector(pinnrep.logger, adaloss.pde_loss_weights, "adaptive_loss/pde_loss_weights", iteration[1]) logvector(pinnrep.logger, adaloss.bc_loss_weights, From 13d70e50bf5c978815849aeade5024bdcbb335cd Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 1 Feb 2024 08:51:46 +0000 Subject: [PATCH 15/16] docs: cleanup docstrings --- src/BPINN_ode.jl | 7 ++-- src/PDE_BPINN.jl | 6 +++- src/adaptive_losses.jl | 73 +++++++++++++++++--------------------- src/advancedHMC_MCMC.jl | 61 +++++++++++++++++-------------- src/dae_solve.jl | 13 ++++--- src/discretize.jl | 24 ++++--------- src/neural_adapter.jl | 4 +-- src/symbolic_utilities.jl | 30 +++++++--------- src/training_strategies.jl | 31 ++++++---------- 9 files changed, 113 insertions(+), 136 deletions(-) diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl index 3919c6ad3b..d91dd4b96a 100644 --- a/src/BPINN_ode.jl +++ b/src/BPINN_ode.jl @@ -72,11 +72,10 @@ is an accurate interpolation (up to the neural network training result). In addi ## References Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for -Forward and Inverse PDE Problems with Noisy Data" - -Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl. -"Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems" +Forward and Inverse PDE Problems with Noisy Data". +Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl +"Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems". 
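For orientation, a minimal hedged sketch of reading results off the solution this solver returns (the `ensemblesol` and `estimated_ode_params` field names follow the test-suite usage elsewhere in this series and are assumptions here):

```julia
using MonteCarloMeasurements: pmean

# `sol` as returned by, e.g., solve(prob, BNNODE(chain; draw_samples = 2000))
u_mean = pmean(sol.ensemblesol[1])  # ensemble-mean trajectory of the first state
p_est = sol.estimated_ode_params    # posterior draws for the ODE parameters
```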
""" struct BNNODE{C, K, IT <: NamedTuple, A <: NamedTuple, H <: NamedTuple, diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl index 02eb939bf6..344d007963 100644 --- a/src/PDE_BPINN.jl +++ b/src/PDE_BPINN.jl @@ -254,17 +254,20 @@ end Integratorkwargs = (Integrator = Leapfrog,), saveats = [1 / 10.0], numensemble = floor(Int, draw_samples / 3), progress = false, verbose = false) -## NOTES +## NOTES + * Dataset is required for accurate Parameter estimation + solving equations. * Returned solution is a BPINNsolution consisting of Ensemble solution, estimated PDE and NN parameters for chosen `saveats` grid spacing and last n = `numensemble` samples in Chain. the complete set of samples in the MCMC chain is returned as `fullsolution`, refer `BPINNsolution` for more details. ## Positional Arguments + * `pde_system`: ModelingToolkit defined PDE equation or system of equations. * `discretization`: BayesianPINN discretization for the given pde_system, Neural Network and training strategy. ## Keyword Arguments + * `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples) * `bcstd`: Vector of standard deviations of BPINN prediction against Initial/Boundary Condition equations. * `l2std`: Vector of standard deviations of BPINN prediction against L2 losses/Dataset for each dependant variable of interest. @@ -282,6 +285,7 @@ end * `verbose`: controls the verbosity. (Sample call args in AHMC). ## Warnings + * AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. """ function ahmc_bayesian_pinn_pde(pde_system, discretization; diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index d6f84c7bbe..3a1c4a79db 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -12,11 +12,9 @@ end # Dispatches """ -```julia -NonAdaptiveLoss{T}(; pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) -``` + NonAdaptiveLoss(; pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) A way of loss weighting the components of the loss function in the total sum that does not change during optimization @@ -25,9 +23,9 @@ mutable struct NonAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function NonAdaptiveLoss{T}(; pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) where { + SciMLBase.@add_kwonly function NonAdaptiveLoss{T}(; pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) where { T <: Real } @@ -37,8 +35,8 @@ mutable struct NonAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss end # default to Float64 -SciMLBase.@add_kwonly function NonAdaptiveLoss(; pde_loss_weights = 1, bc_loss_weights = 1, - additional_loss_weights = 1) +SciMLBase.@add_kwonly function NonAdaptiveLoss(; pde_loss_weights = 1.0, bc_loss_weights = 1.0, + additional_loss_weights = 1.0) NonAdaptiveLoss{Float64}(; pde_loss_weights = pde_loss_weights, bc_loss_weights = bc_loss_weights, additional_loss_weights = additional_loss_weights) @@ -53,17 +51,14 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, end """ -```julia -GradientScaleAdaptiveLoss(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) -``` + GradientScaleAdaptiveLoss(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 
1.0)
 
 A way of adaptively reweighting the components of the loss function in the total sum such
-that BC_i loss weights are scaled by the exponential moving average of
-max(|∇pde_loss|)/mean(|∇bc_i_loss|) )
+that BC_i loss weights are scaled by the exponential moving average of max(|∇pde_loss|) / mean(|∇bc_i_loss|).
 
 ## Positional Arguments
@@ -93,9 +88,9 @@ mutable struct GradientScaleAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss
     additional_loss_weights::Vector{T}
     SciMLBase.@add_kwonly function GradientScaleAdaptiveLoss{T}(reweight_every;
                                                                 weight_change_inertia = 0.9,
-                                                                pde_loss_weights = 1,
-                                                                bc_loss_weights = 1,
-                                                                additional_loss_weights = 1) where {
+                                                                pde_loss_weights = 1.0,
+                                                                bc_loss_weights = 1.0,
+                                                                additional_loss_weights = 1.0) where {
                                                                                                       T <: Real
                                                                                                       }
@@ -107,9 +102,9 @@ end
 # default to Float64
 SciMLBase.@add_kwonly function GradientScaleAdaptiveLoss(reweight_every;
                                                          weight_change_inertia = 0.9,
-                                                         pde_loss_weights = 1,
-                                                         bc_loss_weights = 1,
-                                                         additional_loss_weights = 1)
+                                                         pde_loss_weights = 1.0,
+                                                         bc_loss_weights = 1.0,
+                                                         additional_loss_weights = 1.0)
     GradientScaleAdaptiveLoss{Float64}(reweight_every;
                                        weight_change_inertia = weight_change_inertia,
                                        pde_loss_weights = pde_loss_weights,
@@ -156,18 +151,16 @@ end
 """
-```julia
-function MiniMaxAdaptiveLoss(reweight_every;
-                             pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
-                             bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
-                             pde_loss_weights = 1,
-                             bc_loss_weights = 1,
-                             additional_loss_weights = 1)
-```
+    function MiniMaxAdaptiveLoss(reweight_every;
+                                 pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
+                                 bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
+                                 pde_loss_weights = 1.0,
+                                 bc_loss_weights = 1.0,
+                                 additional_loss_weights = 1.0)
 
 A way of adaptively reweighting the components of the loss function in the total sum such
 that the loss weights are maximized by an internal optimizer, which leads to a behavior
-where loss functions that have not been satisfied get a greater weight,
+where loss functions that have not been satisfied get a greater weight.
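As a quick, hedged illustration of wiring this in (the optimiser settings are just the defaults documented below; `chain` is any Lux network and the grid spacing is an arbitrary choice):

```julia
# Reweight the loss weights every 100 iterations; the two inner optimisers
# ascend the PDE and BC loss weights respectively.
adaloss = MiniMaxAdaptiveLoss(100;
                              pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
                              bc_max_optimiser = OptimizationOptimisers.Adam(0.5))
discretization = PhysicsInformedNN(chain, GridTraining(0.1); adaptive_loss = adaloss)
```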
## Positional Arguments
@@ -202,9 +195,9 @@ mutable struct MiniMaxAdaptiveLoss{T <: Real, PDE_OPT, BC_OPT}(reweight_every;
                                                                pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
                                                                bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
-                                                               pde_loss_weights = 1,
-                                                               bc_loss_weights = 1,
-                                                               additional_loss_weights = 1) where {
+                                                               pde_loss_weights = 1.0,
+                                                               bc_loss_weights = 1.0,
+                                                               additional_loss_weights = 1.0) where {
                                                                                                      T <: Real,
                                                                                                      PDE_OPT,
                                                                                                      BC_OPT
                                                                                                      }
@@ -221,9 +214,9 @@ end
 SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss(reweight_every;
                                                    pde_max_optimiser = OptimizationOptimisers.Adam(1e-4),
                                                    bc_max_optimiser = OptimizationOptimisers.Adam(0.5),
-                                                   pde_loss_weights = 1,
-                                                   bc_loss_weights = 1,
-                                                   additional_loss_weights = 1)
+                                                   pde_loss_weights = 1.0,
+                                                   bc_loss_weights = 1.0,
+                                                   additional_loss_weights = 1.0)
     MiniMaxAdaptiveLoss{Float64, typeof(pde_max_optimiser),
                         typeof(bc_max_optimiser)}(reweight_every;
                                                   pde_max_optimiser = pde_max_optimiser,
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index ee3718842c..9dd22cceb2 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -65,8 +65,8 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I,
 end
 
 """
-function needed for converting vector of sampled parameters into ComponentVector in case of Lux chain output, derivatives
-the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging
+Function needed for converting a vector of sampled parameters into a ComponentVector in case of a Lux chain output; during
+differentiation, the sampled parameters are of exotic type `Dual` due to ForwardDiff's autodiff tagging.
 """
 function vector_to_parameters(ps_new::AbstractVector,
                               ps::Union{NamedTuple, ComponentArrays.ComponentVector})
@@ -93,7 +93,7 @@ function LogDensityProblems.capabilities(::LogTargetDensity)
 end
 
 """
-L2 loss loglikelihood(needed for ODE parameter estimation)
+L2 loss loglikelihood (needed for ODE parameter estimation).
 """
 function L2LossData(Tar::LogTargetDensity, θ)
     # check if dataset is provided
@@ -116,7 +116,7 @@ end
 
 """
-physics loglikelihood over problem timespan + dataset timepoints
+Physics loglikelihood over the problem timespan + dataset timepoints.
 """
 function physloglikelihood(Tar::LogTargetDensity, θ)
     f = Tar.prob.f
@@ -214,7 +214,7 @@ function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f,
 end
 
 """
-MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ
+MvNormal likelihood at each `ti` in time `t` for the ODE collocation residue of the NN with parameters θ.
 """
 function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ,
                    ode_params)
@@ -253,7 +253,7 @@ function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector,
 end
 
 """
-prior logpdf for NN parameters + ODE constants
+Prior logpdf for NN parameters + ODE constants.
 """
 function priorweights(Tar::LogTargetDensity, θ)
     allparams = Tar.priors
@@ -286,7 +286,7 @@ function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing)
 end
 
 """
-nn OUTPUT AT t,θ ~ phi(t,θ)
+NN output at t, θ ~ phi(t, θ).
 """
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, S}
@@ -305,7 +305,7 @@ function (f::LogTargetDensity{C, S})(t::Number,
 end
 
 """
-similar to ode_dfdx() in NNODE/ode_solve.jl
+Similar to ode_dfdx() in NNODE.
 """
 function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool)
     if autodiff
@@ -330,18 +330,18 @@ function kernelchoice(Kernel, MCMCkwargs)
 end
 
 """
-```julia
-ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
-                       dataset = [nothing],init_params = nothing,
-                       draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05],
-                       phystd = [0.05], priorsNNw = (0.0, 2.0),
-                       param = [], nchains = 1, autodiff = false, Kernel = HMC,
-                       Adaptorkwargs = (Adaptor = StanHMCAdaptor,
-                                        Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
-                       Integratorkwargs = (Integrator = Leapfrog,),
-                       MCMCkwargs = (n_leapfrog = 30,),
-                       progress = false, verbose = false)
-```
+    ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
+                           dataset = [nothing], init_params = nothing,
+                           draw_samples = 1000, physdt = 1 / 20.0f0, l2std = [0.05],
+                           phystd = [0.05], priorsNNw = (0.0, 2.0),
+                           param = [], nchains = 1, autodiff = false, Kernel = HMC,
+                           Adaptorkwargs = (Adaptor = StanHMCAdaptor,
+                                            Metric = DiagEuclideanMetric,
+                                            targetacceptancerate = 0.8),
+                           Integratorkwargs = (Integrator = Leapfrog,),
+                           MCMCkwargs = (n_leapfrog = 30,),
+                           progress = false, verbose = false)
+
 !!! warn
     Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e.
@@ -349,24 +349,26 @@ ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining,
     will exit with an error.
 
 ## Example
+
+```julia
 linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t)
 tspan = (0.0, 10.0)
 u0 = 0.0
 p = [5.0, -5.0]
 prob = ODEProblem(linear, u0, tspan, p)
 
-# CREATE DATASET (Necessity for accurate Parameter estimation)
+### CREATE DATASET (necessary for accurate parameter estimation)
 sol = solve(prob, Tsit5(); saveat = 0.05)
 u = sol.u[1:100]
 time = sol.t[1:100]
 
-# dataset and BPINN create
+### create the dataset and the BPINN
 x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u)))
 dataset = [x̂, time]
 
-chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1)
+chain1 = Lux.Chain(Lux.Dense(1, 5, tanh), Lux.Dense(5, 5, tanh), Lux.Dense(5, 1))
 
-# simply solving ode here hence better to not pass dataset(uses ode params specified in prob)
+### simply solving the ODE here, hence better not to pass a dataset (uses the ODE params specified in prob)
 fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chain1,
                                                               dataset = dataset,
                                                               draw_samples = 1500,
                                                               l2std = [0.05],
                                                               phystd = [0.05],
                                                               priorsNNw = (0.0,3.0))
 
-# solving ode + estimating parameters hence dataset needed to optimize parameters upon + Pior Distributions for ODE params
+### solving the ODE + estimating parameters, hence a dataset is needed to optimize parameters against + prior distributions for the ODE params
 fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chain1,
                                                               dataset = dataset,
                                                               draw_samples = 1500,
                                                               l2std = [0.05],
                                                               phystd = [0.05],
                                                               priorsNNw = (0.0,3.0),
                                                               param = [Normal(6.5,0.5), Normal(-3,0.5)])
+```
+
+## NOTES
-Dataset is required for accurate Parameter estimation + solving equations
-Incase you are only solving the Equations for solution, do not provide dataset
+
+Dataset is required for accurate parameter estimation in addition to solving the equations.
+In case you are only solving the equations for the solution, do not provide a dataset.
 
 ## Positional Arguments
-* `prob`: DEProblem(out of place and the function signature should be f(u,p,t)
-* `chain`: Lux Neural Netork which would be made the Bayesian PINN
+
+* `prob`: DEProblem (out of place, and the function signature should be f(u,p,t)).
+* `chain`: Lux Neural Network which would be made the Bayesian PINN.
## Keyword Arguments + * `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization. * `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred) * `nchains`: number of chains you want to sample @@ -417,6 +423,7 @@ Incase you are only solving the Equations for solution, do not provide dataset * `verbose`: controls the verbosity. (Sample call args in AHMC) ## Warnings + * AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. """ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; diff --git a/src/dae_solve.jl b/src/dae_solve.jl index 755a3abbe0..0415e77c29 100644 --- a/src/dae_solve.jl +++ b/src/dae_solve.jl @@ -1,11 +1,9 @@ """ -```julia -NNDAE(chain, - OptimizationOptimisers.Adam(0.1), - init_params = nothing; - autodiff = false, - kwargs...) -``` + NNDAE(chain, + OptimizationOptimisers.Adam(0.1), + init_params = nothing; + autodiff = false, + kwargs...) Algorithm for solving differential algebraic equationsusing a neural network. This is a specialization of the physics-informed neural network which is used as a solver for a standard `DAEProblem`. @@ -24,6 +22,7 @@ of the physics-informed neural network which is used as a solver for a standard which thus uses the random initialization provided by the neural network library. ## Keyword Arguments + * `autodiff`: The switch between automatic(not supported yet) and numerical differentiation for the PDE operators. The reverse mode of the loss function is always automatic differentiation (via Zygote), this is only for the derivative diff --git a/src/discretize.jl b/src/discretize.jl index 57b13b5eb6..af035980b3 100644 --- a/src/discretize.jl +++ b/src/discretize.jl @@ -1,5 +1,5 @@ """ -Build a loss function for a PDE or a boundary condition +Build a loss function for a PDE or a boundary condition. # Examples: System of PDEs: @@ -23,7 +23,7 @@ to end end) -for Lux.AbstractExplicitLayer +for Lux.AbstractExplicitLayer. """ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; eq_params = SciMLBase.NullParameters(), @@ -141,9 +141,7 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; end """ -```julia -build_loss_function(eqs, indvars, depvars, phi, derivative, init_params; bc_indvars=nothing) -``` + build_loss_function(eqs, indvars, depvars, phi, derivative, init_params; bc_indvars=nothing) Returns the body of loss function, which is the executable Julia function, for the main equation or boundary condition. @@ -166,9 +164,7 @@ function build_loss_function(pinnrep::PINNRepresentation, eqs, bc_indvars) end """ -```julia -generate_training_sets(domains,dx,bcs,_indvars::Array,_depvars::Array) -``` + generate_training_sets(domains,dx,bcs,_indvars::Array,_depvars::Array) Returns training sets for equations and boundary condition, that is used for GridTraining strategy. @@ -236,9 +232,7 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D end """ -```julia -get_bounds(domains,bcs,_indvars::Array,_depvars::Array) -``` + get_bounds(domains,bcs,_indvars::Array,_depvars::Array) Returns pairs with lower and upper bounds for all domains. It is used for all non-grid training strategy: StochasticTraining, QuasiRandomTraining, QuadratureTraining. 
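For context, a brief hedged sketch of selecting one of these non-grid strategies (the point count is an arbitrary choice; `chain` and `pde_system` as in the surrounding tests, and `LatinHypercubeSample` is assumed available via QuasiMonteCarlo):

```julia
# Quasi-random sampling draws low-discrepancy points from the box that
# get_bounds computes, instead of a fixed grid.
strategy = QuasiRandomTraining(256; sampling_alg = LatinHypercubeSample())
discretization = PhysicsInformedNN(chain, strategy)
prob = discretize(pde_system, discretization)
```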
@@ -359,9 +353,7 @@ function get_numeric_integral(pinnrep::PINNRepresentation) end """ -```julia -prob = symbolic_discretize(pde_system::PDESystem, discretization::AbstractPINN) -``` + prob = symbolic_discretize(pde_system::PDESystem, discretization::AbstractPINN) `symbolic_discretize` is the lower level interface to `discretize` for inspecting internals. It transforms a symbolic description of a ModelingToolkit-defined `PDESystem` into a @@ -706,9 +698,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, end """ -```julia -prob = discretize(pde_system::PDESystem, discretization::PhysicsInformedNN) -``` + prob = discretize(pde_system::PDESystem, discretization::PhysicsInformedNN) Transforms a symbolic description of a ModelingToolkit-defined `PDESystem` and generates an `OptimizationProblem` for [Optimization.jl](https://docs.sciml.ai/Optimization/stable/) whose diff --git a/src/neural_adapter.jl b/src/neural_adapter.jl index 1e97c9eb70..8b8ae68c97 100644 --- a/src/neural_adapter.jl +++ b/src/neural_adapter.jl @@ -102,9 +102,7 @@ function get_loss_function_(loss, init_params, pde_system, strategy::QuadratureT end """ -```julia -neural_adapter(loss, init_params, pde_system, strategy) -``` + neural_adapter(loss, init_params, pde_system, strategy) Trains a neural network using the results from one already obtained prediction. diff --git a/src/symbolic_utilities.jl b/src/symbolic_utilities.jl index 9161f3c365..b4d4c97f3a 100644 --- a/src/symbolic_utilities.jl +++ b/src/symbolic_utilities.jl @@ -3,7 +3,7 @@ using Base.Broadcast """ Override `Broadcast.__dot__` with `Broadcast.dottable(x::Function) = true` -# Example +## Example ```julia julia> e = :(1 + $sin(x)) @@ -53,14 +53,14 @@ end """ Create dictionary: variable => unique number for variable -# Example 1 +## Example 1 Dict{Symbol,Int64} with 3 entries: :y => 2 :t => 3 :x => 1 -# Example 2 +## Example 2 Dict{Symbol,Int64} with 2 entries: :u1 => 1 @@ -101,18 +101,18 @@ end """ Transform the derivative expression to inner representation -# Examples +## Examples 1. First compute the derivative of function 'u(x,y)' with respect to x. Take expressions in the form: `derivative(u(x,y), x)` to `derivative(phi, u, [x, y], εs, order, θ)`, where - phi - trial solution - u - function - x,y - coordinates of point - εs - epsilon mask - order - order of derivative - θ - weight in neural network +- phi - trial solution. +- u - function. +- x,y - coordinates of point. +- εs - epsilon mask. +- order - order of derivative. +- θ - weights in neural network. """ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = false, dict_transformation_vars = nothing, @@ -279,7 +279,7 @@ end """ Parse ModelingToolkit equation form to the inner representation. -Example: +## Examples: 1) 1-D ODE: Dt(u(t)) ~ t +1 @@ -380,9 +380,7 @@ function get_integration_variables(eqs, dict_indvars, dict_depvars) end """ -``julia -get_variables(eqs,_indvars,_depvars) -``` + get_variables(eqs,_indvars,_depvars) Returns all variables that are used in each equations or boundary condition. """ @@ -420,9 +418,7 @@ function find_thing_in_expr(ex::Expr, thing; ans = []) end """ -```julia -get_argument(eqs,_indvars::Array,_depvars::Array) -``` + get_argument(eqs,_indvars::Array,_depvars::Array) Returns all arguments that are used in each equations or boundary condition. 
""" diff --git a/src/training_strategies.jl b/src/training_strategies.jl index a74a1bed98..a419afcdbf 100644 --- a/src/training_strategies.jl +++ b/src/training_strategies.jl @@ -1,7 +1,5 @@ """ -```julia -GridTraining(dx) -``` + GridTraining(dx) A training strategy that uses the grid points in a multidimensional grid with spacings `dx`. If the grid is multidimensional, then `dx` is expected @@ -83,9 +81,7 @@ function get_loss_function(loss_function, train_set, eltypeθ, strategy::GridTra end """ -```julia -StochasticTraining(points; bcs_points = points) -``` + StochasticTraining(points; bcs_points = points) ## Positional Arguments @@ -144,11 +140,10 @@ function get_loss_function(loss_function, bound, eltypeθ, strategy::StochasticT end """ -```julia -QuasiRandomTraining(points; bcs_points = points, - sampling_alg = LatinHypercubeSample(), resampling = true, - minibatch = 0) -``` + QuasiRandomTraining(points; bcs_points = points, + sampling_alg = LatinHypercubeSample(), resampling = true, + minibatch = 0) + A training strategy which uses quasi-Monte Carlo sampling for low discrepancy sequences that accelerate the convergence in high dimensional spaces over pure random sequences. @@ -168,7 +163,7 @@ that accelerate the convergence in high dimensional spaces over pure random sequ points is generated directly at each iteration in runtime. In this case, `minibatch` has no effect, * `minibatch`: the number of subsets, if resampling == false. -For more information, see [QuasiMonteCarlo.jl](https://docs.sciml.ai/QuasiMonteCarlo/stable/) +For more information, see [QuasiMonteCarlo.jl](https://docs.sciml.ai/QuasiMonteCarlo/stable/). """ struct QuasiRandomTraining <: AbstractTrainingStrategy points::Int64 @@ -248,11 +243,9 @@ function get_loss_function(loss_function, bound, eltypeθ, strategy::QuasiRandom end """ -```julia -QuadratureTraining(; quadrature_alg = CubatureJLh(), - reltol = 1e-6, abstol = 1e-3, - maxiters = 1_000, batch = 100) -``` + QuadratureTraining(; quadrature_alg = CubatureJLh(), + reltol = 1e-6, abstol = 1e-3, + maxiters = 1_000, batch = 100) A training strategy which treats the loss function as the integral of ||condition|| over the domain. Uses an Integrals.jl algorithm for @@ -335,9 +328,7 @@ function get_loss_function(loss_function, lb, ub, eltypeθ, strategy::Quadrature end """ -```julia -WeightedIntervalTraining(weights, samples) -``` + WeightedIntervalTraining(weights, samples) A training strategy that generates points for training based on the given inputs. We split the timespan into equal segments based on the number of weights, From c303e130983120c34c6db4fd2b40ac195d06a2da Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 1 Feb 2024 13:00:46 +0000 Subject: [PATCH 16/16] refactor: PhysicsInformedNN check `phi` for Lux --- src/pinn_types.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pinn_types.jl b/src/pinn_types.jl index 925070a9e0..3ad4e8d91a 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -116,7 +116,11 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN _phi = Phi(chain) end else - !(phi.f isa Lux.AbstractExplicitLayer) && throw(ArgumentError("Only Lux Chains are supported")) + if multioutput + all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) || throw(ArgumentError("Only Lux Chains are supported")) + else + (phi.f isa Lux.AbstractExplicitLayer) || throw(ArgumentError("Only Lux Chains are supported")) + end _phi = phi end