diff --git a/.JuliaFormatter b/.JuliaFormatter.toml
similarity index 100%
rename from .JuliaFormatter
rename to .JuliaFormatter.toml
diff --git a/docs/make.jl b/docs/make.jl
index 4694148..94e832c 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,8 +1,7 @@
 ### Process examples
 using Pkg
-Pkg.add(
-    Pkg.PackageSpec(; url = "https://github.com/JuliaGaussianProcesses/JuliaGPsDocs.jl"),
-) # While the package is unregistered, it's a workaround
+# While the package is unregistered, it's a workaround
+Pkg.add(Pkg.PackageSpec(; url="https://github.com/JuliaGaussianProcesses/JuliaGPsDocs.jl"))
 
 using JuliaGPsDocs
 
@@ -20,20 +19,19 @@ DocMeta.setdocmeta!(
     quote
         using EasyGPs
     end; # we have to load all packages used (implicitly) within jldoctest blocks in the API docstrings
-    recursive = true,
+    recursive=true,
 )
 
 makedocs(;
-    sitename = "EasyGPs.jl",
-    format = Documenter.HTML(; size_threshold_ignore = ["examples/0-mauna-loa/index.md"]),
-    modules = [EasyGPs],
-    pages = [
-        "Home" => "index.md",
-        "Examples" => JuliaGPsDocs.find_generated_examples(EasyGPs),
+    sitename="EasyGPs.jl",
+    format=Documenter.HTML(; size_threshold_ignore=["examples/0-mauna-loa/index.md"]),
+    modules=[EasyGPs],
+    pages=[
+        "Home" => "index.md", "Examples" => JuliaGPsDocs.find_generated_examples(EasyGPs)
     ],
-    warnonly = true,
-    checkdocs = :exports,
-    doctestfilters = JuliaGPsDocs.DOCTEST_FILTERS,
+    warnonly=true,
+    checkdocs=:exports,
+    doctestfilters=JuliaGPsDocs.DOCTEST_FILTERS,
 )
 
-deploydocs(; repo = "github.com/JuliaGaussianProcesses/EasyGPs.jl.git", push_preview = true)
+deploydocs(; repo="github.com/JuliaGaussianProcesses/EasyGPs.jl.git", push_preview=true)
diff --git a/examples/0-mauna-loa/script.jl b/examples/0-mauna-loa/script.jl
index 1869b5f..b7a0599 100644
--- a/examples/0-mauna-loa/script.jl
+++ b/examples/0-mauna-loa/script.jl
@@ -17,7 +17,7 @@ using Plots # visualisation
 # Let's load and visualize the dataset.
 
 (xtrain, ytrain), (xtest, ytest) = let
-    data = CSV.read(joinpath(@__DIR__, "CO2_data.csv"), Tables.matrix; header = 0)
+    data = CSV.read(joinpath(@__DIR__, "CO2_data.csv"), Tables.matrix; header=0)
     year = data[:, 1]
     co2 = data[:, 2]
 
@@ -29,9 +29,9 @@ using Plots # visualisation
 end
 
 function plotdata()
-    plot(; xlabel = "year", ylabel = "CO₂ [ppm]", legend = :bottomright)
-    scatter!(xtrain, ytrain; label = "training data", ms = 2, markerstrokewidth = 0)
-    return scatter!(xtest, ytest; label = "test data", ms = 2, markerstrokewidth = 0)
+    plot(; xlabel="year", ylabel="CO₂ [ppm]", legend=:bottomright)
+    scatter!(xtrain, ytrain; label="training data", ms=2, markerstrokewidth=0)
+    return scatter!(xtest, ytest; label="test data", ms=2, markerstrokewidth=0)
 end
 
 plotdata()
@@ -43,9 +43,9 @@ plotdata()
 
 k_smooth_trend = exp(8.0) * with_lengthscale(SEKernel(), exp(4.0))#with_lengthscale(SEKernel(), exp(4.0))
 k_seasonality =
-    exp(2.0) * PeriodicKernel(; r = [0.5]) * with_lengthscale(SEKernel(), exp(4.0))
+    exp(2.0) * PeriodicKernel(; r=[0.5]) * with_lengthscale(SEKernel(), exp(4.0))
 k_medium_term_irregularities =
-    1.0 * with_lengthscale(RationalQuadraticKernel(; α = exp(-1.0)), 1.0)
+    1.0 * with_lengthscale(RationalQuadraticKernel(; α=exp(-1.0)), 1.0)
 k_noise_terms =
     exp(-4.0) * with_lengthscale(SEKernel(), exp(-2.0)) + exp(-4.0) * WhiteKernel()
 kernel = k_smooth_trend + k_seasonality + k_medium_term_irregularities + k_noise_terms
@@ -71,11 +71,11 @@ fpost_init = posterior(gp(xtrain), ytrain)
 
 # By setting `ribbon_scale=2` we visualize the uncertainty band with ``\pm 2``
 # (instead of the default ``\pm 1``) standard deviations.
-plot_gp!(f; label) = plot!(f(1920:0.2:2030); ribbon_scale = 2, linewidth = 1, label)
+plot_gp!(f; label) = plot!(f(1920:0.2:2030); ribbon_scale=2, linewidth=1, label)
 #md nothing #hide
 
 plotdata()
-plot_gp!(fpost_init; label = "posterior f(⋅)")
+plot_gp!(fpost_init; label="posterior f(⋅)")
 
 # A reasonable fit to the data, but poor extrapolation away from the observations!
 
@@ -89,9 +89,9 @@ plot_gp!(fpost_init; label = "posterior f(⋅)")
     gp,
     xtrain,
     ytrain;
-    optimizer = Optim.LBFGS(;
-        alphaguess = Optim.LineSearches.InitialStatic(; scaled = true),
-        linesearch = Optim.LineSearches.BackTracking(),
+    optimizer=Optim.LBFGS(;
+        alphaguess=Optim.LineSearches.InitialStatic(; scaled=true),
+        linesearch=Optim.LineSearches.BackTracking(),
     ),
 )
 #md nothing #hide
@@ -108,4 +108,4 @@ fpost_opt.prior.kernel
 # And, finally, we can visualize our optimized posterior GP:
 
 plotdata()
-plot_gp!(fpost_opt; label = "optimized posterior f(⋅)")
+plot_gp!(fpost_opt; label="optimized posterior f(⋅)")
diff --git a/src/EasyGPs.jl b/src/EasyGPs.jl
index f44f0dc..83bc9c1 100644
--- a/src/EasyGPs.jl
+++ b/src/EasyGPs.jl
@@ -5,10 +5,11 @@ using Reexport
 @reexport using AbstractGPs
 @reexport using GPLikelihoods
 
-import Optimization
+using Optimization: Optimization
 @reexport import OptimizationOptimJL: Optim
-import ParameterHandling
-import Enzyme, Zygote
+using ParameterHandling: ParameterHandling
+using Enzyme: Enzyme
+using Zygote: Zygote
 using ApproximateGPs
 using Distributions: MvNormal
 
@@ -61,14 +62,13 @@ Takes a callable `model` and returns the optimal parameter, starting with initial
 parameters `θ0`. In order to work, there needs to be an implementation of `EasyGPs.costfunction`
 taking two arguments, the first of which is of type `typeof(model(θ0))`.
 """
-function optimize(model, θ0, data; iterations = 1000, optimizer = Optim.BFGS(), kwargs...)
+function optimize(model, θ0, data; iterations=1000, optimizer=Optim.BFGS(), kwargs...)
     par0, unflatten = ParameterHandling.flatten(θ0)
     optf = Optimization.OptimizationFunction(
-        (par, data) -> costfunction(model(unflatten(par)), data),
-        Optimization.AutoZygote(),
+        (par, data) -> costfunction(model(unflatten(par)), data), Optimization.AutoZygote()
     )
     prob = Optimization.OptimizationProblem(optf, par0, data)
-    sol = Optimization.solve(prob, optimizer; maxiters = iterations)
+    sol = Optimization.solve(prob, optimizer; maxiters=iterations)
     return unflatten(sol.u)
 end
 
@@ -80,8 +80,6 @@ roughly speaking the objects must be of the same type and have the same paramete
 """
 _isequal(::T1, ::T2) where {T1,T2} = false
 
-
-
 # Mean functions
 extract_parameters(::ZeroMean) = nothing
 apply_parameters(m::ZeroMean, θ) = m
@@ -91,8 +89,6 @@ extract_parameters(m::ConstMean) = m.c
 apply_parameters(::ConstMean, θ) = ConstMean(θ)
 _isequal(m1::ConstMean, m2::ConstMean) = isapprox(m1.c, m2.c)
 
-
-
 # Simple kernels
 KernelsWithoutParameters = Union{SEKernel,Matern32Kernel,Matern52Kernel,WhiteKernel}
 
@@ -101,16 +97,15 @@ apply_parameters(k::T, θ) where {T<:KernelsWithoutParameters} = k
 _isequal(k1::T, k2::T) where {T<:KernelsWithoutParameters} = true
 
 extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(only(k.r))
-apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(r = [θ])
+apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=[θ])
 _isequal(k1::T, k2::T) where {T<:PeriodicKernel} = k1.r ≈ k2.r
 
 extract_parameters(k::RationalQuadraticKernel) = ParameterHandling.positive(only(k.α))
-apply_parameters(k::RationalQuadraticKernel, θ) =
-    RationalQuadraticKernel(; α = θ, metric = k.metric)
+function apply_parameters(k::RationalQuadraticKernel, θ)
+    return RationalQuadraticKernel(; α=θ, metric=k.metric)
+end
 _isequal(k1::T, k2::T) where {T<:RationalQuadraticKernel} = true
 
-
-
 # Composite kernels
 extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels)
 apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ))
@@ -118,8 +113,9 @@ _isequal(k1::KernelSum, k2::KernelSum) = mapreduce(_isequal, &, k1.kernels, k2.k
 
 extract_parameters(k::KernelProduct) = map(extract_parameters, k.kernels)
 apply_parameters(k::KernelProduct, θ) = KernelProduct(map(apply_parameters, k.kernels, θ))
-_isequal(k1::KernelProduct, k2::KernelProduct) =
-    mapreduce(_isequal, &, k1.kernels, k2.kernels)
+function _isequal(k1::KernelProduct, k2::KernelProduct)
+    return mapreduce(_isequal, &, k1.kernels, k2.kernels)
+end
 
 function extract_parameters(k::TransformedKernel)
     return (extract_parameters(k.kernel), extract_parameters(k.transform))
@@ -127,8 +123,7 @@ end
 
 function apply_parameters(k::TransformedKernel, θ)
     return TransformedKernel(
-        apply_parameters(k.kernel, θ[1]),
-        apply_parameters(k.transform, θ[2]),
+        apply_parameters(k.kernel, θ[1]), apply_parameters(k.transform, θ[2])
     )
 end
 
@@ -148,15 +143,11 @@ function _isequal(k1::ScaledKernel, k2::ScaledKernel)
     return _isequal(k1.kernel, k2.kernel) && isapprox(k1.σ², k2.σ²)
 end
 
-
-
 # Transforms
 extract_parameters(t::ScaleTransform) = ParameterHandling.positive(only(t.s))
 apply_parameters(::ScaleTransform, θ) = ScaleTransform(θ)
 _isequal(t1::ScaleTransform, t2::ScaleTransform) = isapprox(t1.s, t2.s)
 
-
-
 # Likelihoods
 extract_parameters(::BernoulliLikelihood) = nothing
 apply_parameters(l::BernoulliLikelihood, θ) = l
@@ -166,20 +157,18 @@ extract_parameters(::PoissonLikelihood) = nothing
 apply_parameters(l::PoissonLikelihood, θ) = l
 _isequal(l1::T, l2::T) where {T<:PoissonLikelihood} = true
 
-
-
 # GPs
 extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel))
-apply_parameters(f::GP, θ) =
-    GP(apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2]))
+function apply_parameters(f::GP, θ)
+    return GP(apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2]))
+end
 costfunction(f::GP, data) = -logpdf(f(data.x), data.y)
 _isequal(f1::GP, f2::GP) = _isequal(f1.mean, f2.mean) && _isequal(f1.kernel, f2.kernel)
 
 extract_parameters(f::LatentGP) = (extract_parameters(f.f), extract_parameters(f.lik))
-apply_parameters(f::LatentGP, θ) =
-    LatentGP(apply_parameters(f.f, θ[1]), apply_parameters(f.lik, θ[2]), f.Σy)
-
-
+function apply_parameters(f::LatentGP, θ)
+    return LatentGP(apply_parameters(f.f, θ[1]), apply_parameters(f.lik, θ[2]), f.Σy)
+end
 
 # Approximations
 const SVA = SparseVariationalApproximation
@@ -196,17 +185,13 @@ function apply_parameters(sva::SVA, θ)
     return SVA(fz, q)
 end
 
-variational_gaussian(n::Int, T = Float64) = MvNormal(zeros(T, n), Matrix{T}(I, n, n))
-
-
+variational_gaussian(n::Int, T=Float64) = MvNormal(zeros(T, n), Matrix{T}(I, n, n))
 
 # Distributions
 extract_parameters(d::MvNormal) = (d.μ, ParameterHandling.positive_definite(d.Σ))
 apply_parameters(::MvNormal, θ) = MvNormal(θ[1], θ[2])
 _isequal(d1::MvNormal, d2::MvNormal) = isapprox(d1.μ, d1.μ) && isapprox(d1.Σ, d2.Σ)
 
-
-
 # Custom wrappers
 struct NoisyGP{T<:GP,Tn<:Real}
     gp::T
@@ -217,12 +202,14 @@ end
 
 with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise)
 
-extract_parameters(f::NoisyGP) =
-    (extract_parameters(f.gp), ParameterHandling.positive(f.obs_noise, exp, 1e-6))
+function extract_parameters(f::NoisyGP)
+    return (extract_parameters(f.gp), ParameterHandling.positive(f.obs_noise, exp, 1e-6))
+end
 apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2])
 costfunction(f::NoisyGP, data) = -logpdf(f(data.x), data.y)
-_isequal(f1::NoisyGP, f2::NoisyGP) =
-    _isequal(f1.gp, f2.gp) && isapprox(f1.obs_noise, f2.obs_noise)
+function _isequal(f1::NoisyGP, f2::NoisyGP)
+    return _isequal(f1.gp, f2.gp) && isapprox(f1.obs_noise, f2.obs_noise)
+end
 
 struct SVGP{T<:LatentGP,Ts<:SVA}
     lgp::T
diff --git a/test/integration_tests.jl b/test/integration_tests.jl
index b673da6..b388023 100644
--- a/test/integration_tests.jl
+++ b/test/integration_tests.jl
@@ -4,7 +4,7 @@
     gp = GP(3.0, kernel)
     x = 0.01:0.01:1.0
     y = rand(gp(x, 0.1))
-    fitted_gp = EasyGPs.fit(gp, x, y; iterations = 1)
+    fitted_gp = EasyGPs.fit(gp, x, y; iterations=1)
     @test fitted_gp isa typeof(gp)
     @test !EasyGPs._isequal(fitted_gp, gp)
 end
@@ -14,7 +14,7 @@
     gp = with_gaussian_noise(GP(3.0, kernel), 0.1)
     x = 0.01:0.01:1.0
     y = rand(gp.gp(x, 0.1))
-    fitted_gp = EasyGPs.fit(gp, x, y; iterations = 1)
+    fitted_gp = EasyGPs.fit(gp, x, y; iterations=1)
     @test fitted_gp isa typeof(gp)
     @test !EasyGPs._isequal(fitted_gp, gp)
 end
@@ -22,12 +22,12 @@
 @testitem "Sparse variational 2d GP with Poisson likelihood" begin
     kernel = 1.0 * SEKernel()
     lgp = LatentGP(GP(0.0, kernel), PoissonLikelihood(), 1e-6)
-    x = rand(100, 2) |> RowVecs
+    x = RowVecs(rand(100, 2))
     y = round.(Int, 10 .* sum.(abs2, x))
     z = x[begin:5:end]
     sva = SVA(lgp(z).fx, variational_gaussian(length(z)))
-    svgp = SVGP(lgp, sva; fixed_inducing_points = true)
-    fitted_svgp = EasyGPs.fit(svgp, x, y; iterations = 1)
+    svgp = SVGP(lgp, sva; fixed_inducing_points=true)
+    fitted_svgp = EasyGPs.fit(svgp, x, y; iterations=1)
     @test fitted_svgp isa typeof(svgp)
     @test !EasyGPs._isequal(fitted_svgp, svgp)
     @test fitted_svgp.sva.fz.x === z
diff --git a/test/unit_tests.jl b/test/unit_tests.jl
index e8a6b1a..7d4a6af 100644
--- a/test/unit_tests.jl
+++ b/test/unit_tests.jl
@@ -11,7 +11,7 @@
 end
 
 @testitem "parameterize" begin
-    import ParameterHandling
+    using ParameterHandling: ParameterHandling
     for object in (
         ZeroMean(),
         ConstMean(1.0),
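
For context, a minimal end-to-end sketch of the API touched by this patch, adapted from test/integration_tests.jl above. The larger `iterations` value and the comments are illustrative and not part of this diff; the description of what `fit` does is inferred from the `optimize`/`costfunction` code shown above.

using EasyGPs  # re-exports AbstractGPs, so GP and SEKernel are in scope

kernel = 1.0 * SEKernel()                       # scaled squared-exponential kernel
gp = with_gaussian_noise(GP(3.0, kernel), 0.1)  # constant-mean GP wrapped with observation noise
x = 0.01:0.01:1.0
y = rand(gp.gp(x, 0.1))                         # synthetic targets drawn from the underlying GP

# Fitting flattens the hyperparameters with ParameterHandling, minimizes the
# negative log marginal likelihood (`costfunction`) via Optimization.jl with the
# Optim backend, and returns a refitted model of the same type.
fitted_gp = EasyGPs.fit(gp, x, y; iterations=100)
EasyGPs._isequal(fitted_gp, gp)  # false once the hyperparameters have moved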