JuliaDiff · nickrobinson251 · Jan 28, 2020 · Jan 26, 2020 · Jan 26, 2020 · nickrobinson251
diff --git a/Project.toml b/Project.toml
@@ -10,7 +10,7 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
-ChainRulesCore = "0.6"
+ChainRulesCore = "0.6.1"
 FiniteDifferences = "^0.7"
 Reexport = "0.2"
 Requires = "0.5.2, 1"

diff --git a/README.md b/README.md
@@ -5,6 +5,7 @@
 [![Travis](https://travis-ci.org/JuliaDiff/ChainRules.jl.svg?branch=master)](https://travis-ci.org/JuliaDiff/ChainRules.jl)
 [![Coveralls](https://coveralls.io/repos/github/JuliaDiff/ChainRules.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaDiff/ChainRules.jl?branch=master)
 [![PkgEval](https://juliaci.github.io/NanosoldierReports/pkgeval_badges/C/ChainRules.svg)](https://juliaci.github.io/NanosoldierReports/pkgeval_badges/report.html)
+[![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle)
 
 **Docs:**
 [![](https://img.shields.io/badge/docs-master-blue.svg)](https://JuliaDiff.github.io/ChainRulesCore.jl/dev)
@@ -14,7 +15,7 @@ The ChainRules package provides a variety of common utilities that can be used b
 
 The core logic of ChainRules is implemented in [ChainRulesCore.jl](https://github.com/JuliaDiff/ChainRulesCore.jl).
 To add ChainRules support to your package, by defining new `rrule`s or `frules`, you only need to depend on the very light-weight package ChainRulesCore.jl.
-This repository contains ChainRules.jl, which is what people actually use directly. 
+This repository contains ChainRules.jl, which is what people actually use directly.
 ChainRules reexports all the ChainRulesCore functionality, and has all the rules for the Julia standard library.
 
 
@@ -24,6 +25,3 @@ Here are some of the core features of the package:
 - Extensible rules: package authors can add rules (and thus AD support) to the functions in their packages, without needing to make a PR to ChainRules.jl .
 - Control-inverted design: rule authors can fully specify derivatives in a concise manner that supports computational efficiencies, so we will only compute as much as the user requests.
 - Propagation semantics built-in, with default implementations that allow rule authors to easily opt-in to common optimizations (fusion, increment elision, memoization, etc.).
-
-
-The ChainRules source code follows the [YASGuide](https://github.com/jrevels/YASGuide).
diff --git a/src/ChainRules.jl b/src/ChainRules.jl
@@ -1,16 +1,18 @@
 module ChainRules
+
 using Reexport
 @reexport using ChainRulesCore
-# Basically everything this package does is overloading these, so we make an exception
-# to the normal rule of only overload via `ChainRulesCore.rrule`.
-import ChainRulesCore: rrule, frule
-using ChainRulesCore: AbstractZero
 
+using Base.Broadcast: materialize, materialize!, broadcasted, Broadcasted, broadcastable
 using LinearAlgebra
 using LinearAlgebra.BLAS
 using Requires
 using Statistics
-using Base.Broadcast: materialize, materialize!, broadcasted, Broadcasted, broadcastable
+
+# Basically everything this package does is overloading these, so we make an exception
+# to the normal rule of only overloading via `ChainRulesCore.rrule`.
+import ChainRulesCore: rrule, frule
+
 
 if VERSION < v"1.3.0-DEV.142"
     # In prior versions, the BLAS submodule also exported `dot`, which caused a conflict

diff --git a/src/rulesets/Base/base.jl b/src/rulesets/Base/base.jl
@@ -2,62 +2,64 @@
 @scalar_rule(zero(x), Zero())
 @scalar_rule(sign(x), Zero())
 
+@scalar_rule(abs(x::Real), sign(x))
 @scalar_rule(abs2(x), 2x)
+@scalar_rule(exp(x), Ω)
+@scalar_rule(exp10(x), Ω * log(oftype(x, 10)))
+@scalar_rule(exp2(x), Ω * log(oftype(x, 2)))
+@scalar_rule(expm1(x), exp(x))
 @scalar_rule(log(x), inv(x))
 @scalar_rule(log10(x), inv(x) / log(oftype(x, 10)))
-@scalar_rule(log2(x), inv(x) / log(oftype(x, 2)))
 @scalar_rule(log1p(x), inv(x + 1))
-@scalar_rule(expm1(x), exp(x))
+@scalar_rule(log2(x), inv(x) / log(oftype(x, 2)))
 
-@scalar_rule(sin(x), cos(x))
 @scalar_rule(cos(x), -sin(x))
-@scalar_rule(sinpi(x), π * cospi(x))
+@scalar_rule(cosd(x), -(π / oftype(x, 180)) * sind(x))
 @scalar_rule(cospi(x), -π * sinpi(x))
+@scalar_rule(sin(x), cos(x))
+@scalar_rule(sincos(x), @setup((sinx, cosx) = Ω), cosx, -sinx)
 @scalar_rule(sind(x), (π / oftype(x, 180)) * cosd(x))
-@scalar_rule(cosd(x), -(π / oftype(x, 180)) * sind(x))
+@scalar_rule(sinpi(x), π * cospi(x))
 
-@scalar_rule(asin(x), inv(sqrt(1 - x^2)))
 @scalar_rule(acos(x), -inv(sqrt(1 - x^2)))
-@scalar_rule(atan(x), inv(1 + x^2))
-@scalar_rule(asec(x::Real), inv(abs(x) * sqrt(x^2 - 1)))
-@scalar_rule(asec(x), inv(x^2 * sqrt(1 - x^-2)))
-@scalar_rule(acsc(x::Real), -inv(abs(x) * sqrt(x^2 - 1)))
-@scalar_rule(acsc(x), -inv(x^2 * sqrt(1 - x^-2)))
 @scalar_rule(acot(x), -inv(1 + x^2))
+@scalar_rule(acsc(x), -inv(x^2 * sqrt(1 - x^-2)))
+@scalar_rule(acsc(x::Real), -inv(abs(x) * sqrt(x^2 - 1)))
+@scalar_rule(asec(x), inv(x^2 * sqrt(1 - x^-2)))
+@scalar_rule(asec(x::Real), inv(abs(x) * sqrt(x^2 - 1)))
+@scalar_rule(asin(x), inv(sqrt(1 - x^2)))
+@scalar_rule(atan(x), inv(1 + x^2))
+@scalar_rule(atan(y, x), @setup(u = x^2 + y^2), (x / u, -y / u))
 
-@scalar_rule(asind(x), oftype(x, 180) / π / sqrt(1 - x^2))
 @scalar_rule(acosd(x), -oftype(x, 180) / π / sqrt(1 - x^2))
-@scalar_rule(atand(x), oftype(x, 180) / π / (1 + x^2))
-@scalar_rule(asecd(x::Real), oftype(x, 180) / π / abs(x) / sqrt(x^2 - 1))
-@scalar_rule(asecd(x), oftype(x, 180) / π / x^2 / sqrt(1 - x^-2))
-@scalar_rule(acscd(x::Real), -oftype(x, 180) / π / abs(x) / sqrt(x^2 - 1))
-@scalar_rule(acscd(x), -oftype(x, 180) / π / x^2 / sqrt(1 - x^-2))
 @scalar_rule(acotd(x), -oftype(x, 180) / π / (1 + x^2))
+@scalar_rule(acscd(x), -oftype(x, 180) / π / x^2 / sqrt(1 - x^-2))
+@scalar_rule(acscd(x::Real), -oftype(x, 180) / π / abs(x) / sqrt(x^2 - 1))
+@scalar_rule(asecd(x), oftype(x, 180) / π / x^2 / sqrt(1 - x^-2))
+@scalar_rule(asecd(x::Real), oftype(x, 180) / π / abs(x) / sqrt(x^2 - 1))
+@scalar_rule(asind(x), oftype(x, 180) / π / sqrt(1 - x^2))
+@scalar_rule(atand(x), oftype(x, 180) / π / (1 + x^2))
 
-@scalar_rule(sinh(x), cosh(x))
 @scalar_rule(cosh(x), sinh(x))
-@scalar_rule(tanh(x), 1-Ω^2)
 @scalar_rule(coth(x), -(csch(x)^2))
+@scalar_rule(sinh(x), cosh(x))
+@scalar_rule(tanh(x), 1 - Ω^2)  # Ω is the primal output, tanh(x)
 
-@scalar_rule(asinh(x), inv(sqrt(x^2 + 1)))
 @scalar_rule(acosh(x), inv(sqrt(x^2 - 1)))
-@scalar_rule(atanh(x), inv(1 - x^2))
-@scalar_rule(asech(x), -inv(x * sqrt(1 - x^2)))
-@scalar_rule(acsch(x::Real), -inv(abs(x) * sqrt(1 + x^2)))
-@scalar_rule(acsch(x), -inv(x^2 * sqrt(1 + x^-2)))
 @scalar_rule(acoth(x), inv(1 - x^2))
+@scalar_rule(acsch(x), -inv(x^2 * sqrt(1 + x^-2)))
+@scalar_rule(acsch(x::Real), -inv(abs(x) * sqrt(1 + x^2)))
+@scalar_rule(asech(x), -inv(x * sqrt(1 - x^2)))
+@scalar_rule(asinh(x), inv(sqrt(x^2 + 1)))
+@scalar_rule(atanh(x), inv(1 - x^2))
 
 @scalar_rule(deg2rad(x), π / oftype(x, 180))
 @scalar_rule(rad2deg(x), oftype(x, 180) / π)
 
-@scalar_rule(conj(x::Real), One())
 @scalar_rule(adjoint(x::Real), One())
+@scalar_rule(conj(x::Real), One())
 @scalar_rule(transpose(x), One())
 
-@scalar_rule(abs(x::Real), sign(x))
-@scalar_rule(hypot(x::Real), sign(x))
-@scalar_rule(rem2pi(x, r::RoundingMode), (One(), DoesNotExist()))
-
 @scalar_rule(+(x), One())
 @scalar_rule(-(x), -1)
 @scalar_rule(+(x, y), (One(), One()))
@@ -66,39 +68,42 @@
 @scalar_rule(\(x, y), (-(y / x / x), inv(x)))
 @scalar_rule(^(x, y), (ifelse(iszero(y), zero(Ω), y * x^(y - 1)), Ω * log(x)))
 
+@scalar_rule(cbrt(x), inv(3 * Ω^2))
 @scalar_rule(inv(x), -Ω^2)
 @scalar_rule(sqrt(x), inv(2 * Ω))
-@scalar_rule(cbrt(x), inv(3 * Ω^2))
-@scalar_rule(exp(x), Ω)
-@scalar_rule(exp2(x), Ω * log(oftype(x, 2)))
-@scalar_rule(exp10(x), Ω * log(oftype(x, 10)))
 
-@scalar_rule(tan(x), 1 + Ω^2)
-@scalar_rule(sec(x), Ω * tan(x))
-@scalar_rule(csc(x), -Ω * cot(x))
 @scalar_rule(cot(x), -(1 + Ω^2))
-@scalar_rule(tand(x), (π / oftype(x, 180)) * (1 + Ω^2))
-@scalar_rule(secd(x), (π / oftype(x, 180)) * Ω * tand(x))
-@scalar_rule(cscd(x), -(π / oftype(x, 180)) * Ω * cotd(x))
 @scalar_rule(cotd(x), -(π / oftype(x, 180)) * (1 + Ω^2))
-@scalar_rule(sech(x), -tanh(x) * Ω)
+@scalar_rule(csc(x), -Ω * cot(x))
+@scalar_rule(cscd(x), -(π / oftype(x, 180)) * Ω * cotd(x))
 @scalar_rule(csch(x), -coth(x) * Ω)
+@scalar_rule(sec(x), Ω * tan(x))
+@scalar_rule(secd(x), (π / oftype(x, 180)) * Ω * tand(x))
+@scalar_rule(sech(x), -tanh(x) * Ω)
+@scalar_rule(tan(x), 1 + Ω^2)
+@scalar_rule(tand(x), (π / oftype(x, 180)) * (1 + Ω^2))
 
+@scalar_rule(angle(x::Real), Zero())
+@scalar_rule(hypot(x::Real), sign(x))
 @scalar_rule(hypot(x::Real, y::Real), (x / Ω, y / Ω))
-@scalar_rule(sincos(x), @setup((sinx, cosx) = Ω), cosx, -sinx)
-@scalar_rule(atan(y, x), @setup(u = x^2 + y^2), (x / u, -y / u))
+@scalar_rule(imag(x::Real), Zero())
 
+@scalar_rule(fma(x, y, z), (y, x, One()))
 @scalar_rule(max(x, y), @setup(gt = x > y), (gt, !gt))
 @scalar_rule(min(x, y), @setup(gt = x > y), (!gt, gt))
-@scalar_rule(mod(x, y), @setup((u, nan) = promote(x / y, NaN16), isint = isinteger(x / y)),
-             (ifelse(isint, nan, one(u)), ifelse(isint, nan, -floor(u))))
-@scalar_rule(rem(x, y), @setup((u, nan) = promote(x / y, NaN16), isint = isinteger(x / y)),
-             (ifelse(isint, nan, one(u)), ifelse(isint, nan, -trunc(u))))
-@scalar_rule(fma(x, y, z), (y, x, One()))
 @scalar_rule(muladd(x, y, z), (y, x, One()))
-@scalar_rule(angle(x::Real), Zero())
+@scalar_rule(
+    mod(x, y),
+    @setup((u, nan) = promote(x / y, NaN16), isint = isinteger(x / y)),
+    (ifelse(isint, nan, one(u)), ifelse(isint, nan, -floor(u))),  # one tuple: (∂x, ∂y)
+)
 @scalar_rule(real(x::Real), One())
-@scalar_rule(imag(x::Real), Zero())
+@scalar_rule(rem2pi(x, r::RoundingMode), (One(), DoesNotExist()))
+@scalar_rule(
+    rem(x, y),
+    @setup((u, nan) = promote(x / y, NaN16), isint = isinteger(x / y)),
+    (ifelse(isint, nan, one(u)), ifelse(isint, nan, -trunc(u))),  # one tuple: (∂x, ∂y)
+)
 
 # product rule requires special care for arguments where `mul` is non-commutative
 

diff --git a/src/rulesets/LinearAlgebra/blas.jl b/src/rulesets/LinearAlgebra/blas.jl
@@ -85,12 +85,7 @@ function rrule(::typeof(BLAS.asum), n, X, incx)
             ∂X = Zero()
         else
             ΔΩ = extern(ΔΩ)
-            ∂X = @thunk scal!(
-                n,
-                ΔΩ,
-                blascopy!(n, sign.(X), incx, _zeros(X), incx),
-                incx
-            )
+            ∂X = @thunk scal!(n, ΔΩ, blascopy!(n, sign.(X), incx, _zeros(X), incx), incx)
         end
         return (NO_FIELDS, DoesNotExist(), ∂X, DoesNotExist())
     end
@@ -129,8 +124,9 @@ function rrule(::typeof(gemv), tA::Char, α::T, A::AbstractMatrix{T},
     return y, gemv_pullback
 end
 
-function rrule(::typeof(gemv), tA::Char, A::AbstractMatrix{T},
-               x::AbstractVector{T}) where T<:BlasFloat
+function rrule(
+    ::typeof(gemv), tA::Char, A::AbstractMatrix{T}, x::AbstractVector{T}
+) where T<:BlasFloat
     y, inner_pullback = rrule(gemv, tA, one(T), A, x)
     function gemv_pullback(Ȳ)
         (_, dtA, _, dA, dx) = inner_pullback(Ȳ)
@@ -143,8 +139,9 @@ end
 ##### `BLAS.gemm`
 #####
 
-function rrule(::typeof(gemm), tA::Char, tB::Char, α::T,
-               A::AbstractMatrix{T}, B::AbstractMatrix{T}) where T<:BlasFloat
+function rrule(
+    ::typeof(gemm), tA::Char, tB::Char, α::T, A::AbstractMatrix{T}, B::AbstractMatrix{T}
+) where T<:BlasFloat
     C = gemm(tA, tB, α, A, B)
     function gemv_pullback(C̄)
         β = one(T)
@@ -194,8 +191,9 @@ function rrule(::typeof(gemm), tA::Char, tB::Char, α::T,
     return C, gemv_pullback
 end
 
-function rrule(::typeof(gemm), tA::Char, tB::Char,
-               A::AbstractMatrix{T}, B::AbstractMatrix{T}) where T<:BlasFloat
+function rrule(
+    ::typeof(gemm), tA::Char, tB::Char, A::AbstractMatrix{T}, B::AbstractMatrix{T}
+) where T<:BlasFloat
     C, inner_pullback = rrule(gemm, tA, tB, one(T), A, B)
     function gemv_pullback(Ȳ)
         (_, dtA, dtB, _, dA, dB) = inner_pullback(Ȳ)

diff --git a/src/rulesets/Statistics/statistics.jl b/src/rulesets/Statistics/statistics.jl
@@ -7,7 +7,7 @@ _denom(x, dims::Integer) = size(x, dims)
 _denom(x, dims) = mapreduce(i->size(x, i), Base.mul_prod, unique(dims), init=1)
 
 # TODO: We have `mean(f, x; dims)` as of 1.3.0-DEV.36
-
+# https://github.com/JuliaDiff/ChainRules.jl/issues/85
 function rrule(::typeof(mean), x::AbstractArray{<:Real}; dims=:)
     y_sum, sum_pullback = rrule(sum, x; dims=dims)
     n = _denom(x, dims)

diff --git a/src/rulesets/packages/NaNMath.jl b/src/rulesets/packages/NaNMath.jl
@@ -14,9 +14,27 @@ using ChainRulesCore
 @scalar_rule(NaNMath.lgamma(x), SpecialFunctions.digamma(x))
 @scalar_rule(NaNMath.sqrt(x), inv(2 * Ω))
 @scalar_rule(NaNMath.pow(x, y), (y * NaNMath.pow(x, y - 1), Ω * NaNMath.log(x)))
-@scalar_rule(NaNMath.max(x, y),
-             (ifelse((y > x) | (signbit(y) < signbit(x)), ifelse(isnan(y), One(), Zero()), ifelse(isnan(x), Zero(), One())),
-              ifelse((y > x) | (signbit(y) < signbit(x)), ifelse(isnan(y), Zero(), One()), ifelse(isnan(x), One(), Zero()))))
-@scalar_rule(NaNMath.min(x, y),
-             (ifelse((y < x) | (signbit(y) > signbit(x)), ifelse(isnan(y), One(), Zero()), ifelse(isnan(x), Zero(), One())),
-              ifelse((y < x) | (signbit(y) > signbit(x)), ifelse(isnan(y), Zero(), One()), ifelse(isnan(x), One(), Zero()))))
+@scalar_rule(
+    NaNMath.max(x, y),
+    (ifelse(  # partial w.r.t. x
+        (y > x) | (signbit(y) < signbit(x)),
+        ifelse(isnan(y), One(), Zero()),
+        ifelse(isnan(x), Zero(), One())),
+     ifelse(  # partial w.r.t. y: same condition, One()/Zero() swapped
+        (y > x) | (signbit(y) < signbit(x)),
+        ifelse(isnan(y), Zero(), One()),
+        ifelse(isnan(x), One(), Zero())),
+    )
+)
+@scalar_rule(
+    NaNMath.min(x, y),
+    (ifelse(  # partial w.r.t. x
+        (y < x) | (signbit(y) > signbit(x)),
+        ifelse(isnan(y), One(), Zero()),
+        ifelse(isnan(x), Zero(), One())),
+     ifelse(  # partial w.r.t. y: same condition, One()/Zero() swapped
+        (y < x) | (signbit(y) > signbit(x)),
+        ifelse(isnan(y), Zero(), One()),
+        ifelse(isnan(x), One(), Zero())),
+    )
+)