Introduce AdjointFactorization not subtyping AbstractMatrix (#46874)

JuliaLang · Feb 15, 2023 · 1270b34 · 1270b34
1 parent faf7954
commit 1270b34
Show file tree

Hide file tree

Showing 12 changed files with 164 additions and 91 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -51,6 +51,10 @@ Standard library changes
 
 #### LinearAlgebra
 
+* Adjoints and transposes of `Factorization` objects are no longer wrapped in `Adjoint`
+  and `Transpose` wrappers, respectively. Instead, they are wrapped in
+  `AdjointFactorization` and `TranposeFactorization` types, which themselves subtype
+  `Factorization` ([#46874]).
 * New functions `hermitianpart` and `hermitianpart!` for extracting the Hermitian
   (real symmetric) part of a matrix ([#31836]).
 

diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
@@ -276,12 +276,11 @@ to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](
 Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I)x \ b`) and related
 operations like determinants.
 
-
 ## [Matrix factorizations](@id man-linalg-factorizations)
 
 [Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition)
 compute the factorization of a matrix into a product of matrices, and are one of the central concepts
-in linear algebra.
+in (numerical) linear algebra.
 
 The following table summarizes the types of matrix factorizations that have been implemented in
 Julia. Details of their associated methods can be found in the [Standard functions](@ref) section
@@ -306,6 +305,10 @@ of the Linear Algebra documentation.
 | `Schur`            | [Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition)                                       |
 | `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) |
 
+Adjoints and transposes of [`Factorization`](@ref) objects are lazily wrapped in
+`AdjointFactorization` and `TransposeFactorization` objects, respectively. Generically,
+transpose of real `Factorization`s are wrapped as `AdjointFactorization`.
+
 ## Standard functions
 
 Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/).
@@ -460,9 +463,11 @@ LinearAlgebra.ishermitian
 Base.transpose
 LinearAlgebra.transpose!
 LinearAlgebra.Transpose
+LinearAlgebra.TransposeFactorization
 Base.adjoint
 LinearAlgebra.adjoint!
 LinearAlgebra.Adjoint
+LinearAlgebra.AdjointFactorization
 Base.copy(::Union{Transpose,Adjoint})
 LinearAlgebra.stride1
 LinearAlgebra.checksquare

diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -503,7 +503,7 @@ _initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} =
 # While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs
 # which is required by LAPACK but not SuiteSparse which allows real-complex solves in some cases. Hence,
 # we restrict this method to only the LAPACK factorizations in LinearAlgebra.
-# The definition is put here since it explicitly references all the Factorizion structs so it has
+# The definition is put here since it explicitly references all the Factorization structs so it has
 # to be located after all the files that define the structs.
 const LAPACKFactorizations{T,S} = Union{
     BunchKaufman{T,S},
@@ -514,7 +514,12 @@ const LAPACKFactorizations{T,S} = Union{
     QRCompactWY{T,S},
     QRPivoted{T,S},
     SVD{T,<:Real,S}}
-function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorizations}}, B::AbstractVecOrMat)
+
+(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B)
+(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B)
+(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B)
+
+function ldiv(F::Factorization, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     m, n = size(F)
     if m != size(B, 1)
@@ -544,7 +549,11 @@ function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorization
 end
 # disambiguate
 (\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    invoke(\, Tuple{Factorization{T}, VecOrMat{Complex{T}}}, F, B)
+    @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}})
+(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    ldiv(F, B)
+(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    ldiv(F, B)
 
 """
     LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false)

diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -291,6 +291,9 @@ wrapperop(_) = identity
 wrapperop(::Adjoint) = adjoint
 wrapperop(::Transpose) = transpose
 
+# the following fallbacks can be removed if Adjoint/Transpose are restricted to AbstractVecOrMat
+size(A::AdjOrTrans) = reverse(size(A.parent))
+axes(A::AdjOrTrans) = reverse(axes(A.parent))
 # AbstractArray interface, basic definitions
 length(A::AdjOrTrans) = length(A.parent)
 size(v::AdjOrTransAbsVec) = (1, length(v.parent))

diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl
@@ -11,9 +11,58 @@ matrix factorizations.
 """
 abstract type Factorization{T} end
 
+"""
+    AdjointFactorization
+
+Lazy wrapper type for the adjoint of the underlying `Factorization` object. Usually, the
+`AdjointFactorization` constructor should not be called directly, use
+[`adjoint(:: Factorization)`](@ref) instead.
+"""
+struct AdjointFactorization{T,S<:Factorization} <: Factorization{T}
+    parent::S
+end
+AdjointFactorization(F::Factorization) =
+    AdjointFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F)
+
+"""
+    TransposeFactorization
+
+Lazy wrapper type for the transpose of the underlying `Factorization` object. Usually, the
+`TransposeFactorization` constructor should not be called directly, use
+[`transpose(:: Factorization)`](@ref) instead.
+"""
+struct TransposeFactorization{T,S<:Factorization} <: Factorization{T}
+    parent::S
+end
+TransposeFactorization(F::Factorization) =
+    TransposeFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F)
+
 eltype(::Type{<:Factorization{T}}) where {T} = T
-size(F::Adjoint{<:Any,<:Factorization}) = reverse(size(parent(F)))
-size(F::Transpose{<:Any,<:Factorization}) = reverse(size(parent(F)))
+size(F::AdjointFactorization) = reverse(size(parent(F)))
+size(F::TransposeFactorization) = reverse(size(parent(F)))
+size(F::Union{AdjointFactorization,TransposeFactorization}, d::Integer) = d in (1, 2) ? size(F)[d] : 1
+parent(F::Union{AdjointFactorization,TransposeFactorization}) = F.parent
+
+"""
+    adjoint(F::Factorization)
+
+Lazy adjoint of the factorization `F`. By default, returns an
+[`AdjointFactorization`](@ref) wrapper.
+"""
+adjoint(F::Factorization) = AdjointFactorization(F)
+"""
+    transpose(F::Factorization)
+
+Lazy transpose of the factorization `F`. By default, returns a [`TransposeFactorization`](@ref),
+except for `Factorization`s with real `eltype`, in which case returns an [`AdjointFactorization`](@ref).
+"""
+transpose(F::Factorization) = TransposeFactorization(F)
+transpose(F::Factorization{<:Real}) = AdjointFactorization(F)
+adjoint(F::AdjointFactorization) = F.parent
+transpose(F::TransposeFactorization) = F.parent
+transpose(F::AdjointFactorization{<:Real}) = F.parent
+conj(A::TransposeFactorization) = adjoint(A.parent)
+conj(A::AdjointFactorization) = transpose(A.parent)
 
 checkpositivedefinite(info) = info == 0 || throw(PosDefException(info))
 checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info))
@@ -60,64 +109,77 @@ convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)::T
 
 ### General promotion rules
 Factorization{T}(F::Factorization{T}) where {T} = F
-# This is a bit odd since the return is not a Factorization but it works well in generic code
-Factorization{T}(A::Adjoint{<:Any,<:Factorization}) where {T} =
+# This no longer looks odd since the return _is_ a Factorization!
+Factorization{T}(A::AdjointFactorization) where {T} =
     adjoint(Factorization{T}(parent(A)))
+Factorization{T}(A::TransposeFactorization) where {T} =
+    transpose(Factorization{T}(parent(A)))
 inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n)))
 
 Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h)
 Base.:(==)(  F::T, G::T) where {T<:Factorization} = all(f -> getfield(F, f) == getfield(G, f), 1:nfields(F))
 Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F, f), getfield(G, f)), 1:nfields(F))::Bool
 
-function Base.show(io::IO, x::Adjoint{<:Any,<:Factorization})
-    print(io, "Adjoint of ")
+function Base.show(io::IO, x::AdjointFactorization)
+    print(io, "adjoint of ")
     show(io, parent(x))
 end
-function Base.show(io::IO, x::Transpose{<:Any,<:Factorization})
-    print(io, "Transpose of ")
+function Base.show(io::IO, x::TransposeFactorization)
+    print(io, "transpose of ")
     show(io, parent(x))
 end
-function Base.show(io::IO, ::MIME"text/plain", x::Adjoint{<:Any,<:Factorization})
-    print(io, "Adjoint of ")
+function Base.show(io::IO, ::MIME"text/plain", x::AdjointFactorization)
+    print(io, "adjoint of ")
     show(io, MIME"text/plain"(), parent(x))
 end
-function Base.show(io::IO, ::MIME"text/plain", x::Transpose{<:Any,<:Factorization})
-    print(io, "Transpose of ")
+function Base.show(io::IO, ::MIME"text/plain", x::TransposeFactorization)
+    print(io, "transpose of ")
     show(io, MIME"text/plain"(), parent(x))
 end
 
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns or rows
-function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
+function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal}
     require_one_based_indexing(B)
     c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2))
     x = ldiv!(F, c2r)
     return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(x, div(length(x), 2), 2))))), _ret_size(F, B))
 end
-function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where T<:BlasReal
+# don't do the reinterpretation for [Adjoint/Transpose]Factorization
+(\)(F::TransposeFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    conj!(adjoint(parent(F)) \ conj.(B))
+(\)(F::AdjointFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    @invoke \(F::typeof(F), B::VecOrMat)
+
+function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where {T<:BlasReal}
     require_one_based_indexing(B)
     x = rdiv!(copy(reinterpret(T, B)), F)
     return copy(reinterpret(Complex{T}, x))
 end
+# don't do the reinterpretation for [Adjoint/Transpose]Factorization
+(/)(B::VecOrMat{Complex{T}}, F::TransposeFactorization{T}) where {T<:BlasReal} =
+    conj!(adjoint(parent(F)) \ conj.(B))
+(/)(B::VecOrMat{Complex{T}}, F::AdjointFactorization{T}) where {T<:BlasReal} =
+    @invoke /(B::VecOrMat{Complex{T}}, F::Factorization{T})
 
-function \(F::Union{Factorization, Adjoint{<:Any,<:Factorization}}, B::AbstractVecOrMat)
+function (\)(F::Factorization, B::AbstractVecOrMat)
     require_one_based_indexing(B)
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
+    TFB = typeof(oneunit(eltype(F)) \ oneunit(eltype(B)))
     ldiv!(F, copy_similar(B, TFB))
 end
+(\)(F::TransposeFactorization, B::AbstractVecOrMat) = conj!(adjoint(F.parent) \ conj.(B))
 
-function /(B::AbstractMatrix, F::Union{Factorization, Adjoint{<:Any,<:Factorization}})
+function (/)(B::AbstractMatrix, F::Factorization)
     require_one_based_indexing(B)
     TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
     rdiv!(copy_similar(B, TFB), F)
 end
-/(adjB::AdjointAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjB.parent)
-/(B::TransposeAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjoint(B))
-
+(/)(A::AbstractMatrix, F::AdjointFactorization) = adjoint(adjoint(F) \ adjoint(A))
+(/)(A::AbstractMatrix, F::TransposeFactorization) = transpose(transpose(F) \ transpose(A))
 
 function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector)
     require_one_based_indexing(Y, B)
-    m, n = size(A, 1), size(A, 2)
+    m, n = size(A)
     if m > n
         Bc = copy(B)
         ldiv!(A, Bc)
@@ -128,7 +190,7 @@ function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector)
 end
 function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix)
     require_one_based_indexing(Y, B)
-    m, n = size(A, 1), size(A, 2)
+    m, n = size(A)
     if m > n
         Bc = copy(B)
         ldiv!(A, Bc)
@@ -138,14 +200,3 @@ function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix)
         return ldiv!(A, Y)
     end
 end
-
-# fallback methods for transposed solves
-\(F::Transpose{<:Any,<:Factorization{<:Real}}, B::AbstractVecOrMat) = adjoint(F.parent) \ B
-\(F::Transpose{<:Any,<:Factorization}, B::AbstractVecOrMat) = conj.(adjoint(F.parent) \ conj.(B))
-
-/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent)
-/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent))
-/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent)
-/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = transpose(transpose(F) \ transpose(B))
-/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent))
-/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization}) = transpose(transpose(F) \ transpose(B))
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -422,10 +422,12 @@ Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo;
 
 copy(F::Hessenberg{<:Any,<:UpperHessenberg}) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ)
 copy(F::Hessenberg{<:Any,<:SymTridiagonal}) = Hessenberg(copy(F.factors), copy(F.τ), copy(F.H), F.uplo; μ=F.μ)
-size(F::Hessenberg, d) = size(F.H, d)
+size(F::Hessenberg, d::Integer) = size(F.H, d)
 size(F::Hessenberg) = size(F.H)
 
-adjoint(F::Hessenberg) = Adjoint(F)
+transpose(F::Hessenberg{<:Real}) = F'
+transpose(::Hessenberg) =
+    throw(ArgumentError("transpose of Hessenberg decomposition is not supported, consider using adjoint"))
 
 # iteration for destructuring into components
 Base.iterate(S::Hessenberg) = (S.Q, Val(:H))
@@ -687,8 +689,8 @@ function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Any,<:A
     return B .= Complex.(Br,Bi)
 end
 
-ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')'
-rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')'
+ldiv!(F::AdjointFactorization{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')'
+rdiv!(B::AbstractMatrix, F::AdjointFactorization{<:Any,<:Hessenberg}) = ldiv!(F', B')'
 
 det(F::Hessenberg) = det(F.H; shift=F.μ)
 logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ)

diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl
@@ -135,8 +135,11 @@ AbstractArray(A::LQ) = AbstractMatrix(A)
 Matrix(A::LQ) = Array(AbstractArray(A))
 Array(A::LQ) = Matrix(A)
 
-adjoint(A::LQ) = Adjoint(A)
-Base.copy(F::Adjoint{T,<:LQ{T}}) where {T} =
+transpose(F::LQ{<:Real}) = F'
+transpose(::LQ) =
+    throw(ArgumentError("transpose of LQ decomposition is not supported, consider using adjoint"))
+
+Base.copy(F::AdjointFactorization{T,<:LQ{T}}) where {T} =
     QR{T,typeof(F.parent.factors),typeof(F.parent.τ)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ))
 
 function getproperty(F::LQ, d::Symbol)
@@ -343,7 +346,7 @@ function ldiv!(A::LQ, B::AbstractVecOrMat)
     return lmul!(adjoint(A.Q), B)
 end
 
-function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::AbstractVecOrMat)
+function ldiv!(Fadj::AdjointFactorization{<:Any,<:LQ}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     m, n = size(Fadj)
     m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)"))