19 changes: 19 additions & 0 deletions docs_input/api/linalg/decomp/pinv.rst
@@ -0,0 +1,19 @@
.. _pinv_func:

pinv
####

Compute the Moore-Penrose pseudo-inverse of a matrix.

.. doxygenfunction:: pinv(const OpA &a, float rcond = get_default_rcond<typename OpA::value_type>())

Examples
~~~~~~~~

.. literalinclude:: ../../../../test/00_solver/Pinv.cu
   :language: cpp
   :start-after: example-begin pinv-test-1
   :end-before: example-end pinv-test-1
   :dedent:
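For orientation, a minimal host-side sketch of how the documented pinv() API could be called is shown below. It is not taken from the PR's test file; the tensor shapes, the use of random() for initialization, and the matmul() check are illustrative assumptions.

#include <matx.h>

using namespace matx;

int main() {
  cudaExecutor exec{};

  // Hypothetical 4x3 input A; its pseudo-inverse has the transposed shape 3x4
  auto A  = make_tensor<float>({4, 3});
  auto Ap = make_tensor<float>({3, 4});
  (A = random<float>({4, 3}, UNIFORM)).run(exec);

  // Moore-Penrose pseudo-inverse with the default rcond cutoff
  (Ap = pinv(A)).run(exec);

  // If A has full column rank, Ap * A should be close to the 3x3 identity
  auto check = make_tensor<float>({3, 3});
  (check = matmul(Ap, A)).run(exec);

  exec.sync();
  return 0;
}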


18 changes: 18 additions & 0 deletions docs_input/api/linalg/other/det.rst
@@ -0,0 +1,18 @@
.. _det_func:

det
=====

Compute the determinant of a tensor.

.. doxygenfunction:: det(const OpA &a)

Examples
~~~~~~~~

.. literalinclude:: ../../../../test/00_solver/Det.cu
   :language: cpp
   :start-after: example-begin det-test-1
   :end-before: example-end det-test-1
   :dedent:
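As with pinv above, here is a brief hypothetical sketch of calling det() on a batch of matrices; the batch size, matrix size, and initialization are assumptions made for illustration only.

#include <matx.h>

using namespace matx;

int main() {
  cudaExecutor exec{};

  // Hypothetical batch of 10 square 4x4 matrices; det() reduces the two innermost dims
  auto A = make_tensor<float>({10, 4, 4});
  auto d = make_tensor<float>({10});
  (A = random<float>({10, 4, 4}, NORMAL)).run(exec);

  // One determinant per matrix in the batch
  (d = det(A)).run(exec);

  exec.sync();
  return 0;
}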

5 changes: 3 additions & 2 deletions include/matx/core/tensor.h
@@ -658,9 +658,10 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
   {
     MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
+    [[maybe_unused]] stride_type prod = std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<stride_type>());
     // Ensure new shape's total size is not larger than the original
     MATX_ASSERT_STR(
-        sizeof(M) * shape.TotalSize() <= storage_.Bytes(), matxInvalidSize,
+        sizeof(M) * prod <= storage_.Bytes(), matxInvalidSize,
         "Total size of new tensor must not be larger than the original");
 
     // This could be loosened up to make sure only the fastest changing dims
@@ -877,7 +878,7 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
   {
     MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
-    static_assert(RANK >= 2, "Only tensors of rank 2 and higher can be permuted.");
+    static_assert(RANK >= 1, "Only tensors of rank 1 and higher can be permuted.");
     cuda::std::array<shape_type, RANK> n;
     cuda::std::array<stride_type, RANK> s;
     [[maybe_unused]] bool done[RANK] = {0};
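The replaced assertion above computes the requested element count with std::accumulate instead of calling TotalSize() on the shape argument. A standalone illustration of that check, using a plain std::array and hypothetical sizes rather than MatX types, is:

#include <array>
#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>

int main() {
  // Hypothetical: storage for 24 floats, viewed with a new 2x3x4 shape
  const std::size_t storage_bytes = 24 * sizeof(float);
  const std::array<std::size_t, 3> shape{2, 3, 4};

  // Element count of the new shape, computed the same way as in the diff
  const std::size_t prod = std::accumulate(std::begin(shape), std::end(shape),
                                           std::size_t{1},
                                           std::multiplies<std::size_t>());

  // The new view must not need more bytes than the original storage provides
  assert(sizeof(float) * prod <= storage_bytes);
  return 0;
}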
2 changes: 0 additions & 2 deletions include/matx/operators/chol.h
@@ -104,8 +104,6 @@ namespace detail {
       }
     }
 
-    // Size is not relevant in eig() since there are multiple return values and it
-    // is not allowed to be called in larger expressions
     constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
     {
       return a_.Size(dim);
9 changes: 7 additions & 2 deletions include/matx/operators/det.h
@@ -96,8 +96,6 @@ namespace detail {
       }
     }
 
-    // Size is not relevant in det() since there are multiple return values and it
-    // is not allowed to be called in larger expressions
     constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
     {
       return a_.Size(dim);
@@ -106,6 +104,13 @@ namespace detail {
   };
 }
 
+/**
+ * Computes the determinant by performing an LU factorization of the input,
+ * and then calculating the product of diagonal entries of the U factor.
+ *
+ * For tensors of rank > 2, batching is performed.
+ *
+ */
 template<typename OpA>
 __MATX_INLINE__ auto det(const OpA &a) {
   return detail::DetOp(a);
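To make the LU-based description in the new comment concrete, the following self-contained sketch (plain C++, no MatX, and no pivoting, which is fine for this hand-picked matrix) eliminates A to an upper-triangular U and takes the product of its diagonal:

#include <cstdio>

int main() {
  // Hypothetical 3x3 matrix whose leading minors are nonzero, so no pivoting is needed
  double a[3][3] = {{2.0, 1.0, 1.0}, {4.0, -6.0, 0.0}, {-2.0, 7.0, 2.0}};

  // In-place Gaussian elimination to upper-triangular form (the "U" of an LU factorization)
  for (int k = 0; k < 3; k++) {
    for (int i = k + 1; i < 3; i++) {
      const double factor = a[i][k] / a[k][k];
      for (int j = k; j < 3; j++) {
        a[i][j] -= factor * a[k][j];
      }
    }
  }

  // The determinant is the product of the diagonal entries of U (-16 for this matrix)
  double det = 1.0;
  for (int k = 0; k < 3; k++) {
    det *= a[k][k];
  }
  std::printf("det = %g\n", det);
  return 0;
}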
1 change: 1 addition & 0 deletions include/matx/operators/operators.h
@@ -80,6 +80,7 @@
 #include "matx/operators/outer.h"
 #include "matx/operators/overlap.h"
 #include "matx/operators/percentile.h"
+#include "matx/operators/pinv.h"
 #include "matx/operators/permute.h"
 #include "matx/operators/planar.h"
 #include "matx/operators/polyval.h"
144 changes: 144 additions & 0 deletions include/matx/operators/pinv.h
@@ -0,0 +1,144 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2021, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#pragma once


#include "matx/core/type_utils.h"
#include "matx/operators/base_operator.h"
#include "matx/transforms/pinv.h"

namespace matx {
namespace detail {
template<typename OpA>
class PinvOp : public BaseOp<PinvOp<OpA>>
{
private:
OpA a_;
float rcond_;
cuda::std::array<index_t, OpA::Rank()> out_dims_;
mutable detail::tensor_impl_t<typename remove_cvref_t<OpA>::value_type, OpA::Rank()> tmp_out_;
mutable typename remove_cvref_t<OpA>::value_type *ptr;

public:
using matxop = bool;
using value_type = typename OpA::value_type;
using matx_transform_op = bool;
using pinv_xform_op = bool;

__MATX_INLINE__ std::string str() const { return "pinv()"; }
__MATX_INLINE__ PinvOp(OpA a, float rcond) : a_(a), rcond_(rcond) {
for (int r = 0; r < Rank(); r++) {
if (r >= Rank() - 2) {
out_dims_[r] = (r == Rank() - 1) ? a_.Size(Rank() - 2) : a_.Size(Rank() - 1);
}
else {
out_dims_[r] = a_.Size(r);
}
}
};

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
return tmp_out_(indices...);
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
return OpA::Rank();
}

constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
{
return out_dims_[dim];
}

template <typename Out, typename Executor>
void Exec(Out &&out, Executor &&ex) const{
pinv_impl(cuda::std::get<0>(out), a_, ex, rcond_);
}

template <typename ShapeType, typename Executor>
__MATX_INLINE__ void InnerPreRun([[maybe_unused]] ShapeType &&shape, Executor &&ex) const noexcept
{
if constexpr (is_matx_op<OpA>()) {
a_.PreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
}
}

template <typename ShapeType, typename Executor>
__MATX_INLINE__ void PreRun([[maybe_unused]] ShapeType &&shape, Executor &&ex) const noexcept
{
InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));

detail::AllocateTempTensor(tmp_out_, std::forward<Executor>(ex), out_dims_, &ptr);

Exec(cuda::std::make_tuple(tmp_out_), std::forward<Executor>(ex));
}

template <typename ShapeType, typename Executor>
__MATX_INLINE__ void PostRun(ShapeType &&shape, Executor &&ex) const noexcept
{
if constexpr (is_matx_op<OpA>()) {
a_.PostRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
}
}

};
}

/**
* Perform a generalized inverse of a matrix using its singular-value decomposition (SVD).
* It automatically removes small singular values for stability.
*
* For tensors of rank > 2, batching is performed.
*
* @tparam OpA
* Tensor or operator type of input A
*
* @param a
* Input tensor or operator of shape `... x m x n`
* @param rcond
* Cutoff for small singular values. For stability, singular values
* smaller than `rcond * largest_singular_value` are set to 0 for each matrix
* in the batch. By default, `rcond` is approximately the machine epsilon of the tensor dtype.
*
* @return
* An operator that gives a tensor of size `... x n x m` representing the pseudo-inverse of the input
*/
template<typename OpA>
__MATX_INLINE__ auto pinv(const OpA &a, float rcond = get_default_rcond<typename OpA::value_type>()) {
return detail::PinvOp(a, rcond);
}

}
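For reference, the SVD-based construction this doc comment describes is the standard truncated-SVD form of the Moore-Penrose inverse; in LaTeX notation (not copied from the implementation):

A = U \Sigma V^{H}, \qquad
A^{+} = V \, \Sigma^{+} U^{H}, \qquad
\Sigma^{+}_{ii} =
\begin{cases}
  1/\sigma_i & \text{if } \sigma_i > \mathrm{rcond} \cdot \sigma_{\max}, \\
  0 & \text{otherwise,}
\end{cases}

so singular values at or below the rcond cutoff contribute nothing to the result.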
5 changes: 5 additions & 0 deletions include/matx/operators/svd.h
@@ -65,6 +65,7 @@ namespace detail {
     template <typename... Is>
     __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const = delete;
 
+    // TODO: Handle SVDMode::NONE case better to not require U & VT
     template <typename Out, typename Executor>
     void Exec(Out &&out, Executor &&ex) const {
       static_assert(cuda::std::tuple_size_v<remove_cvref_t<Out>> == 4, "Must use mtie with 3 outputs on svd(). ie: (mtie(U, S, VT) = svd(A))");
@@ -99,6 +100,10 @@
 /**
  * Perform a singular value decomposition (SVD) using cuSolver or a LAPACK host
  * library.
  *
+ * The singular values within each vector are sorted in descending order.
+ *
+ * For tensors of Rank > 2, batching is performed.
+ *
  * @tparam OpA
  *   Operator input type
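For context, the mtie() binding pattern named in the static_assert message looks roughly like the following; a square input is used so the U, S, and VT shapes are unambiguous, and all names and sizes are illustrative rather than taken from the PR.

#include <matx.h>

using namespace matx;

int main() {
  cudaExecutor exec{};

  // Hypothetical 6x6 input; S receives the 6 singular values in descending order
  auto A  = make_tensor<float>({6, 6});
  auto U  = make_tensor<float>({6, 6});
  auto S  = make_tensor<float>({6});
  auto VT = make_tensor<float>({6, 6});
  (A = random<float>({6, 6}, NORMAL)).run(exec);

  // Three outputs bound with mtie(), as the assertion message describes
  (mtie(U, S, VT) = svd(A)).run(exec);

  exec.sync();
  return 0;
}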
2 changes: 0 additions & 2 deletions include/matx/operators/trace.h
@@ -110,8 +110,6 @@ namespace detail {
   }
 
 /**
- * Computes the trace of a tensor
- *
  * Computes the trace of a square matrix by summing the diagonal
  *
  * @tparam InputOperator
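Since only a redundant comment line is removed here, the operator's behavior is unchanged; a short hypothetical usage sketch of trace(), with the identity matrix chosen so the expected result is obvious, is:

#include <matx.h>

using namespace matx;

int main() {
  cudaExecutor exec{};

  // Hypothetical 4x4 identity; its trace is 4
  auto A = make_tensor<float>({4, 4});
  auto t = make_tensor<float>({});   // 0-D output holding the scalar result
  (A = eye<float>({4, 4})).run(exec);

  // Sum of the main diagonal
  (t = trace(A)).run(exec);

  exec.sync();
  return 0;
}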
22 changes: 7 additions & 15 deletions include/matx/transforms/det.h
@@ -51,10 +51,8 @@ namespace matx {
 /**
  * Compute the determinant of a matrix
  *
- * Computes the terminant of a matrix by first computing the LU composition,
- * then reduces the product of the diagonal elements of U. The input and output
- * parameters may be the same tensor. In that case, the input is destroyed and
- * the output is stored in-place.
+ * Computes the determinant of a matrix by first computing the LU decomposition,
+ * then reduces the product of the diagonal elements of U.
  *
  * @tparam T1
  *   Data type of matrix A
@@ -80,22 +78,16 @@ void det_impl(OutputTensor &out, const InputTensor &a,
   constexpr int RANK = InputTensor::Rank();
   using value_type = typename OutputTensor::value_type;
   using piv_value_type = std::conditional_t<is_cuda_executor_v<Executor>, int64_t, lapack_int_t>;
 
-  auto a_new = OpToTensor(a, exec);
-
-  if(!a_new.isSameView(a)) {
-    (a_new = a).run(exec);
-  }
-
   // Get parameters required by these tensors
   cuda::std::array<index_t, RANK - 1> s;
 
   // Set batching dimensions of piv
   for (int i = 0; i < RANK - 2; i++) {
-    s[i] = a_new.Size(i);
+    s[i] = a.Size(i);
   }
 
-  index_t piv_len = cuda::std::min(a_new.Size(RANK - 1), a_new.Size(RANK - 2));
+  index_t piv_len = cuda::std::min(a.Size(RANK - 1), a.Size(RANK - 2));
   s[RANK - 2] = piv_len;
 
   tensor_t<piv_value_type, RANK-1> piv;
@@ -104,13 +96,13 @@
   if constexpr (is_cuda_executor_v<Executor>) {
     const auto stream = exec.getStream();
     make_tensor(piv, s, MATX_ASYNC_DEVICE_MEMORY, stream);
-    make_tensor(ac, a_new.Shape(), MATX_ASYNC_DEVICE_MEMORY, stream);
+    make_tensor(ac, a.Shape(), MATX_ASYNC_DEVICE_MEMORY, stream);
   } else {
     make_tensor(piv, s, MATX_HOST_MALLOC_MEMORY);
-    make_tensor(ac, a_new.Shape(), MATX_HOST_MALLOC_MEMORY);
+    make_tensor(ac, a.Shape(), MATX_HOST_MALLOC_MEMORY);
   }
 
-  lu_impl(ac, piv, a_new, exec);
+  lu_impl(ac, piv, a, exec);
 
   // Determinant sign adjustment based on piv permutation
   // Create indices corresponding to no permutation to compare against
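To illustrate the pivot-based sign adjustment that the trailing comments refer to, here is a small standalone sketch using LAPACK-style 1-based pivots and made-up values: every pivot entry that differs from its own row index records a row swap, and each swap flips the sign of the product of U's diagonal.

#include <cstdio>

int main() {
  // Hypothetical getrf-style pivot vector for a 4x4 factorization (1-based):
  // piv[i] is the row that was exchanged with row i+1 during elimination
  const int piv[4] = {3, 2, 4, 4};

  // Diagonal of U from the same hypothetical factorization
  const double u_diag[4] = {2.0, -1.5, 0.5, 4.0};

  double det = 1.0;
  for (int i = 0; i < 4; i++) {
    det *= u_diag[i];
    // An entry equal to its own (1-based) index means no swap happened at this step
    if (piv[i] != i + 1) {
      det = -det;
    }
  }

  // Product of the diagonal is -6; two swaps flip the sign twice, so det stays -6
  std::printf("det = %g\n", det);
  return 0;
}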