Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 1 addition & 14 deletions include/matx/operators/clone.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,20 +107,7 @@ IGNORE_WARNING_POP_GCC
template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{

// convert variadic type to tuple so we can read/update
IGNORE_WARNING_PUSH_GCC("-Wmaybe-uninitialized")
cuda::std::array<index_t, Rank()> sind{indices...};
cuda::std::array<index_t, T::Rank()> gind;
IGNORE_WARNING_POP_GCC

// gather indices
for(int i = 0; i < T::Rank(); i++) {
auto idx = dims_[i];
gind[i] = sind[idx];
}

return cuda::std::apply(op_, gind);
return std::as_const(*this).template operator()(indices...);
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
Expand Down
170 changes: 66 additions & 104 deletions include/matx/operators/collapse.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,70 +55,51 @@ namespace matx

__MATX_INLINE__ std::string str() const { return "lcollapse<" + std::to_string(DIM) + ">(" + op_.str() + ")"; }
__MATX_INLINE__ LCollapseOp(const T1 &op) : op_(op)
{
static_assert(DIM <= T1::Rank(), "Collapse DIM must be less than or equal to Rank() of operator");
static_assert(DIM > 1, "Must collapse multiple dims");
static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2");
{
static_assert(DIM <= T1::Rank(), "Collapse DIM must be less than or equal to Rank() of operator");
static_assert(DIM > 1, "Must collapse multiple dims");
static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2");

// compute size of collapsed dimension
size_ = 1;
// compute size of collapsed dimension
size_ = 1;

// Collapse left-most dims
#pragma unroll
for(int i = 0 ; i < DIM; i++) {
size_ *= op_.Size(i);
// Collapse left-most dims
#pragma unroll
for(int i = 0 ; i < DIM; i++) {
size_ *= op_.Size(i);
}
}
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
// indices coming in
cuda::std::array<index_t, Rank()> in{indices...}; // index coming in
cuda::std::array<index_t, T1::Rank()> out; // index going out
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
// indices coming in
cuda::std::array<index_t, Rank()> in{indices...}; // index coming in
cuda::std::array<index_t, T1::Rank()> out; // index going out

#pragma unroll
for(int i = 1; i < Rank(); i++) {
// copy all but first input index into out array
out[DIM + i - 1] = in[i];
}
for(int i = 1; i < Rank(); i++) {
// copy all but first input index into out array
out[DIM + i - 1] = in[i];
}

// expand first input index into DIM indices
auto ind = in[0];
// expand first input index into DIM indices
auto ind = in[0];
#pragma unroll
for(int i = 0; i < DIM; i++) {
int d = DIM - i - 1;
out[d] = ind % op_.Size(d);
ind /= op_.Size(d);
}
for(int i = 0; i < DIM; i++) {
int d = DIM - i - 1;
out[d] = ind % op_.Size(d);
ind /= op_.Size(d);
}

return cuda::std::apply(op_, out);
}
return cuda::std::apply(op_, out);
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
// indices coming in
cuda::std::array<index_t, Rank()> in{indices...}; // index coming in
cuda::std::array<index_t, T1::Rank()> out; // index going out

#pragma unroll
for(int i = 1; i < Rank(); i++) {
// copy all but first input index into out array
out[DIM + i - 1] = in[i];
}

// expand first input index into DIM indices
auto ind = in[0];
#pragma unroll
for(int i = 0; i < DIM; i++) {
int d = DIM - i - 1;
out[d] = ind % op_.Size(d);
ind /= op_.Size(d);
}

return cuda::std::apply(op_, out);
}
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
return std::as_const(*this).template operator()(indices...);
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
Expand Down Expand Up @@ -211,70 +192,51 @@ namespace matx
__MATX_INLINE__ std::string str() const { return "rcollapse<" + std::to_string(DIM) + ">(" + op_.str() + ")"; }

__MATX_INLINE__ RCollapseOp(const T1 op) : op_(op)
{
static_assert(DIM <= T1::Rank(), "Collapse DIM must be less than or equal to Rank() of operator");
static_assert(DIM > 1, "Collapse DIM must have be greater than 1");
static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2");
{
static_assert(DIM <= T1::Rank(), "Collapse DIM must be less than or equal to Rank() of operator");
static_assert(DIM > 1, "Collapse DIM must have be greater than 1");
static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2");

// compute size of collapsed dimension
size_ = 1;
// compute size of collapsed dimension
size_ = 1;

// Collapse right-most dims
#pragma unroll
for(int i = 0 ; i < DIM; i++) {
size_ *= op_.Size(T1::Rank() - 1 - i);
// Collapse right-most dims
#pragma unroll
for(int i = 0 ; i < DIM; i++) {
size_ *= op_.Size(T1::Rank() - 1 - i);
}
}
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
// indices coming in
cuda::std::array<index_t, Rank()> in{indices...}; // index coming in
cuda::std::array<index_t, T1::Rank()> out; // index going out
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
// indices coming in
cuda::std::array<index_t, Rank()> in{indices...}; // index coming in
cuda::std::array<index_t, T1::Rank()> out; // index going out

#pragma unroll
for(int i = 0 ; i < Rank() - 1; i++) {
// copy all but last index into out array
out[i] = in[i];
}
for(int i = 0 ; i < Rank() - 1; i++) {
// copy all but last index into out array
out[i] = in[i];
}

// expand last index into DIM indices
auto ind = in[Rank() - 1];
// expand last index into DIM indices
auto ind = in[Rank() - 1];
#pragma unroll
for(int i = 0; i < DIM; i++) {
int d = T1::Rank() - 1 - i;
out[d] = ind % op_.Size(d);
ind /= op_.Size(d);
}
for(int i = 0; i < DIM; i++) {
int d = T1::Rank() - 1 - i;
out[d] = ind % op_.Size(d);
ind /= op_.Size(d);
}

return cuda::std::apply(op_, out);
}
return cuda::std::apply(op_, out);
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
// indices coming in
cuda::std::array<index_t, Rank()> in{indices...}; // index coming in
cuda::std::array<index_t, T1::Rank()> out; // index going out

#pragma unroll
for(int i = 0 ; i < Rank() - 1; i++) {
// copy all but last index into out array
out[i] = in[i];
}

// expand last index into DIM indices
auto ind = in[Rank() - 1];
#pragma unroll
for(int i = 0; i < DIM; i++) {
int d = T1::Rank() - 1 - i;
out[d] = ind % op_.Size(d);
ind /= op_.Size(d);
}

return cuda::std::apply(op_, out);
}
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
return std::as_const(*this).template operator()(indices...);
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
Expand Down
52 changes: 26 additions & 26 deletions include/matx/operators/diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,41 +63,41 @@ namespace matx
__MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) { }

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
static_assert(RANK != 0, "Cannot make get diagonals from 0D tensor");
using tt = cuda::std::tuple_element_t<0, cuda::std::tuple<Is...>>;
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
static_assert(RANK != 0, "Cannot make get diagonals from 0D tensor");
using tt = cuda::std::tuple_element_t<0, cuda::std::tuple<Is...>>;

if constexpr (RANK == 1) {
static_assert(sizeof...(Is) == 2, "Indexing of diag() on a 1D input must be 2 indices");
if (((pp_get<0>(indices...) == indices) && ...)) {
return (value_type)(pp_get<0>(indices...));
}
else {
return (value_type)(0);
}
if constexpr (RANK == 1) {
static_assert(sizeof...(Is) == 2, "Indexing of diag() on a 1D input must be 2 indices");
if (((pp_get<0>(indices...) == indices) && ...)) {
return (value_type)(pp_get<0>(indices...));
}
else {
static_assert(sizeof...(Is) == RANK - 1, "Diagonal operator must have one fewer op() index than rank of operator");

// Offset either the rows or columns by k_, depending on if it's negative
if (k_ < 0) {
auto tup = cuda::std::make_tuple(indices..., static_cast<tt>(0));
cuda::std::get<RANK - 1>(tup) = pp_get<RANK-2>(indices...) ;
return (value_type)(0);
}
}
else {
static_assert(sizeof...(Is) == RANK - 1, "Diagonal operator must have one fewer op() index than rank of operator");

// Offset either the rows or columns by k_, depending on if it's negative
if (k_ < 0) {
auto tup = cuda::std::make_tuple(indices..., static_cast<tt>(0));
cuda::std::get<RANK - 1>(tup) = pp_get<RANK-2>(indices...) ;
IGNORE_WARNING_PUSH_GCC("-Wmaybe-uninitialized")
cuda::std::get<RANK - 2>(tup) = cuda::std::get<RANK - 2>(tup) - k_;
cuda::std::get<RANK - 2>(tup) = cuda::std::get<RANK - 2>(tup) - k_;
IGNORE_WARNING_POP_GCC
return cuda::std::apply(op_, tup);
}
else {
auto tup = cuda::std::make_tuple(indices..., static_cast<tt>(0));
return cuda::std::apply(op_, tup);
}
else {
auto tup = cuda::std::make_tuple(indices..., static_cast<tt>(0));
IGNORE_WARNING_PUSH_GCC("-Wmaybe-uninitialized")
cuda::std::get<RANK - 1>(tup) = pp_get<RANK-2>(indices...) + k_;
cuda::std::get<RANK - 1>(tup) = pp_get<RANK-2>(indices...) + k_;
IGNORE_WARNING_POP_GCC
return cuda::std::apply(op_, tup);
}
return cuda::std::apply(op_, tup);
}
}
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
Expand Down
7 changes: 2 additions & 5 deletions include/matx/operators/fftshift.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,11 @@ namespace matx
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
auto tup = cuda::std::make_tuple(indices...);
cuda::std::get<Rank()-1>(tup) = (cuda::std::get<Rank()-1>(tup) + (Size(Rank()-1) + 1) / 2) % Size(Rank()-1);
return cuda::std::apply(op_, tup);
return std::as_const(*this).template operator()(indices...);
}


static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
return detail::get_rank<T1>();
Expand Down
16 changes: 8 additions & 8 deletions include/matx/operators/hermitian.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,14 @@ namespace matx
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
auto tup = cuda::std::make_tuple(indices...);
auto stl = cuda::std::get<Rank()-2>(tup);
cuda::std::get<Rank()-2>(tup) = cuda::std::get<Rank()-1>(tup);
cuda::std::get<Rank()-1>(tup) = stl;
return conj(cuda::std::apply(op_, tup));
}
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
{
auto tup = cuda::std::make_tuple(indices...);
auto stl = cuda::std::get<Rank()-2>(tup);
cuda::std::get<Rank()-2>(tup) = cuda::std::get<Rank()-1>(tup);
cuda::std::get<Rank()-1>(tup) = stl;
return conj(cuda::std::apply(op_, tup));
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
Expand Down
11 changes: 2 additions & 9 deletions include/matx/operators/interleaved.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,9 @@ namespace matx
}

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ complex_type operator()(Is... indices)
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
auto real = op_(indices...);

constexpr size_t rank_idx = (Rank() == 1) ? 0 : (Rank() - 2);
auto tup = cuda::std::make_tuple(indices...);
cuda::std::get<rank_idx>(tup) += op_.Size(rank_idx) / 2;

auto imag = cuda::std::apply(op_, tup);
return complex_type{real, imag};
return std::as_const(*this).template operator()(indices...);
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
Expand Down
10 changes: 1 addition & 9 deletions include/matx/operators/kronecker.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,7 @@ namespace matx
template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
{
auto tup1 = cuda::std::make_tuple(indices...);
auto tup2 = cuda::std::make_tuple(indices...);
cuda::std::get<Rank() - 2>(tup2) = pp_get<Rank() - 2>(indices...) % op2_.Size(Rank() - 2);
cuda::std::get<Rank() - 1>(tup2) = pp_get<Rank() - 1>(indices...) % op2_.Size(Rank() - 1);

cuda::std::get<Rank() - 2>(tup1) = pp_get<Rank() - 2>(indices...) / op2_.Size(Rank() - 2);
cuda::std::get<Rank() - 1>(tup1) = pp_get<Rank() - 1>(indices...) / op2_.Size(Rank() - 1);

return cuda::std::apply(op2_, tup2) * cuda::std::apply(op1_, tup1);
return std::as_const(*this).template operator()(indices...);
}

template <typename ShapeType, typename Executor>
Expand Down
Loading