Skip to content

Commit 2cc955d

Browse files
committed
Refactor utils
Signed-off-by: Rory Mitchell <[email protected]>
1 parent b917ddf commit 2cc955d

File tree

6 files changed

+361
-344
lines changed

6 files changed

+361
-344
lines changed

cpp/include/legate_dataframe/unaryop.hpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,23 @@
1818

1919
#include <legate.h>
2020

21-
#include <cudf/unary.hpp>
22-
2321
#include <legate_dataframe/core/column.hpp>
22+
#include <legate_dataframe/core/library.hpp>
2423

2524
namespace legate::dataframe {
25+
namespace task {
26+
27+
/**
 * @brief Task type bound to `OpCode::Cast` through the `Task` CRTP base.
 *
 * Only the variant entry points are declared in this header; both take the
 * `legate::TaskContext` of the running task and return nothing.
 */
class CastTask : public Task<CastTask, OpCode::Cast> {
28+
public:
29+
/// CPU entry point; declared here, defined out of line.
static void cpu_variant(legate::TaskContext context);
30+
/// GPU entry point; declared here, defined out of line.
static void gpu_variant(legate::TaskContext context);
31+
};
32+
/**
 * @brief Task type bound to `OpCode::UnaryOp` through the `Task` CRTP base.
 *
 * Only the variant entry points are declared in this header; both take the
 * `legate::TaskContext` of the running task and return nothing.
 */
class UnaryOpTask : public Task<UnaryOpTask, OpCode::UnaryOp> {
33+
public:
34+
/// CPU entry point; declared here, defined out of line.
static void cpu_variant(legate::TaskContext context);
35+
/// GPU entry point; declared here, defined out of line.
static void gpu_variant(legate::TaskContext context);
36+
};
37+
} // namespace task
2638

2739
/**
2840
* @brief Cast column to a new data type.

cpp/include/legate_dataframe/utils.hpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include <cudf/types.hpp>
2727

2828
namespace legate::dataframe {
29-
29+
// TODO: add macros here to disable the cudf-related utilities in CPU-only builds
3030
[[nodiscard]] cudf::type_id to_cudf_type_id(legate::Type::Code code);
3131
[[nodiscard]] std::shared_ptr<arrow::DataType> to_arrow_type(cudf::type_id code);
3232
[[nodiscard]] cudf::data_type to_cudf_type(const arrow::DataType& arrow_type);
@@ -38,11 +38,6 @@ namespace legate::dataframe {
3838
[[nodiscard]] legate::Type to_legate_type(cudf::type_id dtype);
3939
[[nodiscard]] legate::Type to_legate_type(const arrow::DataType& arrow_type);
4040

41-
std::string pprint_1d(cudf::column_view col,
42-
cudf::size_type index,
43-
rmm::cuda_stream_view stream,
44-
rmm::mr::device_memory_resource* mr);
45-
4641
const void* read_accessor_as_1d_bytes(const legate::PhysicalStore& store);
4742

4843
std::vector<legate::PhysicalStore> get_stores(const legate::PhysicalArray& ary);

cpp/src/unaryop.cpp

Lines changed: 26 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,10 @@
1414
* limitations under the License.
1515
*/
1616

17-
#include <cudf/types.hpp>
1817
#include <legate.h>
1918

2019
#include <arrow/compute/api.h>
21-
#include <cudf/unary.hpp>
2220

23-
#include <legate_dataframe/core/column.hpp>
24-
#include <legate_dataframe/core/library.hpp>
2521
#include <legate_dataframe/core/table.hpp>
2622
#include <legate_dataframe/core/task_argument.hpp>
2723
#include <legate_dataframe/core/task_context.hpp>
@@ -30,107 +26,41 @@
3026
namespace legate::dataframe {
3127
namespace task {
3228

33-
class CastTask : public Task<CastTask, OpCode::Cast> {
34-
public:
35-
static void cpu_variant(legate::TaskContext context)
36-
{
37-
TaskContext ctx{context};
38-
const auto input = argument::get_next_input<PhysicalColumn>(ctx);
39-
auto output = argument::get_next_output<PhysicalColumn>(ctx);
40-
41-
auto cast = ARROW_RESULT(arrow::compute::Cast(
42-
input.arrow_array_view(), output.arrow_type(), arrow::compute::CastOptions::Unsafe()));
43-
if (get_prefer_eager_allocations()) {
44-
output.copy_into(std::move(cast.make_array()));
45-
} else {
46-
output.move_into(std::move(cast.make_array()));
47-
}
48-
}
49-
50-
static void gpu_variant(legate::TaskContext context)
51-
{
52-
TaskContext ctx{context};
53-
54-
const auto input = argument::get_next_input<PhysicalColumn>(ctx);
55-
auto output = argument::get_next_output<PhysicalColumn>(ctx);
56-
cudf::column_view col = input.column_view();
57-
std::unique_ptr<cudf::column> ret = cudf::cast(col, output.cudf_type(), ctx.stream(), ctx.mr());
58-
if (get_prefer_eager_allocations()) {
59-
output.copy_into(std::move(ret));
60-
} else {
61-
output.move_into(std::move(ret));
62-
}
63-
}
64-
};
65-
66-
cudf::unary_operator arrow_to_cudf_unary_op(std::string op)
29+
/*static*/ void CastTask::cpu_variant(legate::TaskContext context)
6730
{
68-
// Arrow unary operators taken from the below list,
69-
// where an equivalent cudf unary operator exists.
70-
// https://arrow.apache.org/docs/cpp/compute.html#element-wise-scalar-functions
71-
// https://docs.rapids.ai/api/libcudf/stable/group__transformation__unaryops
72-
std::unordered_map<std::string, cudf::unary_operator> arrow_to_cudf_ops = {
73-
{"sin", cudf::unary_operator::SIN}, {"cos", cudf::unary_operator::COS},
74-
{"tan", cudf::unary_operator::TAN}, {"asin", cudf::unary_operator::ARCSIN},
75-
{"acos", cudf::unary_operator::ARCCOS}, {"atan", cudf::unary_operator::ARCTAN},
76-
{"sinh", cudf::unary_operator::SINH}, {"cosh", cudf::unary_operator::COSH},
77-
{"tanh", cudf::unary_operator::TANH}, {"asinh", cudf::unary_operator::ARCSINH},
78-
{"acosh", cudf::unary_operator::ARCCOSH}, {"atanh", cudf::unary_operator::ARCTANH},
79-
{"exp", cudf::unary_operator::EXP}, {"ln", cudf::unary_operator::LOG},
80-
{"sqrt", cudf::unary_operator::SQRT}, {"ceil", cudf::unary_operator::CEIL},
81-
{"floor", cudf::unary_operator::FLOOR}, {"abs", cudf::unary_operator::ABS},
82-
{"round", cudf::unary_operator::RINT}, {"bit_wise_not", cudf::unary_operator::BIT_INVERT},
83-
{"invert", cudf::unary_operator::NOT}, {"negate", cudf::unary_operator::NEGATE}};
84-
85-
if (arrow_to_cudf_ops.find(op) != arrow_to_cudf_ops.end()) { return arrow_to_cudf_ops[op]; }
86-
throw std::invalid_argument("Could not find cudf binary operator matching: " + op);
87-
return cudf::unary_operator::ABS;
88-
}
89-
90-
class UnaryOpTask : public Task<UnaryOpTask, OpCode::UnaryOp> {
91-
public:
92-
static void cpu_variant(legate::TaskContext context)
93-
{
94-
TaskContext ctx{context};
95-
96-
auto op = argument::get_next_scalar<std::string>(ctx);
97-
const auto input = argument::get_next_input<PhysicalColumn>(ctx);
98-
auto output = argument::get_next_output<PhysicalColumn>(ctx);
99-
auto result =
100-
ARROW_RESULT(arrow::compute::CallFunction(op, {input.arrow_array_view()})).make_array();
101-
if (get_prefer_eager_allocations()) {
102-
output.copy_into(std::move(result));
103-
} else {
104-
output.move_into(std::move(result));
105-
}
31+
TaskContext ctx{context};
32+
const auto input = argument::get_next_input<PhysicalColumn>(ctx);
33+
auto output = argument::get_next_output<PhysicalColumn>(ctx);
34+
35+
auto cast = ARROW_RESULT(arrow::compute::Cast(
36+
input.arrow_array_view(), output.arrow_type(), arrow::compute::CastOptions::Unsafe()));
37+
if (get_prefer_eager_allocations()) {
38+
output.copy_into(std::move(cast.make_array()));
39+
} else {
40+
output.move_into(std::move(cast.make_array()));
10641
}
42+
}
10743

108-
static void gpu_variant(legate::TaskContext context)
109-
{
110-
TaskContext ctx{context};
111-
112-
auto op = argument::get_next_scalar<std::string>(ctx);
113-
const auto input = argument::get_next_input<PhysicalColumn>(ctx);
114-
auto output = argument::get_next_output<PhysicalColumn>(ctx);
115-
cudf::column_view col = input.column_view();
116-
std::unique_ptr<cudf::column> ret =
117-
cudf::unary_operation(col, arrow_to_cudf_unary_op(op), ctx.stream(), ctx.mr());
118-
if (get_prefer_eager_allocations()) {
119-
output.copy_into(std::move(ret));
120-
} else {
121-
output.move_into(std::move(ret));
122-
}
44+
/*static*/ void UnaryOpTask::cpu_variant(legate::TaskContext context)
45+
{
46+
TaskContext ctx{context};
47+
48+
auto op = argument::get_next_scalar<std::string>(ctx);
49+
const auto input = argument::get_next_input<PhysicalColumn>(ctx);
50+
auto output = argument::get_next_output<PhysicalColumn>(ctx);
51+
auto result =
52+
ARROW_RESULT(arrow::compute::CallFunction(op, {input.arrow_array_view()})).make_array();
53+
if (get_prefer_eager_allocations()) {
54+
output.copy_into(std::move(result));
55+
} else {
56+
output.move_into(std::move(result));
12357
}
124-
};
58+
}
12559

12660
} // namespace task
12761

12862
LogicalColumn cast(const LogicalColumn& col, cudf::data_type to_type)
12963
{
130-
if (!cudf::is_supported_cast(col.cudf_type(), to_type)) {
131-
throw std::invalid_argument("Cannot cast column to specified type");
132-
}
133-
13464
auto runtime = legate::Runtime::get_runtime();
13565
legate::AutoTask task =
13666
runtime->create_task(get_library(), task::CastTask::TASK_CONFIG.task_id());

cpp/src/unaryop.cu

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <cudf/types.hpp>
18+
#include <cudf/unary.hpp>
19+
#include <legate.h>
20+
21+
#include <legate_dataframe/core/table.hpp>
22+
#include <legate_dataframe/core/task_argument.hpp>
23+
#include <legate_dataframe/core/task_context.hpp>
24+
#include <legate_dataframe/unaryop.hpp>
25+
26+
namespace legate::dataframe::task {
27+
28+
/**
 * @brief GPU variant of the cast task.
 *
 * Pulls one input column and one output column from the task arguments, casts
 * the input to the output's cudf type with `cudf::cast` (using `ctx.stream()`
 * and `ctx.mr()`), and hands the result to the output column. The result is
 * copied into the output when `get_prefer_eager_allocations()` is true and
 * moved into it otherwise.
 *
 * @param context Legate task context of the running task.
 */
/*static*/ void CastTask::gpu_variant(legate::TaskContext context)
{
  TaskContext ctx{context};

  // Arguments are consumed in order: input column first, then output column.
  const auto source = argument::get_next_input<PhysicalColumn>(ctx);
  auto target       = argument::get_next_output<PhysicalColumn>(ctx);

  cudf::column_view source_view = source.column_view();
  std::unique_ptr<cudf::column> casted =
    cudf::cast(source_view, target.cudf_type(), ctx.stream(), ctx.mr());

  if (!get_prefer_eager_allocations()) {
    target.move_into(std::move(casted));
    return;
  }
  target.copy_into(std::move(casted));
}
42+
43+
/**
 * @brief Translate an Arrow compute function name into the matching cudf unary operator.
 *
 * The table covers the Arrow element-wise functions for which an equivalent
 * cudf unary operator exists:
 * https://arrow.apache.org/docs/cpp/compute.html#element-wise-scalar-functions
 * https://docs.rapids.ai/api/libcudf/stable/group__transformation__unaryops
 *
 * @param op Arrow function name, e.g. "sin" or "negate".
 * @return The `cudf::unary_operator` mapped to `op`.
 * @throws std::invalid_argument if `op` has no entry in the table.
 */
cudf::unary_operator arrow_to_cudf_unary_op(std::string op)
{
  // Built once on first use; the table is read-only, so there is no reason to
  // reconstruct it on every call.
  static const std::unordered_map<std::string, cudf::unary_operator> arrow_to_cudf_ops = {
    {"sin", cudf::unary_operator::SIN},       {"cos", cudf::unary_operator::COS},
    {"tan", cudf::unary_operator::TAN},       {"asin", cudf::unary_operator::ARCSIN},
    {"acos", cudf::unary_operator::ARCCOS},   {"atan", cudf::unary_operator::ARCTAN},
    {"sinh", cudf::unary_operator::SINH},     {"cosh", cudf::unary_operator::COSH},
    {"tanh", cudf::unary_operator::TANH},     {"asinh", cudf::unary_operator::ARCSINH},
    {"acosh", cudf::unary_operator::ARCCOSH}, {"atanh", cudf::unary_operator::ARCTANH},
    {"exp", cudf::unary_operator::EXP},       {"ln", cudf::unary_operator::LOG},
    {"sqrt", cudf::unary_operator::SQRT},     {"ceil", cudf::unary_operator::CEIL},
    {"floor", cudf::unary_operator::FLOOR},   {"abs", cudf::unary_operator::ABS},
    {"round", cudf::unary_operator::RINT},    {"bit_wise_not", cudf::unary_operator::BIT_INVERT},
    {"invert", cudf::unary_operator::NOT},    {"negate", cudf::unary_operator::NEGATE}};

  // One lookup via find(); the iterator gives the value without a second hash.
  const auto match = arrow_to_cudf_ops.find(op);
  if (match == arrow_to_cudf_ops.end()) {
    throw std::invalid_argument("Could not find cudf unary operator matching: " + op);
  }
  return match->second;
}
66+
67+
/**
 * @brief GPU variant of the unary-operation task.
 *
 * Reads the operator name (a string scalar), one input column and one output
 * column from the task arguments, maps the name to a cudf operator with
 * `arrow_to_cudf_unary_op`, and applies it with `cudf::unary_operation` (using
 * `ctx.stream()` and `ctx.mr()`). The result is copied into the output when
 * `get_prefer_eager_allocations()` is true and moved into it otherwise.
 *
 * @param context Legate task context of the running task.
 */
/*static*/ void UnaryOpTask::gpu_variant(legate::TaskContext context)
{
  TaskContext ctx{context};

  // Arguments are consumed in order: operator name, input column, output column.
  auto op_name      = argument::get_next_scalar<std::string>(ctx);
  const auto source = argument::get_next_input<PhysicalColumn>(ctx);
  auto target       = argument::get_next_output<PhysicalColumn>(ctx);

  cudf::column_view source_view = source.column_view();
  std::unique_ptr<cudf::column> computed = cudf::unary_operation(
    source_view, arrow_to_cudf_unary_op(op_name), ctx.stream(), ctx.mr());

  if (!get_prefer_eager_allocations()) {
    target.move_into(std::move(computed));
    return;
  }
  target.copy_into(std::move(computed));
}
83+
84+
} // namespace legate::dataframe::task

0 commit comments

Comments
 (0)