Skip to content

Commit 75497da

Browse files
committed
[MLIR][Math] Add erfc to math dialect
This patch adds the erfc op to the math dialect and lowers the math.erfc op to libm calls. It also adds an f32 polynomial approximation of the function, based on https://stackoverflow.com/questions/35966695/vectorizable-implementation-of-complementary-error-function-erfcf, which is in turn based on M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of (1+2x)exp(x^2)erfc x in 0 <= x < INF", Mathematics of Computation, Vol. 36, No. 153, January 1981, pp. 249-253. The approximation was tested to have an error of less than 3 ULP, and the MLIR test values were verified against the C implementation.
1 parent d204724 commit 75497da

File tree

9 files changed

+344
-9
lines changed

9 files changed

+344
-9
lines changed

mlir/include/mlir/Dialect/Math/IR/MathOps.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,28 @@ def Math_ErfOp : Math_FloatUnaryOp<"erf"> {
543543
let hasFolder = 1;
544544
}
545545

546+
//===----------------------------------------------------------------------===//
547+
// ErfcOp
548+
//===----------------------------------------------------------------------===//
549+
550+
def Math_ErfcOp : Math_FloatUnaryOp<"erfc"> {
551+
let summary = "complementary error function of the specified value";
552+
let description = [{
553+
The `erfc` operation computes the complementary error function.
554+
It takes one operand of floating point type (i.e., scalar, tensor or
555+
vector) and returns one result of the same type.
556+
It has no standard attributes.
557+
558+
Example:
559+
560+
```mlir
561+
// Scalar complementary error function value.
562+
%a = math.erfc %b : f64
563+
```
564+
}];
565+
// Declares a folder for this op; it is defined as math::ErfcOp::fold.
let hasFolder = 1;
566+
}
567+
546568

547569
//===----------------------------------------------------------------------===//
548570
// ExpOp

mlir/include/mlir/Dialect/Math/Transforms/Approximation.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ struct ErfPolynomialApproximation : public OpRewritePattern<math::ErfOp> {
2323
PatternRewriter &rewriter) const final;
2424
};
2525

26+
/// Rewrite pattern for math.erfc. Its matchAndRewrite replaces the op with a
/// polynomial approximation and reports a match failure for element types
/// other than f32.
struct ErfcPolynomialApproximation : public OpRewritePattern<math::ErfcOp> {
27+
public:
28+
// Inherit the constructors of OpRewritePattern.
using OpRewritePattern::OpRewritePattern;
29+
30+
/// Rewrites `op` through `rewriter`; returns success() when the op was
/// replaced and a match failure otherwise.
LogicalResult matchAndRewrite(math::ErfcOp op,
31+
PatternRewriter &rewriter) const final;
32+
};
33+
2634
} // namespace math
2735
} // namespace mlir
2836

mlir/include/mlir/Dialect/Math/Transforms/Passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct MathPolynomialApproximationOptions {
4747

4848
void populatePolynomialApproximateTanhPattern(RewritePatternSet &patterns);
4949
void populatePolynomialApproximateErfPattern(RewritePatternSet &patterns);
50+
// Adds the polynomial approximation pattern for math.erfc to `patterns`.
void populatePolynomialApproximateErfcPattern(RewritePatternSet &patterns);
5051

5152
void populateMathPolynomialApproximationPatterns(
5253
RewritePatternSet &patterns,

mlir/lib/Conversion/MathToLibm/MathToLibm.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ void mlir::populateMathToLibmConversionPatterns(RewritePatternSet &patterns) {
175175
populatePatternsForOp<math::CosOp>(patterns, ctx, "cosf", "cos");
176176
populatePatternsForOp<math::CoshOp>(patterns, ctx, "coshf", "cosh");
177177
populatePatternsForOp<math::ErfOp>(patterns, ctx, "erff", "erf");
178+
populatePatternsForOp<math::ErfcOp>(patterns, ctx, "erfcf", "erfc");
178179
populatePatternsForOp<math::ExpOp>(patterns, ctx, "expf", "exp");
179180
populatePatternsForOp<math::Exp2Op>(patterns, ctx, "exp2f", "exp2");
180181
populatePatternsForOp<math::ExpM1Op>(patterns, ctx, "expm1f", "expm1");

mlir/lib/Dialect/Math/IR/MathOps.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,24 @@ OpFoldResult math::ErfOp::fold(FoldAdaptor adaptor) {
318318
});
319319
}
320320

321+
//===----------------------------------------------------------------------===//
322+
// ErfcOp folder
323+
//===----------------------------------------------------------------------===//
324+
325+
// Folds math.erfc through constFoldUnaryOpConditional. The callback evaluates
// 64-bit values with erfc and 32-bit values with erfcf; for any other bit
// width it returns an empty optional (presumably leaving the op unfolded).
OpFoldResult math::ErfcOp::fold(FoldAdaptor adaptor) {
326+
return constFoldUnaryOpConditional<FloatAttr>(
327+
adaptor.getOperands(), [](const APFloat &a) -> std::optional<APFloat> {
328+
// Dispatch on the bit width of the operand's float semantics.
switch (a.getSizeInBits(a.getSemantics())) {
329+
case 64:
330+
return APFloat(erfc(a.convertToDouble()));
331+
case 32:
332+
return APFloat(erfcf(a.convertToFloat()));
333+
default:
334+
// No evaluation is provided for other widths.
return {};
335+
}
336+
});
337+
}
338+
321339
//===----------------------------------------------------------------------===//
322340
// IPowIOp folder
323341
//===----------------------------------------------------------------------===//

mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp

Lines changed: 109 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,10 @@ handleMultidimensionalVectors(ImplicitLocOpBuilder &builder,
173173
// Helper functions to create constants.
174174
//----------------------------------------------------------------------------//
175175

176+
// Creates an arith.constant holding the boolean attribute for `value`, using
// the location carried by `builder`.
static Value boolCst(ImplicitLocOpBuilder &builder, bool value) {
177+
return builder.create<arith::ConstantOp>(builder.getBoolAttr(value));
178+
}
179+
176180
static Value floatCst(ImplicitLocOpBuilder &builder, float value,
177181
Type elementType) {
178182
assert((elementType.isF16() || elementType.isF32()) &&
@@ -1118,12 +1122,102 @@ ErfPolynomialApproximation::matchAndRewrite(math::ErfOp op,
11181122
return success();
11191123
}
11201124

1125+
// Approximates erfc(x) for f32 element types as
//   erfc(a) ~= (1 + P(q)) / (1 + 2a) * exp(-a^2),
//   a = |x|, q = (a - 2) / (a + 2),
// where P is a degree-9 polynomial evaluated with Horner's scheme, and uses
// 2 - erfc(a) for negative x. Inputs with a above the clamp value, infinite
// inputs and NaN are handled by the selects at the end.
// Based on M. M. Shepherd and J. G. Laframboise, "Chebyshev Approximation of
// (1+2x)exp(x^2)erfc x in 0 <= x < INF", Mathematics of Computation, Vol. 36,
// No. 153, January 1981, pp. 249-253.
// Returns a match failure when the element type is not f32.
1126+
LogicalResult
1127+
ErfcPolynomialApproximation::matchAndRewrite(math::ErfcOp op,
1128+
PatternRewriter &rewriter) const {
1129+
Value x = op.getOperand();
1130+
Type et = getElementTypeOrSelf(x);
1131+
1132+
if (!et.isF32())
1133+
return rewriter.notifyMatchFailure(op, "only f32 type is supported.");
1134+
std::optional<VectorShape> shape = vectorShape(x);
1135+
1136+
ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
1137+
// Passes every constant through the `broadcast` helper with the operand's
// (optional) vector shape.
auto bcast = [&](Value value) -> Value {
1138+
return broadcast(builder, value, shape);
1139+
};
1140+
1141+
Value trueValue = bcast(boolCst(builder, true));
1142+
Value zero = bcast(floatCst(builder, 0.0f, et));
1143+
Value one = bcast(floatCst(builder, 1.0f, et));
1144+
Value onehalf = bcast(floatCst(builder, 0.5f, et));
1145+
Value neg4 = bcast(floatCst(builder, -4.0f, et));
1146+
Value neg2 = bcast(floatCst(builder, -2.0f, et));
1147+
Value pos2 = bcast(floatCst(builder, 2.0f, et));
1148+
// 0x7f800000 is the f32 bit pattern of +infinity.
Value posInf = bcast(f32FromBits(builder, 0x7f800000u));
1149+
// Threshold on |x| above which the result is replaced with zero below.
Value clampVal = bcast(floatCst(builder, 10.0546875f, et));
1150+
1151+
// Get abs(x)
1152+
Value isNegativeArg =
1153+
builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, x, zero);
1154+
Value negArg = builder.create<arith::NegFOp>(x);
1155+
Value a = builder.create<arith::SelectOp>(isNegativeArg, negArg, x);
1156+
// q = (a - 2) / (a + 2), first computed as 1 - 4 / (a + 2) and then refined
// with the residual e = (a - 2) - q * (a + 2): q = q + r * e.
Value p = builder.create<arith::AddFOp>(a, pos2);
1157+
Value r = builder.create<arith::DivFOp>(one, p);
1158+
Value q = builder.create<math::FmaOp>(neg4, r, one);
1159+
Value t = builder.create<math::FmaOp>(builder.create<arith::AddFOp>(q, one),
1160+
neg2, a);
1161+
Value e = builder.create<math::FmaOp>(builder.create<arith::NegFOp>(a), q, t);
1162+
q = builder.create<math::FmaOp>(r, e, q);
1163+
1164+
// Horner evaluation of the polynomial in q, starting from the highest-order
// coefficient: p = p * q + c_i for each coefficient in turn.
p = bcast(floatCst(builder, -0x1.a4a000p-12f, et)); // -4.01139259e-4
1165+
Value c1 = bcast(floatCst(builder, -0x1.42a260p-10f, et)); // -1.23075210e-3
1166+
p = builder.create<math::FmaOp>(p, q, c1);
1167+
Value c2 = bcast(floatCst(builder, 0x1.585714p-10f, et)); // 1.31355342e-3
1168+
p = builder.create<math::FmaOp>(p, q, c2);
1169+
Value c3 = bcast(floatCst(builder, 0x1.1adcc4p-07f, et)); // 8.63227434e-3
1170+
p = builder.create<math::FmaOp>(p, q, c3);
1171+
Value c4 = bcast(floatCst(builder, -0x1.081b82p-07f, et)); // -8.05991981e-3
1172+
p = builder.create<math::FmaOp>(p, q, c4);
1173+
Value c5 = bcast(floatCst(builder, -0x1.bc0b6ap-05f, et)); // -5.42046614e-2
1174+
p = builder.create<math::FmaOp>(p, q, c5);
1175+
Value c6 = bcast(floatCst(builder, 0x1.4ffc46p-03f, et)); // 1.64055392e-1
1176+
p = builder.create<math::FmaOp>(p, q, c6);
1177+
Value c7 = bcast(floatCst(builder, -0x1.540840p-03f, et)); // -1.66031361e-1
1178+
p = builder.create<math::FmaOp>(p, q, c7);
1179+
Value c8 = bcast(floatCst(builder, -0x1.7bf616p-04f, et)); // -9.27639827e-2
1180+
p = builder.create<math::FmaOp>(p, q, c8);
1181+
Value c9 = bcast(floatCst(builder, 0x1.1ba03ap-02f, et)); // 2.76978403e-1
1182+
p = builder.create<math::FmaOp>(p, q, c9);
1183+
1184+
// r = (1 + p) / (1 + 2a): q = (p + 1) * (1 / d) is the first estimate, and
// e = (p + 1) - q * (2a + 1) is its residual, applied as r = q + e * (1 / d).
Value d = builder.create<math::FmaOp>(pos2, a, one);
1185+
r = builder.create<arith::DivFOp>(one, d);
1186+
q = builder.create<math::FmaOp>(p, r, r);
1187+
e = builder.create<math::FmaOp>(
1188+
builder.create<math::FmaOp>(q, builder.create<arith::NegFOp>(a), onehalf),
1189+
pos2, builder.create<arith::SubFOp>(p, q));
1190+
r = builder.create<math::FmaOp>(e, r, q);
1191+
1192+
// e = exp(-s) with s = a * a (rounded).
Value s = builder.create<arith::MulFOp>(a, a);
1193+
e = builder.create<math::ExpOp>(builder.create<arith::NegFOp>(s));
1194+
1195+
// t = s - a * a, computed with a single FMA, is the rounding error of s; it
// is folded into the product as r * e + (r * e) * t.
t = builder.create<math::FmaOp>(builder.create<arith::NegFOp>(a), a, s);
1196+
r = builder.create<math::FmaOp>(
1197+
r, e,
1198+
builder.create<arith::MulFOp>(builder.create<arith::MulFOp>(r, e), t));
1199+
1200+
// The ordered compare `a < +inf` is false when a is +inf or NaN; negating it
// (xor with true) selects x + x for those inputs, which propagates NaN.
Value isNotLessThanInf = builder.create<arith::XOrIOp>(
1201+
builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, a, posInf),
1202+
trueValue);
1203+
r = builder.create<arith::SelectOp>(isNotLessThanInf,
1204+
builder.create<arith::AddFOp>(x, x), r);
1205+
// Replace the result with zero when |x| exceeds the clamp value (this also
// covers infinite inputs).
Value isGreaterThanClamp =
1206+
builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, a, clampVal);
1207+
r = builder.create<arith::SelectOp>(isGreaterThanClamp, zero, r);
1208+
1209+
// r holds the value for |x|; for negative x the result is 2 - r.
Value isNegative =
1210+
builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, x, zero);
1211+
r = builder.create<arith::SelectOp>(
1212+
isNegative, builder.create<arith::SubFOp>(pos2, r), r);
1213+
1214+
rewriter.replaceOp(op, r);
1215+
return success();
1216+
}
11211217
//----------------------------------------------------------------------------//
11221218
// Exp approximation.
11231219
//----------------------------------------------------------------------------//
1124-
11251220
namespace {
1126-
11271221
Value clampWithNormals(ImplicitLocOpBuilder &builder,
11281222
const std::optional<VectorShape> shape, Value value,
11291223
float lowerBound, float upperBound) {
@@ -1667,6 +1761,11 @@ void mlir::populatePolynomialApproximateErfPattern(
16671761
patterns.add<ErfPolynomialApproximation>(patterns.getContext());
16681762
}
16691763

1764+
// Registers only ErfcPolynomialApproximation in `patterns`, constructed with
// the pattern set's context.
void mlir::populatePolynomialApproximateErfcPattern(
1765+
RewritePatternSet &patterns) {
1766+
patterns.add<ErfcPolynomialApproximation>(patterns.getContext());
1767+
}
1768+
16701769
void mlir::populateMathPolynomialApproximationPatterns(
16711770
RewritePatternSet &patterns,
16721771
const MathPolynomialApproximationOptions &options) {
@@ -1680,13 +1779,14 @@ void mlir::populateMathPolynomialApproximationPatterns(
16801779
ReuseF32Expansion<math::SinOp>, ReuseF32Expansion<math::CosOp>>(
16811780
patterns.getContext());
16821781

1683-
patterns
1684-
.add<AtanApproximation, Atan2Approximation, TanhApproximation,
1685-
LogApproximation, Log2Approximation, Log1pApproximation,
1686-
ErfPolynomialApproximation, AsinPolynomialApproximation,
1687-
AcosPolynomialApproximation, ExpApproximation, ExpM1Approximation,
1688-
CbrtApproximation, SinAndCosApproximation<true, math::SinOp>,
1689-
SinAndCosApproximation<false, math::CosOp>>(patterns.getContext());
1782+
patterns.add<AtanApproximation, Atan2Approximation, TanhApproximation,
1783+
LogApproximation, Log2Approximation, Log1pApproximation,
1784+
ErfPolynomialApproximation, ErfcPolynomialApproximation,
1785+
AsinPolynomialApproximation, AcosPolynomialApproximation,
1786+
ExpApproximation, ExpM1Approximation, CbrtApproximation,
1787+
SinAndCosApproximation<true, math::SinOp>,
1788+
SinAndCosApproximation<false, math::CosOp>>(
1789+
patterns.getContext());
16901790
if (options.enableAvx2) {
16911791
patterns.add<RsqrtApproximation, ReuseF32Expansion<math::RsqrtOp>>(
16921792
patterns.getContext());

mlir/test/Dialect/Math/polynomial-approximation.mlir

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,118 @@ func.func @erf_scalar(%arg0: f32) -> f32 {
8181
return %0 : f32
8282
}
8383

84+
// CHECK-LABEL: func @erfc_scalar(
85+
// CHECK-SAME: %[[val_arg0:.*]]: f32) -> f32 {
86+
// CHECK-DAG: %[[c127_i32:.*]] = arith.constant 127 : i32
87+
// CHECK-DAG: %[[c23_i32:.*]] = arith.constant 23 : i32
88+
// CHECK-DAG: %[[cst:.*]] = arith.constant 1.270000e+02 : f32
89+
// CHECK-DAG: %[[cst_0:.*]] = arith.constant -1.270000e+02 : f32
90+
// CHECK-DAG: %[[cst_1:.*]] = arith.constant 8.880000e+01 : f32
91+
// CHECK-DAG: %[[cst_2:.*]] = arith.constant -8.780000e+01 : f32
92+
// CHECK-DAG: %[[cst_3:.*]] = arith.constant 0.166666657 : f32
93+
// CHECK-DAG: %[[cst_4:.*]] = arith.constant 0.0416657962 : f32
94+
// CHECK-DAG: %[[cst_5:.*]] = arith.constant 0.00833345205 : f32
95+
// CHECK-DAG: %[[cst_6:.*]] = arith.constant 0.00139819994 : f32
96+
// CHECK-DAG: %[[cst_7:.*]] = arith.constant 1.98756912E-4 : f32
97+
// CHECK-DAG: %[[cst_8:.*]] = arith.constant 2.12194442E-4 : f32
98+
// CHECK-DAG: %[[cst_9:.*]] = arith.constant -0.693359375 : f32
99+
// CHECK-DAG: %[[cst_10:.*]] = arith.constant 1.44269502 : f32
100+
// CHECK-DAG: %[[cst_11:.*]] = arith.constant 0.276978403 : f32
101+
// CHECK-DAG: %[[cst_12:.*]] = arith.constant -0.0927639827 : f32
102+
// CHECK-DAG: %[[cst_13:.*]] = arith.constant -0.166031361 : f32
103+
// CHECK-DAG: %[[cst_14:.*]] = arith.constant 0.164055392 : f32
104+
// CHECK-DAG: %[[cst_15:.*]] = arith.constant -0.0542046614 : f32
105+
// CHECK-DAG: %[[cst_16:.*]] = arith.constant -8.059920e-03 : f32
106+
// CHECK-DAG: %[[cst_17:.*]] = arith.constant 0.00863227434 : f32
107+
// CHECK-DAG: %[[cst_18:.*]] = arith.constant 0.00131355342 : f32
108+
// CHECK-DAG: %[[cst_19:.*]] = arith.constant -0.0012307521 : f32
109+
// CHECK-DAG: %[[cst_20:.*]] = arith.constant -4.01139259E-4 : f32
110+
// CHECK-DAG: %[[cst_true:.*]] = arith.constant true
111+
// CHECK-DAG: %[[cst_21:.*]] = arith.constant 0.000000e+00 : f32
112+
// CHECK-DAG: %[[cst_22:.*]] = arith.constant 1.000000e+00 : f32
113+
// CHECK-DAG: %[[cst_23:.*]] = arith.constant 5.000000e-01 : f32
114+
// CHECK-DAG: %[[cst_24:.*]] = arith.constant -4.000000e+00 : f32
115+
// CHECK-DAG: %[[cst_25:.*]] = arith.constant -2.000000e+00 : f32
116+
// CHECK-DAG: %[[cst_26:.*]] = arith.constant 2.000000e+00 : f32
117+
// CHECK-DAG: %[[cst_27:.*]] = arith.constant 0x7F800000 : f32
118+
// CHECK-DAG: %[[cst_28:.*]] = arith.constant 10.0546875 : f32
119+
// CHECK: %[[val_0:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
120+
// CHECK: %[[val_1:.*]] = arith.negf %[[val_arg0]] : f32
121+
// CHECK: %[[val_2:.*]] = arith.select %[[val_0]], %[[val_1]], %[[val_arg0]] : f32
122+
// CHECK: %[[val_3:.*]] = arith.addf %[[val_2]], %[[cst_26]] : f32
123+
// CHECK: %[[val_4:.*]] = arith.divf %[[cst_22]], %[[val_3]] : f32
124+
// CHECK: %[[val_5:.*]] = math.fma %[[cst_24]], %[[val_4]], %[[cst_22]] : f32
125+
// CHECK: %[[val_6:.*]] = arith.addf %[[val_5]], %[[cst_22]] : f32
126+
// CHECK: %[[val_7:.*]] = math.fma %[[val_6]], %[[cst_25]], %[[val_2]] : f32
127+
// CHECK: %[[val_8:.*]] = arith.negf %[[val_2]] : f32
128+
// CHECK: %[[val_9:.*]] = math.fma %[[val_8]], %[[val_5]], %[[val_7]] : f32
129+
// CHECK: %[[val_10:.*]] = math.fma %[[val_4]], %[[val_9]], %[[val_5]] : f32
130+
// CHECK: %[[val_11:.*]] = math.fma %[[cst_20]], %[[val_10]], %[[cst_19]] : f32
131+
// CHECK: %[[val_12:.*]] = math.fma %[[val_11]], %[[val_10]], %[[cst_18]] : f32
132+
// CHECK: %[[val_13:.*]] = math.fma %[[val_12]], %[[val_10]], %[[cst_17]] : f32
133+
// CHECK: %[[val_14:.*]] = math.fma %[[val_13]], %[[val_10]], %[[cst_16]] : f32
134+
// CHECK: %[[val_15:.*]] = math.fma %[[val_14]], %[[val_10]], %[[cst_15]] : f32
135+
// CHECK: %[[val_16:.*]] = math.fma %[[val_15]], %[[val_10]], %[[cst_14]] : f32
136+
// CHECK: %[[val_17:.*]] = math.fma %[[val_16]], %[[val_10]], %[[cst_13]] : f32
137+
// CHECK: %[[val_18:.*]] = math.fma %[[val_17]], %[[val_10]], %[[cst_12]] : f32
138+
// CHECK: %[[val_19:.*]] = math.fma %[[val_18]], %[[val_10]], %[[cst_11]] : f32
139+
// CHECK: %[[val_20:.*]] = math.fma %[[cst_26]], %[[val_2]], %[[cst_22]] : f32
140+
// CHECK: %[[val_21:.*]] = arith.divf %[[cst_22]], %[[val_20]] : f32
141+
// CHECK: %[[val_22:.*]] = math.fma %[[val_19]], %[[val_21]], %[[val_21]] : f32
142+
// CHECK: %[[val_23:.*]] = arith.subf %[[val_19]], %[[val_22]] : f32
143+
// CHECK: %[[val_24:.*]] = arith.negf %[[val_2]] : f32
144+
// CHECK: %[[val_25:.*]] = math.fma %[[val_22]], %[[val_24]], %[[cst_23]] : f32
145+
// CHECK: %[[val_26:.*]] = math.fma %[[val_25]], %[[cst_26]], %[[val_23]] : f32
146+
// CHECK: %[[val_27:.*]] = math.fma %[[val_26]], %[[val_21]], %[[val_22]] : f32
147+
// CHECK: %[[val_28:.*]] = arith.mulf %[[val_2]], %[[val_2]] : f32
148+
// CHECK: %[[val_29:.*]] = arith.negf %[[val_28]] : f32
149+
// CHECK: %[[val_30:.*]] = arith.cmpf uge, %[[val_29]], %[[cst_2]] : f32
150+
// CHECK: %[[val_31:.*]] = arith.select %[[val_30]], %[[val_29]], %[[cst_2]] : f32
151+
// CHECK: %[[val_32:.*]] = arith.cmpf ule, %[[val_31]], %[[cst_1]] : f32
152+
// CHECK: %[[val_33:.*]] = arith.select %[[val_32]], %[[val_31]], %[[cst_1]] : f32
153+
// CHECK: %[[val_34:.*]] = math.fma %[[val_33]], %[[cst_10]], %[[cst_23]] : f32
154+
// CHECK: %[[val_35:.*]] = math.floor %[[val_34]] : f32
155+
// CHECK: %[[val_36:.*]] = arith.cmpf uge, %[[val_35]], %[[cst_0]] : f32
156+
// CHECK: %[[val_37:.*]] = arith.select %[[val_36]], %[[val_35]], %[[cst_0]] : f32
157+
// CHECK: %[[val_38:.*]] = arith.cmpf ule, %[[val_37]], %[[cst]] : f32
158+
// CHECK: %[[val_39:.*]] = arith.select %[[val_38]], %[[val_37]], %[[cst]] : f32
159+
// CHECK: %[[val_40:.*]] = math.fma %[[cst_9]], %[[val_39]], %[[val_33]] : f32
160+
// CHECK: %[[val_41:.*]] = math.fma %[[cst_8]], %[[val_39]], %[[val_40]] : f32
161+
// CHECK: %[[val_42:.*]] = math.fma %[[val_41]], %[[cst_7]], %[[cst_6]] : f32
162+
// CHECK: %[[val_43:.*]] = math.fma %[[val_42]], %[[val_41]], %[[cst_5]] : f32
163+
// CHECK: %[[val_44:.*]] = math.fma %[[val_43]], %[[val_41]], %[[cst_4]] : f32
164+
// CHECK: %[[val_45:.*]] = math.fma %[[val_44]], %[[val_41]], %[[cst_3]] : f32
165+
// CHECK: %[[val_46:.*]] = math.fma %[[val_45]], %[[val_41]], %[[cst_23]] : f32
166+
// CHECK: %[[val_47:.*]] = arith.mulf %[[val_41]], %[[val_41]] : f32
167+
// CHECK: %[[val_48:.*]] = math.fma %[[val_46]], %[[val_47]], %[[val_41]] : f32
168+
// CHECK: %[[val_49:.*]] = arith.addf %[[val_48]], %[[cst_22]] : f32
169+
// CHECK: %[[val_50:.*]] = arith.fptosi %[[val_39]] : f32 to i32
170+
// CHECK: %[[val_51:.*]] = arith.addi %[[val_50]], %[[c127_i32]] : i32
171+
// CHECK: %[[val_52:.*]] = arith.shli %[[val_51]], %[[c23_i32]] : i32
172+
// CHECK: %[[val_53:.*]] = arith.bitcast %[[val_52]] : i32 to f32
173+
// CHECK: %[[val_54:.*]] = arith.mulf %[[val_49]], %[[val_53]] : f32
174+
// CHECK: %[[val_55:.*]] = arith.negf %[[val_2]] : f32
175+
// CHECK: %[[val_56:.*]] = math.fma %[[val_55]], %[[val_2]], %[[val_28]] : f32
176+
// CHECK: %[[val_57:.*]] = arith.mulf %[[val_27]], %[[val_54]] : f32
177+
// CHECK: %[[val_58:.*]] = arith.mulf %[[val_57]], %[[val_56]] : f32
178+
// CHECK: %[[val_59:.*]] = math.fma %[[val_27]], %[[val_54]], %[[val_58]] : f32
179+
// CHECK: %[[val_60:.*]] = arith.cmpf olt, %[[val_2]], %[[cst_27]] : f32
180+
// CHECK: %[[val_61:.*]] = arith.xori %[[val_60]], %[[cst_true]] : i1
181+
// CHECK: %[[val_62:.*]] = arith.addf %[[val_arg0]], %[[val_arg0]] : f32
182+
// CHECK: %[[val_63:.*]] = arith.select %[[val_61]], %[[val_62]], %[[val_59]] : f32
183+
// CHECK: %[[val_64:.*]] = arith.cmpf ogt, %[[val_2]], %[[cst_28]] : f32
184+
// CHECK: %[[val_65:.*]] = arith.select %[[val_64]], %[[cst_21]], %[[val_63]] : f32
185+
// CHECK: %[[val_66:.*]] = arith.cmpf olt, %[[val_arg0]], %[[cst_21]] : f32
186+
// CHECK: %[[val_67:.*]] = arith.subf %[[cst_26]], %[[val_65]] : f32
187+
// CHECK: %[[val_68:.*]] = arith.select %[[val_66]], %[[val_67]], %[[val_65]] : f32
188+
// CHECK: return %[[val_68]] : f32
189+
// CHECK: }
190+
191+
// Test input: a scalar f32 math.erfc whose result is returned directly.
func.func @erfc_scalar(%arg0: f32) -> f32 {
192+
%0 = math.erfc %arg0 : f32
193+
return %0 : f32
194+
}
195+
84196
// CHECK-LABEL: func @erf_vector(
85197
// CHECK-SAME: %[[arg0:.*]]: vector<8xf32>) -> vector<8xf32> {
86198
// CHECK: %[[zero:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32>

0 commit comments

Comments
 (0)