Skip to content

Commit eacfcfa

Browse files
committed
change ML attribute from splits into buckets
1 parent c3cc770 commit eacfcfa

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -107,9 +107,16 @@ final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer])
107107
}
108108

109109
/**
 * Builds the [[StructField]] for the output column, carrying an ordinal [[NominalAttribute]]
 * whose values are one human-readable label per bucket.
 *
 * Labels are produced in ascending order:
 *  - `"-inf, <first split>"` when `lowerInclusive` is set,
 *  - `"<lo>, <hi>"` for every pair of adjacent splits,
 *  - `"<last split>, inf"` when `upperInclusive` is set.
 *
 * @param schema the input schema; it is not consulted by this method
 * @return the struct field produced by the attribute named after `outputCol`
 */
private def prepOutputField(schema: StructType): StructField = {
  val boundaries = $(splits)
  // `sliding(2)` yields a single, shorter window when fewer than two splits exist; keeping only
  // complete pairs prevents a lone split from being emitted as a bogus bucket label.
  val innerRanges = boundaries.sliding(2)
    .filter(_.length == 2)
    .map(bucket => bucket.mkString(", "))
    .toArray
  // NOTE(review): `head`/`last` assume at least one split whenever an open-ended edge bucket is
  // requested -- presumably enforced by param validation elsewhere; TODO confirm.
  val lowerRange =
    if ($(lowerInclusive)) Array(s"-inf, ${boundaries.head}") else Array.empty[String]
  val upperRange =
    if ($(upperInclusive)) Array(s"${boundaries.last}, inf") else Array.empty[String]
  val values = lowerRange ++ innerRanges ++ upperRange
  val attr =
    new NominalAttribute(name = Some($(outputCol)), isOrdinal = Some(true), values = Some(values))
  attr.toStructField()
}
115122

0 commit comments

Comments
 (0)