Skip to content

Commit 2e3a280

Browse files
jkbradley authored and mengxr committed
[MINOR] [MLLIB] [ML] [DOC] Minor doc fixes for StringIndexer and MetadataUtils
Changes: (1) Make the Scala doc for StringIndexerInverse clearer, and remove the Scala doc from transformSchema so that the doc is inherited. (2) MetadataUtils.scala: update the description "Helper utilities for tree-based algorithms", since the utilities are not just for trees anymore. CC: holdenk, mengxr. Author: Joseph K. Bradley <[email protected]>. Closes #8679 from jkbradley/doc-fixes-1.5.
1 parent 960d2d0 commit 2e3a280

File tree

3 files changed

+20
-29
lines changed

3 files changed

+20
-29
lines changed

mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,10 @@ class StringIndexerModel (
181181

182182
/**
183183
* :: Experimental ::
184-
* A [[Transformer]] that maps a column of string indices back to a new column of corresponding
185-
* string values using either the ML attributes of the input column, or if provided using the labels
186-
* supplied by the user.
187-
* All original columns are kept during transformation.
184+
* A [[Transformer]] that maps a column of indices back to a new column of corresponding
185+
* string values.
186+
* The index-string mapping is either from the ML attributes of the input column,
187+
* or from user-supplied labels (which take precedence over ML attributes).
188188
*
189189
* @see [[StringIndexer]] for converting strings into indices
190190
*/
@@ -202,32 +202,23 @@ class IndexToString private[ml] (
202202
/** @group setParam */
203203
def setOutputCol(value: String): this.type = set(outputCol, value)
204204

205-
/**
206-
* Optional labels to be provided by the user, if not supplied column
207-
* metadata is read for labels. The default value is an empty array,
208-
* but the empty array is ignored and column metadata used instead.
209-
* @group setParam
210-
*/
205+
/** @group setParam */
211206
def setLabels(value: Array[String]): this.type = set(labels, value)
212207

213208
/**
214-
* Param for array of labels.
215-
* Optional labels to be provided by the user.
216-
* Default: Empty array, in which case column metadata is used for labels.
209+
* Optional param for array of labels specifying index-string mapping.
210+
*
211+
* Default: Empty array, in which case [[inputCol]] metadata is used for labels.
217212
* @group param
218213
*/
219214
final val labels: StringArrayParam = new StringArrayParam(this, "labels",
220-
"array of labels, if not provided metadata from inputCol is used instead.")
215+
"Optional array of labels specifying index-string mapping." +
216+
" If not provided or if empty, then metadata from inputCol is used instead.")
221217
setDefault(labels, Array.empty[String])
222218

223-
/**
224-
* Optional labels to be provided by the user, if not supplied column
225-
* metadata is read for labels.
226-
* @group getParam
227-
*/
219+
/** @group getParam */
228220
final def getLabels: Array[String] = $(labels)
229221

230-
/** Transform the schema for the inverse transformation */
231222
override def transformSchema(schema: StructType): StructType = {
232223
val inputColName = $(inputCol)
233224
val inputDataType = schema(inputColName).dataType

mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructField
2525

2626

2727
/**
28-
* Helper utilities for tree-based algorithms
28+
* Helper utilities for algorithms using ML metadata
2929
*/
3030
private[spark] object MetadataUtils {
3131

python/pyspark/ml/feature.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -985,17 +985,17 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
985985
"""
986986
.. note:: Experimental
987987
988-
A :py:class:`Transformer` that maps a column of string indices back to a new column of
989-
corresponding string values using either the ML attributes of the input column, or if
990-
provided using the labels supplied by the user.
991-
All original columns are kept during transformation.
988+
A :py:class:`Transformer` that maps a column of indices back to a new column of
989+
corresponding string values.
990+
The index-string mapping is either from the ML attributes of the input column,
991+
or from user-supplied labels (which take precedence over ML attributes).
992992
See L{StringIndexer} for converting strings into indices.
993993
"""
994994

995995
# a placeholder to make the labels show up in generated doc
996996
labels = Param(Params._dummy(), "labels",
997-
"Optional array of labels to be provided by the user, if not supplied or " +
998-
"empty, column metadata is read for labels")
997+
"Optional array of labels specifying index-string mapping." +
998+
" If not provided or if empty, then metadata from inputCol is used instead.")
999999

10001000
@keyword_only
10011001
def __init__(self, inputCol=None, outputCol=None, labels=None):
@@ -1006,8 +1006,8 @@ def __init__(self, inputCol=None, outputCol=None, labels=None):
10061006
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
10071007
self.uid)
10081008
self.labels = Param(self, "labels",
1009-
"Optional array of labels to be provided by the user, if not " +
1010-
"supplied or empty, column metadata is read for labels")
1009+
"Optional array of labels specifying index-string mapping. If not" +
1010+
" provided or if empty, then metadata from inputCol is used instead.")
10111011
kwargs = self.__init__._input_kwargs
10121012
self.setParams(**kwargs)
10131013

0 commit comments

Comments
 (0)