Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 57a4c07

Browse files
committed
small fix for docs
1 parent 1178c8f commit 57a4c07

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

docs/ml-features.md

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -117,15 +117,22 @@ Word2Vec is implemented in [Word2Vec](api/scala/index.html#org.apache.spark.ml.f
117117
{% highlight scala %}
118118
import org.apache.spark.ml.feature.Word2Vec
119119

120+
// Input data: Each row is a bag of words from a sentence or document.
120121
val documentDF = sqlContext.createDataFrame(Seq(
121122
"Hi I heard about Spark".split(" "),
122123
"I wish Java could use case classes".split(" "),
123124
"Logistic regression models are neat".split(" ")
124-
)).map(Tuple1.apply).toDF("text")
125+
).map(Tuple1.apply)).toDF("text")
125126

126-
val word2Vec = new Word2Vec.setInputCol("text").setOutputCol("result").setVectorSize(3)
127+
// Learn a mapping from words to Vectors.
128+
val word2Vec = new Word2Vec()
129+
.setInputCol("text")
130+
.setOutputCol("result")
131+
.setVectorSize(3)
132+
.setMinCount(0)
127133
val model = word2Vec.fit(documentDF)
128-
val result = model.transform(documentDF).select("result").take(3).foreach(println)
134+
val result = model.transform(documentDF)
135+
result.select("result").take(3).foreach(println)
129136
{% endhighlight %}
130137
</div>
131138

@@ -143,24 +150,26 @@ import org.apache.spark.sql.types.*;
143150

144151
JavaSparkContext jsc = ...
145152
SQLContext sqlContext = ...
153+
154+
// Input data: Each row is a bag of words from a sentence or document.
146155
JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
147156
RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))),
148157
RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))),
149158
RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
150159
));
151160
StructType schema = new StructType(new StructField[]{
152-
new StructField("text", new ArrayType(StringType$.MODULE$, true), false, Metadata.empty())
161+
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
153162
});
154163
DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema);
155164

165+
// Learn a mapping from words to Vectors.
156166
Word2Vec word2Vec = new Word2Vec()
157167
.setInputCol("text")
158168
.setOutputCol("result")
159169
.setVectorSize(3)
160170
.setMinCount(0);
161171
Word2VecModel model = word2Vec.fit(documentDF);
162172
DataFrame result = model.transform(documentDF);
163-
164173
for (Row r: result.select("result").take(3)) {
165174
System.out.println(r);
166175
}
@@ -171,12 +180,14 @@ for (Row r: result.select("result").take(3)) {
171180
{% highlight python %}
172181
from pyspark.ml.feature import Word2Vec
173182

183+
# Input data: Each row is a bag of words from a sentence or document.
174184
documentDF = sqlContext.createDataFrame([
175185
("Hi I heard about Spark".split(" "), ),
176186
("I wish Java could use case classes".split(" "), ),
177187
("Logistic regression models are neat".split(" "), )
178188
], ["text"])
179-
word2Vec = Word2Vec(vectorSize = 3, minCount = 0, inputCol = "text", outputCol = "result")
189+
# Learn a mapping from words to Vectors.
190+
word2Vec = Word2Vec(vectorSize=3, minCount=0, inputCol="text", outputCol="result")
180191
model = word2Vec.fit(documentDF)
181192
result = model.transform(documentDF)
182193
for feature in result.select("result").take(3):

mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ public void testJavaWord2Vec() {
3939
RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
4040
));
4141
StructType schema = new StructType(new StructField[]{
42-
new StructField("text", new ArrayType(StringType$.MODULE$, true), false, Metadata.empty())
42+
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
4343
});
4444
DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema);
4545

0 commit comments

Comments
 (0)