
Commit 766d05b

refine Java API and comments
1 parent 4875755 commit 766d05b

4 files changed, +15 −4 lines changed


core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 1 addition & 1 deletion
@@ -444,7 +444,7 @@ class SparkContext(config: SparkConf) extends Logging {
    *   hdfs://a-hdfs-path/part-nnnnn
    * }}}
    *
-   * Do `val rdd = sparkContext.wholeTextFile("hdfs://a-hdfs-path")`,
+   * Do `val rdd = sparkContext.wholeTextFile("hdfs://a-hdfs-path", minSplits)`
    *
    * <p> then `rdd` contains
    * {{{

core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala

Lines changed: 12 additions & 1 deletion
@@ -167,7 +167,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    *   hdfs://a-hdfs-path/part-nnnnn
    * }}}
    *
-   * Do `JavaPairRDD<String, String> rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")`,
+   * Do
+   * `JavaPairRDD<String, String> rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path", minSplit)`
    *
    * <p> then `rdd` contains
    * {{{
@@ -179,6 +180,16 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    *
    * @note Small files are preferred, as each file will be loaded fully in memory.
    */
+  def wholeTextFiles(path: String, minSplits: Int): JavaPairRDD[String, String] =
+    new JavaPairRDD(sc.wholeTextFiles(path, minSplits))
+
+  /**
+   * Read a directory of text files from HDFS, a local file system (available on all nodes), or any
+   * Hadoop-supported file system URI. Each file is read as a single record and returned in a
+   * key-value pair, where the key is the path of each file, the value is the content of each file.
+   *
+   * @see `wholeTextFiles(path: String, minSplits: Int)`.
+   */
   def wholeTextFiles(path: String): JavaPairRDD[String, String] =
     new JavaPairRDD(sc.wholeTextFiles(path))


core/src/test/java/org/apache/spark/JavaAPISuite.java

Lines changed: 1 addition & 1 deletion
@@ -626,7 +626,7 @@ public void wholeTextFiles() throws IOException {
     container.put(tempDirName+"/part-00000", new Text(content1).toString());
     container.put(tempDirName+"/part-00001", new Text(content2).toString());

-    JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName);
+    JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName, 3);
     List<Tuple2<String, String>> result = readRDD.collect();

     for (Tuple2<String, String> res : result) {
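The same round trip outside the test harness: a self-contained sketch (directory, file names, and contents are hypothetical) that writes two small files, reads them back with the two-argument wholeTextFiles, and checks that every file's content survives unchanged, mirroring the assertions in both test suites:

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;

import scala.Tuple2;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class WholeTextFilesRoundTrip {
  public static void main(String[] args) throws IOException {
    // Hypothetical input: two small files in a temporary directory.
    File dir = Files.createTempDirectory("whole-text-files").toFile();
    Map<String, String> expected = new HashMap<String, String>();
    expected.put("part-00000", "spark is fast");
    expected.put("part-00001", "spark is fun");
    for (Map.Entry<String, String> e : expected.entrySet()) {
      Files.write(new File(dir, e.getKey()).toPath(),
          e.getValue().getBytes(StandardCharsets.UTF_8));
    }

    JavaSparkContext sc = new JavaSparkContext("local", "roundTrip");
    // One (path, content) record per file; 3 is the suggested minimum
    // number of splits, as in the tests above.
    JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(dir.toString(), 3);
    for (Tuple2<String, String> res : readRDD.collect()) {
      String name = new File(res._1()).getName();
      if (!res._2().equals(expected.get(name))) {
        throw new IllegalStateException("Content mismatch for " + name);
      }
    }
    sc.stop();
  }
}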

core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ class WholeTextFileRecordReaderSuite extends FunSuite with BeforeAndAfterAll {
       createNativeFile(dir, filename, contents)
     }

-    val res = sc.wholeTextFiles(dir.toString).collect()
+    val res = sc.wholeTextFiles(dir.toString, 3).collect()

     assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size,
       "Number of files read out does not fit with the actual value.")
