
Commit 766d05b

refine Java API and comments
1 parent 4875755 commit 766d05b

4 files changed, +15 −4 lines changed


core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 1 addition & 1 deletion
@@ -444,7 +444,7 @@ class SparkContext(config: SparkConf) extends Logging {
    *   hdfs://a-hdfs-path/part-nnnnn
    * }}}
    *
-   * Do `val rdd = sparkContext.wholeTextFile("hdfs://a-hdfs-path")`,
+   * Do `val rdd = sparkContext.wholeTextFile("hdfs://a-hdfs-path", minSplits)`
    *
    * <p> then `rdd` contains
    * {{{

core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala

Lines changed: 12 additions & 1 deletion
@@ -167,7 +167,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    *   hdfs://a-hdfs-path/part-nnnnn
    * }}}
    *
-   * Do `JavaPairRDD<String, String> rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")`,
+   * Do
+   * `JavaPairRDD<String, String> rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path", minSplit)`
    *
    * <p> then `rdd` contains
    * {{{
@@ -179,6 +180,16 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
    *
    * @note Small files are preferred, as each file will be loaded fully in memory.
    */
+  def wholeTextFiles(path: String, minSplits: Int): JavaPairRDD[String, String] =
+    new JavaPairRDD(sc.wholeTextFiles(path, minSplits))
+
+  /**
+   * Read a directory of text files from HDFS, a local file system (available on all nodes), or any
+   * Hadoop-supported file system URI. Each file is read as a single record and returned in a
+   * key-value pair, where the key is the path of each file, the value is the content of each file.
+   *
+   * @see `wholeTextFiles(path: String, minSplits: Int)`.
+   */
   def wholeTextFiles(path: String): JavaPairRDD[String, String] =
     new JavaPairRDD(sc.wholeTextFiles(path))


core/src/test/java/org/apache/spark/JavaAPISuite.java

Lines changed: 1 addition & 1 deletion
@@ -626,7 +626,7 @@ public void wholeTextFiles() throws IOException {
     container.put(tempDirName+"/part-00000", new Text(content1).toString());
     container.put(tempDirName+"/part-00001", new Text(content2).toString());

-    JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName);
+    JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName, 3);
     List<Tuple2<String, String>> result = readRDD.collect();

     for (Tuple2<String, String> res : result) {
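The same round trip outside the test harness: a self-contained sketch (directory, file names, and contents are hypothetical) that writes two small files, reads them back with the two-argument wholeTextFiles, and checks that every file's content survives unchanged, mirroring the assertions in both test suites:

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;

import scala.Tuple2;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class WholeTextFilesRoundTrip {
  public static void main(String[] args) throws IOException {
    // Hypothetical input: two small files in a temporary directory.
    File dir = Files.createTempDirectory("whole-text-files").toFile();
    Map<String, String> expected = new HashMap<String, String>();
    expected.put("part-00000", "spark is fast");
    expected.put("part-00001", "spark is fun");
    for (Map.Entry<String, String> e : expected.entrySet()) {
      Files.write(new File(dir, e.getKey()).toPath(),
          e.getValue().getBytes(StandardCharsets.UTF_8));
    }

    JavaSparkContext sc = new JavaSparkContext("local", "roundTrip");
    // One (path, content) record per file; 3 is the suggested minimum
    // number of splits, as in the tests above.
    JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(dir.toString(), 3);
    for (Tuple2<String, String> res : readRDD.collect()) {
      String name = new File(res._1()).getName();
      if (!res._2().equals(expected.get(name))) {
        throw new IllegalStateException("Content mismatch for " + name);
      }
    }
    sc.stop();
  }
}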

core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ class WholeTextFileRecordReaderSuite extends FunSuite with BeforeAndAfterAll {
       createNativeFile(dir, filename, contents)
     }

-    val res = sc.wholeTextFiles(dir.toString).collect()
+    val res = sc.wholeTextFiles(dir.toString, 3).collect()

     assert(res.size === WholeTextFileRecordReaderSuite.fileNames.size,
       "Number of files read out does not fit with the actual value.")
