@@ -25,80 +25,80 @@ getMinPartitions <- function(sc, minPartitions) {
  as.integer(minPartitions)
}

- #' Create an RDD from a text file.
- #'
- #' This function reads a text file from HDFS, a local file system (available on all
- #' nodes), or any Hadoop-supported file system URI, and creates an
- #' RDD of strings from it.
- #'
- #' @param sc SparkContext to use
- #' @param path Path of file to read. A vector of multiple paths is allowed.
- #' @param minPartitions Minimum number of partitions to be created. If NULL, the default
- #' value is chosen based on available parallelism.
- #' @return RDD where each item is of type \code{character}
- #' @export
- #' @examples
- #' \dontrun{
- #' sc <- sparkR.init()
- #' lines <- textFile(sc, "myfile.txt")
- #' }
+ # Create an RDD from a text file.
+ #
+ # This function reads a text file from HDFS, a local file system (available on all
+ # nodes), or any Hadoop-supported file system URI, and creates an
+ # RDD of strings from it.
+ #
+ # @param sc SparkContext to use
+ # @param path Path of file to read. A vector of multiple paths is allowed.
+ # @param minPartitions Minimum number of partitions to be created. If NULL, the default
+ # value is chosen based on available parallelism.
+ # @return RDD where each item is of type \code{character}
+ # @export
+ # @examples
+ # \dontrun{
+ # sc <- sparkR.init()
+ # lines <- textFile(sc, "myfile.txt")
+ # }
textFile <- function(sc, path, minPartitions = NULL) {
  # Allow the user to have a more flexible definition of the text file path
  path <- suppressWarnings(normalizePath(path))
- #' Convert a string vector of paths to a string containing comma separated paths
+ # Convert a string vector of paths to a string containing comma separated paths
  path <- paste(path, collapse = ",")

  jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions))
  # jrdd is of type JavaRDD[String]
  RDD(jrdd, "string")
}
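
A minimal usage sketch for textFile, assuming a context from sparkR.init(); the path and partition count below are illustrative:

sc <- sparkR.init()
lines <- textFile(sc, "hdfs:///data/input.txt", minPartitions = 4L)
# Each RDD element is one line of the file, as a character string
count(lines)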

- #' Load an RDD saved as a SequenceFile containing serialized objects.
- #'
- #' The file to be loaded should be one that was previously generated by calling
- #' saveAsObjectFile() of the RDD class.
- #'
- #' @param sc SparkContext to use
- #' @param path Path of file to read. A vector of multiple paths is allowed.
- #' @param minPartitions Minimum number of partitions to be created. If NULL, the default
- #' value is chosen based on available parallelism.
- #' @return RDD containing serialized R objects.
- #' @seealso saveAsObjectFile
- #' @export
- #' @examples
- #' \dontrun{
- #' sc <- sparkR.init()
- #' rdd <- objectFile(sc, "myfile")
- #' }
+ # Load an RDD saved as a SequenceFile containing serialized objects.
+ #
+ # The file to be loaded should be one that was previously generated by calling
+ # saveAsObjectFile() of the RDD class.
+ #
+ # @param sc SparkContext to use
+ # @param path Path of file to read. A vector of multiple paths is allowed.
+ # @param minPartitions Minimum number of partitions to be created. If NULL, the default
+ # value is chosen based on available parallelism.
+ # @return RDD containing serialized R objects.
+ # @seealso saveAsObjectFile
+ # @export
+ # @examples
+ # \dontrun{
+ # sc <- sparkR.init()
+ # rdd <- objectFile(sc, "myfile")
+ # }
objectFile <- function(sc, path, minPartitions = NULL) {
  # Allow the user to have a more flexible definition of the text file path
  path <- suppressWarnings(normalizePath(path))
- #' Convert a string vector of paths to a string containing comma separated paths
+ # Convert a string vector of paths to a string containing comma separated paths
  path <- paste(path, collapse = ",")

  jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions))
  # Assume the RDD contains serialized R objects.
  RDD(jrdd, "byte")
}
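
A minimal round-trip sketch for objectFile, assuming an RDD previously written with saveAsObjectFile; the output path is illustrative:

sc <- sparkR.init()
rdd <- parallelize(sc, list(1, 2, 3), 2L)
saveAsObjectFile(rdd, "/tmp/rdd-objects")
restored <- objectFile(sc, "/tmp/rdd-objects")
collect(restored)  # the original three elements, deserialized back into R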

- #' Create an RDD from a homogeneous list or vector.
- #'
- #' This function creates an RDD from a local homogeneous list in R. The elements
- #' in the list are split into \code{numSlices} slices and distributed to nodes
- #' in the cluster.
- #'
- #' @param sc SparkContext to use
- #' @param coll collection to parallelize
- #' @param numSlices number of partitions to create in the RDD
- #' @return an RDD created from this collection
- #' @export
- #' @examples
- #' \dontrun{
- #' sc <- sparkR.init()
- #' rdd <- parallelize(sc, 1:10, 2)
- #' # The RDD should contain 10 elements
- #' length(rdd)
- #' }
+ # Create an RDD from a homogeneous list or vector.
+ #
+ # This function creates an RDD from a local homogeneous list in R. The elements
+ # in the list are split into \code{numSlices} slices and distributed to nodes
+ # in the cluster.
+ #
+ # @param sc SparkContext to use
+ # @param coll collection to parallelize
+ # @param numSlices number of partitions to create in the RDD
+ # @return an RDD created from this collection
+ # @export
+ # @examples
+ # \dontrun{
+ # sc <- sparkR.init()
+ # rdd <- parallelize(sc, 1:10, 2)
+ # # The RDD should contain 10 elements
+ # length(rdd)
+ # }
parallelize <- function(sc, coll, numSlices = 1) {
  # TODO: bound/safeguard numSlices
  # TODO: unit tests for if the split works for all primitives
@@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) {
  RDD(jrdd, "byte")
}
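
A minimal usage sketch for parallelize, extending the documented example; the element and slice counts are illustrative:

sc <- sparkR.init()
rdd <- parallelize(sc, 1:100, numSlices = 4L)
# The 100 elements are split into 4 partitions and distributed to the workers
count(rdd)  # 100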

- #' Include this specified package on all workers
- #'
- #' This function can be used to include a package on all workers before the
- #' user's code is executed. This is useful in scenarios where other R package
- #' functions are used in a function passed to functions like \code{lapply}.
- #' NOTE: The package is assumed to be installed on every node in the Spark
- #' cluster.
- #'
- #' @param sc SparkContext to use
- #' @param pkg Package name
- #'
- #' @export
- #' @examples
- #' \dontrun{
- #' library(Matrix)
- #'
- #' sc <- sparkR.init()
- #' # Include the matrix library we will be using
- #' includePackage(sc, Matrix)
- #'
- #' generateSparse <- function(x) {
- #' sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
- #' }
- #'
- #' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
- #' collect(rdd)
- #' }
+ # Include this specified package on all workers
+ #
+ # This function can be used to include a package on all workers before the
+ # user's code is executed. This is useful in scenarios where other R package
+ # functions are used in a function passed to functions like \code{lapply}.
+ # NOTE: The package is assumed to be installed on every node in the Spark
+ # cluster.
+ #
+ # @param sc SparkContext to use
+ # @param pkg Package name
+ #
+ # @export
+ # @examples
+ # \dontrun{
+ # library(Matrix)
+ #
+ # sc <- sparkR.init()
+ # # Include the matrix library we will be using
+ # includePackage(sc, Matrix)
+ #
+ # generateSparse <- function(x) {
+ # sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3))
+ # }
+ #
+ # rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse)
+ # collect(rdd)
+ # }
includePackage <- function(sc, pkg) {
  pkg <- as.character(substitute(pkg))
  if (exists(".packages", .sparkREnv)) {
@@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) {
  .sparkREnv$.packages <- packages
}
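
A minimal sketch of the pattern includePackage enables, assuming the jsonlite package (an illustrative choice) is already installed on every worker node:

sc <- sparkR.init()
includePackage(sc, jsonlite)  # attached on each worker before user code runs
rdd <- parallelize(sc, list('{"a":1}', '{"a":2}'), 2L)
# fromJSON() resolves on the workers because jsonlite was included above
parsed <- lapply(rdd, function(s) { fromJSON(s)$a })
collect(parsed)  # list(1, 2)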

- #' @title Broadcast a variable to all workers
- #'
- #' @description
- #' Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
- #' object for reading it in distributed functions.
- #'
- #' @param sc Spark Context to use
- #' @param object Object to be broadcast
- #' @export
- #' @examples
- #' \dontrun{
- #' sc <- sparkR.init()
- #' rdd <- parallelize(sc, 1:2, 2L)
- #'
- #' # Large Matrix object that we want to broadcast
- #' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
- #' randomMatBr <- broadcast(sc, randomMat)
- #'
- #' # Use the broadcast variable inside the function
- #' useBroadcast <- function(x) {
- #' sum(value(randomMatBr) * x)
- #' }
- #' sumRDD <- lapply(rdd, useBroadcast)
- #' }
+ # @title Broadcast a variable to all workers
+ #
+ # @description
+ # Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
+ # object for reading it in distributed functions.
+ #
+ # @param sc Spark Context to use
+ # @param object Object to be broadcast
+ # @export
+ # @examples
+ # \dontrun{
+ # sc <- sparkR.init()
+ # rdd <- parallelize(sc, 1:2, 2L)
+ #
+ # # Large Matrix object that we want to broadcast
+ # randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
+ # randomMatBr <- broadcast(sc, randomMat)
+ #
+ # # Use the broadcast variable inside the function
+ # useBroadcast <- function(x) {
+ # sum(value(randomMatBr) * x)
+ # }
+ # sumRDD <- lapply(rdd, useBroadcast)
+ # }
broadcast <- function(sc, object) {
  objName <- as.character(substitute(object))
  serializedObj <- serialize(object, connection = NULL)
@@ -205,21 +205,21 @@ broadcast <- function(sc, object) {
  Broadcast(id, object, jBroadcast, objName)
}
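
A minimal usage sketch for broadcast and value, following the documented pattern; the lookup table here is illustrative:

sc <- sparkR.init()
lookup <- c(a = 1, b = 2, c = 3)
lookupBr <- broadcast(sc, lookup)  # ship the read-only table to the workers once
rdd <- parallelize(sc, c("a", "c", "b"), 2L)
resolved <- lapply(rdd, function(k) { value(lookupBr)[[k]] })
collect(resolved)  # list(1, 3, 2)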

- #' @title Set the checkpoint directory
- #'
- #' Set the directory under which RDDs are going to be checkpointed. The
- #' directory must be a HDFS path if running on a cluster.
- #'
- #' @param sc Spark Context to use
- #' @param dirName Directory path
- #' @export
- #' @examples
- #' \dontrun{
- #' sc <- sparkR.init()
- #' setCheckpointDir(sc, "~/checkpoint")
- #' rdd <- parallelize(sc, 1:2, 2L)
- #' checkpoint(rdd)
- #' }
+ # @title Set the checkpoint directory
+ #
+ # Set the directory under which RDDs are going to be checkpointed. The
+ # directory must be a HDFS path if running on a cluster.
+ #
+ # @param sc Spark Context to use
+ # @param dirName Directory path
+ # @export
+ # @examples
+ # \dontrun{
+ # sc <- sparkR.init()
+ # setCheckpointDir(sc, "~/checkpoint")
+ # rdd <- parallelize(sc, 1:2, 2L)
+ # checkpoint(rdd)
+ # }
setCheckpointDir <- function(sc, dirName) {
  invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
}
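
A minimal usage sketch for setCheckpointDir together with checkpoint, following the documented example; the directory below is illustrative and must be an HDFS path when running on a cluster:

sc <- sparkR.init()
setCheckpointDir(sc, "/tmp/spark-checkpoints")
rdd <- parallelize(sc, 1:100, 4L)
checkpoint(rdd)  # mark the RDD for checkpointing
count(rdd)       # materializes the RDD and writes the checkpoint data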