
Commit 2d19f0a

Merge remote-tracking branch 'upstream/master' into pyspark-update-cloudpickle-42-SPARK-23159
2 parents: 5458702 + 4e0fb01


426 files changed: +10973 −3521 lines


R/pkg/R/DataFrame.R

Lines changed: 2 additions & 1 deletion
@@ -2090,7 +2090,8 @@ setMethod("selectExpr",
 #'
 #' @param x a SparkDataFrame.
 #' @param colName a column name.
-#' @param col a Column expression, or an atomic vector in the length of 1 as literal value.
+#' @param col a Column expression (which must refer only to this SparkDataFrame), or an atomic
+#' vector in the length of 1 as literal value.
 #' @return A SparkDataFrame with the new column added or the existing column replaced.
 #' @family SparkDataFrame functions
 #' @aliases withColumn,SparkDataFrame,character-method

R/pkg/R/functions.R

Lines changed: 3 additions & 1 deletion
@@ -1026,7 +1026,9 @@ setMethod("last_day",
 })

 #' @details
-#' \code{length}: Computes the length of a given string or binary column.
+#' \code{length}: Computes the character length of a string data or number of bytes
+#' of a binary data. The length of string data includes the trailing spaces.
+#' The length of binary data includes binary zeros.
 #'
 #' @rdname column_string_functions
 #' @aliases length length,Column-method

R/pkg/R/mllib_classification.R

Lines changed: 14 additions & 1 deletion
@@ -279,11 +279,24 @@ function(object, path, overwrite = FALSE) {
 #' savedModel <- read.ml(path)
 #' summary(savedModel)
 #'
-#' # multinomial logistic regression
+#' # binary logistic regression against two classes with
+#' # upperBoundsOnCoefficients and upperBoundsOnIntercepts
+#' ubc <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
+#' model <- spark.logit(training, Species ~ .,
+#'                      upperBoundsOnCoefficients = ubc,
+#'                      upperBoundsOnIntercepts = 1.0)
 #'
+#' # multinomial logistic regression
 #' model <- spark.logit(training, Class ~ ., regParam = 0.5)
 #' summary <- summary(model)
 #'
+#' # multinomial logistic regression with
+#' # lowerBoundsOnCoefficients and lowerBoundsOnIntercepts
+#' lbc <- matrix(c(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0), nrow = 2, ncol = 4)
+#' lbi <- as.array(c(0.0, 0.0))
+#' model <- spark.logit(training, Species ~ ., family = "multinomial",
+#'                      lowerBoundsOnCoefficients = lbc,
+#'                      lowerBoundsOnIntercepts = lbi)
 #' }
 #' @note spark.logit since 2.1.0
 setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula"),

R/pkg/R/serialize.R

Lines changed: 6 additions & 5 deletions
@@ -30,14 +30,17 @@
 # POSIXct,POSIXlt -> Time
 #
 # list[T] -> Array[T], where T is one of above mentioned types
+# Multi-element vector of any of the above (except raw) -> Array[T]
 # environment -> Map[String, T], where T is a native type
 # jobj -> Object, where jobj is an object created in the backend
 # nolint end

 getSerdeType <- function(object) {
   type <- class(object)[[1]]
-  if (type != "list") {
-    type
+  if (is.atomic(object) & !is.raw(object) & length(object) > 1) {
+    "array"
+  } else if (type != "list") {
+    type
   } else {
     # Check if all elements are of same type
     elemType <- unique(sapply(object, function(elem) { getSerdeType(elem) }))
@@ -50,9 +53,7 @@ getSerdeType <- function(object) {
 }

 writeObject <- function(con, object, writeType = TRUE) {
-  # NOTE: In R vectors have same type as objects. So we don't support
-  # passing in vectors as arrays and instead require arrays to be passed
-  # as lists.
+  # NOTE: In R vectors have same type as objects
   type <- class(object)[[1]] # class of POSIXlt is c("POSIXlt", "POSIXt")
   # Checking types is needed here, since 'is.na' only handles atomic vectors,
   # lists and pairlists

R/pkg/tests/fulltests/test_Serde.R

Lines changed: 47 additions & 0 deletions
@@ -37,6 +37,53 @@ test_that("SerDe of primitive types", {
   expect_equal(class(x), "character")
 })

+test_that("SerDe of multi-element primitive vectors inside R data.frame", {
+  # vector of integers embedded in R data.frame
+  indices <- 1L:3L
+  myDf <- data.frame(indices)
+  myDf$data <- list(rep(0L, 3L))
+  mySparkDf <- as.DataFrame(myDf)
+  myResultingDf <- collect(mySparkDf)
+  myDfListedData <- data.frame(indices)
+  myDfListedData$data <- list(as.list(rep(0L, 3L)))
+  expect_equal(myResultingDf, myDfListedData)
+  expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+  expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "integer")
+
+  # vector of numeric embedded in R data.frame
+  myDf <- data.frame(indices)
+  myDf$data <- list(rep(0, 3L))
+  mySparkDf <- as.DataFrame(myDf)
+  myResultingDf <- collect(mySparkDf)
+  myDfListedData <- data.frame(indices)
+  myDfListedData$data <- list(as.list(rep(0, 3L)))
+  expect_equal(myResultingDf, myDfListedData)
+  expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+  expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "numeric")
+
+  # vector of logical embedded in R data.frame
+  myDf <- data.frame(indices)
+  myDf$data <- list(rep(TRUE, 3L))
+  mySparkDf <- as.DataFrame(myDf)
+  myResultingDf <- collect(mySparkDf)
+  myDfListedData <- data.frame(indices)
+  myDfListedData$data <- list(as.list(rep(TRUE, 3L)))
+  expect_equal(myResultingDf, myDfListedData)
+  expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+  expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "logical")
+
+  # vector of character embedded in R data.frame
+  myDf <- data.frame(indices)
+  myDf$data <- list(rep("abc", 3L))
+  mySparkDf <- as.DataFrame(myDf)
+  myResultingDf <- collect(mySparkDf)
+  myDfListedData <- data.frame(indices)
+  myDfListedData$data <- list(as.list(rep("abc", 3L)))
+  expect_equal(myResultingDf, myDfListedData)
+  expect_equal(class(myResultingDf[["data"]][[1]]), "list")
+  expect_equal(class(myResultingDf[["data"]][[1]][[1]]), "character")
+})
+
 test_that("SerDe of list of primitive types", {
   x <- list(1L, 2L, 3L)
   y <- callJStatic("SparkRHandler", "echo", x)

R/pkg/tests/fulltests/test_mllib_classification.R

Lines changed: 5 additions & 5 deletions
@@ -124,7 +124,7 @@ test_that("spark.logit", {
   # Petal.Width 0.42122607
   # nolint end

-  # Test multinomial logistic regression againt three classes
+  # Test multinomial logistic regression against three classes
   df <- suppressWarnings(createDataFrame(iris))
   model <- spark.logit(df, Species ~ ., regParam = 0.5)
   summary <- summary(model)
@@ -196,7 +196,7 @@ test_that("spark.logit", {
   #
   # nolint end

-  # Test multinomial logistic regression againt two classes
+  # Test multinomial logistic regression against two classes
   df <- suppressWarnings(createDataFrame(iris))
   training <- df[df$Species %in% c("versicolor", "virginica"), ]
   model <- spark.logit(training, Species ~ ., regParam = 0.5, family = "multinomial")
@@ -208,7 +208,7 @@ test_that("spark.logit", {
   expect_true(all(abs(versicolorCoefsR - versicolorCoefs) < 0.1))
   expect_true(all(abs(virginicaCoefsR - virginicaCoefs) < 0.1))

-  # Test binomial logistic regression againt two classes
+  # Test binomial logistic regression against two classes
   model <- spark.logit(training, Species ~ ., regParam = 0.5)
   summary <- summary(model)
   coefsR <- c(-6.08, 0.25, 0.16, 0.48, 1.04)
@@ -239,7 +239,7 @@ test_that("spark.logit", {
   prediction2 <- collect(select(predict(model2, df2), "prediction"))
   expect_equal(sort(prediction2$prediction), c("0.0", "0.0", "0.0", "0.0", "0.0"))

-  # Test binomial logistic regression againt two classes with upperBoundsOnCoefficients
+  # Test binomial logistic regression against two classes with upperBoundsOnCoefficients
   # and upperBoundsOnIntercepts
   u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
   model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
@@ -252,7 +252,7 @@ test_that("spark.logit", {
   expect_error(spark.logit(training, Species ~ ., upperBoundsOnCoefficients = as.array(c(1, 2)),
                            upperBoundsOnIntercepts = 1.0))

-  # Test binomial logistic regression againt two classes with lowerBoundsOnCoefficients
+  # Test binomial logistic regression against two classes with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
   l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
   model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java

Lines changed: 3 additions & 1 deletion
@@ -171,7 +171,9 @@ private class DownloadCallback implements StreamCallback {

     @Override
     public void onData(String streamId, ByteBuffer buf) throws IOException {
-      channel.write(buf);
+      while (buf.hasRemaining()) {
+        channel.write(buf);
+      }
     }

     @Override
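
For context, and not part of the commit itself: java.nio.channels.WritableByteChannel.write(ByteBuffer) is only guaranteed to write some of the remaining bytes, so a single call can leave data in the buffer and silently truncate the downloaded block. A minimal sketch of the pattern the fix adopts, with a hypothetical writeFully helper name:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;

// Illustration only: a single WritableByteChannel.write() call may consume
// only part of the buffer, so keep writing until nothing remains.
final class ChannelWrites {
  static void writeFully(WritableByteChannel channel, ByteBuffer buf) throws IOException {
    while (buf.hasRemaining()) {
      channel.write(buf);
    }
  }
}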

common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java

Lines changed: 11 additions & 7 deletions
@@ -46,9 +46,12 @@ private boolean shouldPool(long size) {

   @Override
   public MemoryBlock allocate(long size) throws OutOfMemoryError {
-    if (shouldPool(size)) {
+    int numWords = (int) ((size + 7) / 8);
+    long alignedSize = numWords * 8L;
+    assert (alignedSize >= size);
+    if (shouldPool(alignedSize)) {
       synchronized (this) {
-        final LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(size);
+        final LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(alignedSize);
         if (pool != null) {
           while (!pool.isEmpty()) {
             final WeakReference<long[]> arrayReference = pool.pop();
@@ -62,11 +65,11 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError {
             return memory;
           }
         }
-        bufferPoolsBySize.remove(size);
+        bufferPoolsBySize.remove(alignedSize);
       }
     }
-    long[] array = new long[(int) ((size + 7) / 8)];
+    long[] array = new long[numWords];
     MemoryBlock memory = new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
     if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) {
       memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
@@ -98,12 +101,13 @@ public void free(MemoryBlock memory) {
     long[] array = (long[]) memory.obj;
     memory.setObjAndOffset(null, 0);

-    if (shouldPool(size)) {
+    long alignedSize = ((size + 7) / 8) * 8;
+    if (shouldPool(alignedSize)) {
       synchronized (this) {
-        LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(size);
+        LinkedList<WeakReference<long[]>> pool = bufferPoolsBySize.get(alignedSize);
         if (pool == null) {
           pool = new LinkedList<>();
-          bufferPoolsBySize.put(size, pool);
+          bufferPoolsBySize.put(alignedSize, pool);
         }
         pool.add(new WeakReference<>(array));
       }
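
Illustration, not part of the commit: each block is backed by a long[], so the requested size is rounded up to a multiple of 8 bytes before being used as a pool key. That way free() and a later allocate() of a slightly different size map to the same pool entry and the backing array can be reused. A minimal sketch of the rounding arithmetic, under the class name WordAlignment chosen here for illustration:

// Rounding a requested byte size up to the 8-byte word granularity of the
// backing long[] array. Requests of 1024*1024 + 1 and 1024*1024 + 7 bytes
// round to the same aligned size, so the second allocation can reuse the
// array pooled when the first block was freed.
final class WordAlignment {
  static long alignedSize(long size) {
    long numWords = (size + 7) / 8;   // number of 64-bit words needed
    return numWords * 8L;             // size actually backed by the long[]
  }

  public static void main(String[] args) {
    System.out.println(alignedSize(513));             // 520
    System.out.println(alignedSize(1024 * 1024 + 1)); // 1048584
    System.out.println(alignedSize(1024 * 1024 + 7)); // 1048584 (same pool key)
  }
}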

common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java

Lines changed: 23 additions & 0 deletions
@@ -17,6 +17,7 @@

 package org.apache.spark.unsafe;

+import org.apache.spark.unsafe.memory.HeapMemoryAllocator;
 import org.apache.spark.unsafe.memory.MemoryAllocator;
 import org.apache.spark.unsafe.memory.MemoryBlock;

@@ -134,4 +135,26 @@ public void memoryDebugFillEnabledInTest() {
       MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE);
     MemoryAllocator.UNSAFE.free(offheap);
   }
+
+  @Test
+  public void heapMemoryReuse() {
+    MemoryAllocator heapMem = new HeapMemoryAllocator();
+    // The size is less than `HeapMemoryAllocator.POOLING_THRESHOLD_BYTES`,
+    // allocate new memory every time.
+    MemoryBlock onheap1 = heapMem.allocate(513);
+    Object obj1 = onheap1.getBaseObject();
+    heapMem.free(onheap1);
+    MemoryBlock onheap2 = heapMem.allocate(514);
+    Assert.assertNotEquals(obj1, onheap2.getBaseObject());
+
+    // The size is greater than `HeapMemoryAllocator.POOLING_THRESHOLD_BYTES`,
+    // reuse the previous memory which has released.
+    MemoryBlock onheap3 = heapMem.allocate(1024 * 1024 + 1);
+    Assert.assertEquals(onheap3.size(), 1024 * 1024 + 1);
+    Object obj3 = onheap3.getBaseObject();
+    heapMem.free(onheap3);
+    MemoryBlock onheap4 = heapMem.allocate(1024 * 1024 + 7);
+    Assert.assertEquals(onheap4.size(), 1024 * 1024 + 7);
+    Assert.assertEquals(obj3, onheap4.getBaseObject());
+  }
 }

core/src/main/java/org/apache/spark/SparkExecutorInfo.java

Lines changed: 4 additions & 0 deletions
@@ -30,4 +30,8 @@ public interface SparkExecutorInfo extends Serializable {
   int port();
   long cacheSize();
   int numRunningTasks();
+  long usedOnHeapStorageMemory();
+  long usedOffHeapStorageMemory();
+  long totalOnHeapStorageMemory();
+  long totalOffHeapStorageMemory();
 }
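
A hedged usage sketch, not part of the commit: given a SparkExecutorInfo obtained elsewhere (for example from a status tracker, which this snippet does not show), the four new accessors can be read directly. Only methods visible in the interface above are used; the class name is hypothetical.

import org.apache.spark.SparkExecutorInfo;

// Illustration only: format the new storage-memory metrics for display.
final class ExecutorMemoryReport {
  static String format(SparkExecutorInfo info) {
    return String.format("on-heap %d/%d bytes, off-heap %d/%d bytes",
        info.usedOnHeapStorageMemory(), info.totalOnHeapStorageMemory(),
        info.usedOffHeapStorageMemory(), info.totalOffHeapStorageMemory());
  }
}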
