@@ -1388,9 +1388,22 @@ setMethod("groupByKey",
1388
1388
function (item ) {
1389
1389
item $ hash <- as.character(hashCode(item [[1 ]]))
1390
1390
updateOrCreatePair(item , keys , vals , pred ,
1391
- function (vs , v ) c(vs , list (v )),
1392
- function (x ) list (x ))
1391
+ function (acc , x ) {
1392
+ addItemToAccumulator(acc , x )
1393
+ acc
1394
+ },
1395
+ function (x ) {
1396
+ acc <- initAccumulator()
1397
+ addItemToAccumulator(acc , x )
1398
+ acc
1399
+ })
1393
1400
})
1401
+ # extract out data field
1402
+ vals <- eapply(vals ,
1403
+ function (x ) {
1404
+ length(x $ data ) <- x $ counter
1405
+ x $ data
1406
+ })
1394
1407
# Every key in the environment contains a list
1395
1408
# Convert that to list(K, Seq[V])
1396
1409
convertEnvsToList(keys , vals )
@@ -1438,7 +1451,7 @@ setMethod("reduceByKey",
1438
1451
lapply(part ,
1439
1452
function (item ) {
1440
1453
item $ hash <- as.character(hashCode(item [[1 ]]))
1441
- updateOrCreatePair(item , keys , vals , pred , combineFunc , function ( x ) x )
1454
+ updateOrCreatePair(item , keys , vals , pred , combineFunc , identity )
1442
1455
})
1443
1456
convertEnvsToList(keys , vals )
1444
1457
}
@@ -1451,13 +1464,12 @@ setMethod("reduceByKey",
1451
1464
# '
1452
1465
# ' This function operates on RDDs where every element is of the form list(K, V) or c(K, V)
1453
1466
# ' and merges the values for each key using an associative reduce function, but returns the
1454
- # ' results immediately to master as R list.
1467
+ # ' results immediately to the driver as an R list.
1455
1468
# '
1456
1469
# ' @param rdd The RDD to reduce by key. Should be an RDD where each element is
1457
1470
# ' list(K, V) or c(K, V).
1458
1471
# ' @param combineFunc The associative reduce function to use.
1459
- # ' @return An list where each element is list(K, V') where V' is the merged
1460
- # ' value
1472
+ # ' @return A list of elements of type list(K, V') where V' is the merged value for each key
1461
1473
# ' @rdname reduceByKeyLocally
1462
1474
# ' @seealso reduceByKey
1463
1475
# ' @export
@@ -1467,7 +1479,7 @@ setMethod("reduceByKey",
1467
1479
# ' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
1468
1480
# ' rdd <- parallelize(sc, pairs)
1469
1481
# ' reduced <- reduceByKeyLocally(rdd, "+")
1470
- # ' reduced[[1]] # Should be a list(1, 6 )
1482
+ # ' reduced # list(list(1, 6), list(1.1, 3) )
1471
1483
# '}
1472
1484
setGeneric ("reduceByKeyLocally ",
1473
1485
function (rdd , combineFunc ) {
@@ -1486,7 +1498,7 @@ setMethod("reduceByKeyLocally",
1486
1498
lapply(part ,
1487
1499
function (item ) {
1488
1500
item $ hash <- as.character(hashCode(item [[1 ]]))
1489
- updateOrCreatePair(item , keys , vals , pred , combineFunc , function ( x ) x )
1501
+ updateOrCreatePair(item , keys , vals , pred , combineFunc , identity )
1490
1502
})
1491
1503
list (list (keys , vals )) # return hash to avoid re-compute in merge
1492
1504
}
@@ -1498,7 +1510,7 @@ setMethod("reduceByKeyLocally",
1498
1510
function (name ) {
1499
1511
item <- list (x [[1 ]][[name ]], x [[2 ]][[name ]])
1500
1512
item $ hash <- name
1501
- updateOrCreatePair(item , accum [[1 ]], accum [[2 ]], pred , combineFunc , function ( x ) x )
1513
+ updateOrCreatePair(item , accum [[1 ]], accum [[2 ]], pred , combineFunc , identity )
1502
1514
})
1503
1515
accum
1504
1516
}
@@ -1573,8 +1585,7 @@ setMethod("combineByKey",
1573
1585
lapply(part ,
1574
1586
function (item ) {
1575
1587
item $ hash <- as.character(item [[1 ]])
1576
- updateOrCreatePair(item , keys , combiners , pred , mergeCombiners ,
1577
- function (x ) x )
1588
+ updateOrCreatePair(item , keys , combiners , pred , mergeCombiners , identity )
1578
1589
})
1579
1590
convertEnvsToList(keys , combiners )
1580
1591
}
0 commit comments