17
17
18
18
package org .apache .spark .mllib .fpm
19
19
20
+ import scala .collection .mutable
21
+
20
22
import org .apache .spark .Logging
21
- import org .apache .spark .annotation .Experimental
22
23
23
24
/**
24
- *
25
- * :: Experimental ::
26
- *
27
25
* Calculate all patterns of a projected database locally.
28
26
*/
29
- @ Experimental
30
27
private [fpm] object LocalPrefixSpan extends Logging with Serializable {
31
28
32
29
/**
@@ -43,18 +40,18 @@ private[fpm] object LocalPrefixSpan extends Logging with Serializable {
43
40
minCount : Long ,
44
41
maxPatternLength : Int ,
45
42
prefix : List [Int ],
46
- database : Iterable [Array [Int ]]): Iterator [(Array [Int ], Long )] = {
43
+ database : Array [Array [Int ]]): Iterator [(List [Int ], Long )] = {
47
44
48
45
if (database.isEmpty) return Iterator .empty
49
46
50
47
val frequentItemAndCounts = getFreqItemAndCounts(minCount, database)
51
48
val frequentItems = frequentItemAndCounts.map(_._1).toSet
52
49
val frequentPatternAndCounts = frequentItemAndCounts
53
- .map { case (item, count) => ((item :: prefix).reverse.toArray , count) }
50
+ .map { case (item, count) => ((item :: prefix), count) }
54
51
55
- val filteredProjectedDatabase = database.map(x => x.filter(frequentItems.contains(_)))
56
52
57
53
if (prefix.length + 1 < maxPatternLength) {
54
+ val filteredProjectedDatabase = database.map(x => x.filter(frequentItems.contains(_)))
58
55
frequentPatternAndCounts.iterator ++ frequentItems.flatMap { item =>
59
56
val nextProjected = project(filteredProjectedDatabase, item)
60
57
run(minCount, maxPatternLength, item :: prefix, nextProjected)
@@ -79,7 +76,7 @@ private[fpm] object LocalPrefixSpan extends Logging with Serializable {
79
76
}
80
77
}
81
78
82
- def project (database : Iterable [Array [Int ]], prefix : Int ): Iterable [Array [Int ]] = {
79
+ def project (database : Array [Array [Int ]], prefix : Int ): Array [Array [Int ]] = {
83
80
database
84
81
.map(candidateSeq => getSuffix(prefix, candidateSeq))
85
82
.filter(_.nonEmpty)
@@ -93,10 +90,11 @@ private[fpm] object LocalPrefixSpan extends Logging with Serializable {
93
90
*/
94
91
private def getFreqItemAndCounts(
    minCount: Long,
    database: Array[Array[Int]]): Iterable[(Int, Long)] = {
  // Each sequence contributes at most one count per item (`distinct` is applied
  // per sequence), so a count is the number of sequences containing the item,
  // not the total number of occurrences.
  //
  // The counter is a single mutable map updated in place, which avoids building
  // a new immutable map for every item folded in. Missing keys read as 0L via
  // the default value, so `+=` works for first-time items as well.
  database.flatMap(_.distinct)
    .foldRight(mutable.Map[Int, Long]().withDefaultValue(0L)) { case (item, ctr) =>
      ctr(item) += 1
      ctr
    }
    // Keep only the items that reach the minimum count.
    .filter(_._2 >= minCount)
}
0 commit comments