@@ -39,6 +39,22 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
39
39
.setMinSupport(0.9 )
40
40
.setNumPartitions(1 )
41
41
.run(rdd)
42
+
43
+ /* Verify results using the `R` code (requires the `arules` package: run `library(arules)` first):
44
+ transactions = as(sapply(
45
+ list("r z h k p",
46
+ "z y x w v u t s",
47
+ "s x o n r",
48
+ "x z y m t s q e",
49
+ "z",
50
+ "x z y r q t p"),
51
+ FUN=function(x) strsplit(x," ",fixed=TRUE)),
52
+ "transactions")
53
+ > eclat(transactions, parameter = list(support = 0.9))
54
+ ...
55
+ eclat - zero frequent items
56
+ set of 0 itemsets
57
+ */
42
58
assert(model6.freqItemsets.count() === 0 )
43
59
44
60
val model3 = fpg
@@ -48,6 +64,33 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
48
64
val freqItemsets3 = model3.freqItemsets.collect().map { itemset =>
49
65
(itemset.items.toSet, itemset.freq)
50
66
}
67
+
68
+ /* Verify results using the `R` code:
69
+ fp = eclat(transactions, parameter = list(support = 0.5))
70
+ fpDF = as(sort(fp), "data.frame")
71
+ fpDF$support = fpDF$support * length(transactions)
72
+ names(fpDF)[names(fpDF) == "support"] = "freq"
73
+ > fpDF
74
+ items freq
75
+ 13 {z} 5
76
+ 14 {x} 4
77
+ 1 {s,x} 3
78
+ 2 {t,x,y,z} 3
79
+ 3 {t,y,z} 3
80
+ 4 {t,x,y} 3
81
+ 5 {x,y,z} 3
82
+ 6 {y,z} 3
83
+ 7 {x,y} 3
84
+ 8 {t,y} 3
85
+ 9 {t,x,z} 3
86
+ 10 {t,z} 3
87
+ 11 {t,x} 3
88
+ 12 {x,z} 3
89
+ 15 {t} 3
90
+ 16 {y} 3
91
+ 17 {s} 3
92
+ 18 {r} 3
93
+ */
51
94
val expected = Set (
52
95
(Set (" s" ), 3L ), (Set (" z" ), 5L ), (Set (" x" ), 4L ), (Set (" t" ), 3L ), (Set (" y" ), 3L ),
53
96
(Set (" r" ), 3L ),
@@ -62,12 +105,30 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
62
105
.setMinSupport(0.3 )
63
106
.setNumPartitions(4 )
64
107
.run(rdd)
108
+
109
+ /* Verify results using the `R` code:
110
+ fp = eclat(transactions, parameter = list(support = 0.3))
111
+ fpDF = as(fp, "data.frame")
112
+ fpDF$support = fpDF$support * length(transactions)
113
+ names(fpDF)[names(fpDF) == "support"] = "freq"
114
+ > nrow(fpDF)
115
+ [1] 54
116
+ */
65
117
assert(model2.freqItemsets.count() === 54 )
66
118
67
119
val model1 = fpg
68
120
.setMinSupport(0.1 )
69
121
.setNumPartitions(8 )
70
122
.run(rdd)
123
+
124
+ /* Verify results using the `R` code:
125
+ fp = eclat(transactions, parameter = list(support = 0.1))
126
+ fpDF = as(fp, "data.frame")
127
+ fpDF$support = fpDF$support * length(transactions)
128
+ names(fpDF)[names(fpDF) == "support"] = "freq"
129
+ > nrow(fpDF)
130
+ [1] 625
131
+ */
71
132
assert(model1.freqItemsets.count() === 625 )
72
133
}
73
134
@@ -89,6 +150,23 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
89
150
.setMinSupport(0.9 )
90
151
.setNumPartitions(1 )
91
152
.run(rdd)
153
+
154
+ /* Verify results using the `R` code (requires the `arules` package: run `library(arules)` first):
155
+ transactions = as(sapply(
156
+ list("1 2 3",
157
+ "1 2 3 4",
158
+ "5 4 3 2 1",
159
+ "6 5 4 3 2 1",
160
+ "2 4",
161
+ "1 3",
162
+ "1 7"),
163
+ FUN=function(x) strsplit(x," ",fixed=TRUE)),
164
+ "transactions")
165
+ > eclat(transactions, parameter = list(support = 0.9))
166
+ ...
167
+ eclat - zero frequent items
168
+ set of 0 itemsets
169
+ */
92
170
assert(model6.freqItemsets.count() === 0 )
93
171
94
172
val model3 = fpg
@@ -100,6 +178,24 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
100
178
val freqItemsets3 = model3.freqItemsets.collect().map { itemset =>
101
179
(itemset.items.toSet, itemset.freq)
102
180
}
181
+
182
+ /* Verify results using the `R` code:
183
+ fp = eclat(transactions, parameter = list(support = 0.5))
184
+ fpDF = as(sort(fp), "data.frame")
185
+ fpDF$support = fpDF$support * length(transactions)
186
+ names(fpDF)[names(fpDF) == "support"] = "freq"
187
+ > fpDF
188
+ items freq
189
+ 6 {1} 6
190
+ 3 {1,3} 5
191
+ 7 {2} 5
192
+ 8 {3} 5
193
+ 1 {2,4} 4
194
+ 2 {1,2,3} 4
195
+ 4 {2,3} 4
196
+ 5 {1,2} 4
197
+ 9 {4} 4
198
+ */
103
199
val expected = Set (
104
200
(Set (1 ), 6L ), (Set (2 ), 5L ), (Set (3 ), 5L ), (Set (4 ), 4L ),
105
201
(Set (1 , 2 ), 4L ), (Set (1 , 3 ), 5L ), (Set (2 , 3 ), 4L ),
@@ -110,12 +206,30 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
110
206
.setMinSupport(0.3 )
111
207
.setNumPartitions(4 )
112
208
.run(rdd)
209
+
210
+ /* Verify results using the `R` code:
211
+ fp = eclat(transactions, parameter = list(support = 0.3))
212
+ fpDF = as(fp, "data.frame")
213
+ fpDF$support = fpDF$support * length(transactions)
214
+ names(fpDF)[names(fpDF) == "support"] = "freq"
215
+ > nrow(fpDF)
216
+ [1] 15
217
+ */
113
218
assert(model2.freqItemsets.count() === 15 )
114
219
115
220
val model1 = fpg
116
221
.setMinSupport(0.1 )
117
222
.setNumPartitions(8 )
118
223
.run(rdd)
224
+
225
+ /* Verify results using the `R` code:
226
+ fp = eclat(transactions, parameter = list(support = 0.1))
227
+ fpDF = as(fp, "data.frame")
228
+ fpDF$support = fpDF$support * length(transactions)
229
+ names(fpDF)[names(fpDF) == "support"] = "freq"
230
+ > nrow(fpDF)
231
+ [1] 65
232
+ */
119
233
assert(model1.freqItemsets.count() === 65 )
120
234
}
121
235
}
0 commit comments