Skip to content

Commit 6f8206c

Browse files
authored
Merge pull request #50 from IBM/fix_na_remover
Fix na remover
2 parents 9aa4820 + 416817d commit 6f8206c

File tree

2 files changed

+7
-8
lines changed

2 files changed

+7
-8
lines changed

src/naremover.jl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ export NARemover
2121
)
2222
2323
Removes columns with NAs greater than acceptance rate.
24-
Remove remaining NAs by rows and return the Dataframe.
24+
This assumes that it processes columns of features.
25+
The output column should not be part of input to avoid
26+
it being excluded if it fails the acceptance criteria.
2527
2628
Implements `fit!` and `transform!`.
2729
"""
@@ -55,6 +57,7 @@ function fit!(nad::NARemover, features::DataFrame, labels::Vector=[])
5557
nad.model = nad.args
5658
end
5759

60+
5861
function transform!(nad::NARemover, nfeatures::DataFrame)
5962
features = deepcopy(nfeatures)
6063
if features == DataFrame()
@@ -69,11 +72,7 @@ function transform!(nad::NARemover, nfeatures::DataFrame)
6972
end
7073
end
7174
xtr = features[:,colnames]
72-
if xtr != DataFrame()
73-
return xtr[completecases(xtr),:]
74-
else
75-
return DataFrame()
76-
end
75+
return xtr
7776
end
7877

7978
end

test/test_naremover.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ function test_naremover()
1010
Random.seed!(123)
1111
df = DataFrame(a=rand([1:3...,missing],100),b=rand([1:9...,missing],100),c=rand([1:20...,missing],100))
1212
nara = NARemover(0.25)
13-
@test fit_transform!(nara,df) |> Matrix |> sum == 1425
13+
@test fit_transform!(nara,df) |> Matrix |> x->sum(skipmissing(x)) == 1546
1414
narb = NARemover(0.05)
15-
@test fit_transform!(narb,df) |> Matrix |>sum == 1086
15+
@test fit_transform!(narb,df) |> Matrix |> x->sum(skipmissing(x)) == 1086
1616
narc = NARemover(0.01)
1717
@test fit_transform!(narc,df) == DataFrame()
1818
end

0 commit comments

Comments
 (0)