Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions impyute/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@

### Top Level Modules

__all__ = ["datasets", "utils", "deletions", "filters"]
__all__ = ["dataset", "util", "deletion", "filter"]

### Cross Sectional Imputations

from impyute.imputations.cs import mean_imputation as mean
from impyute.imputations.cs import median_imputation as median
from impyute.imputations.cs import mode_imputation as mode
from impyute.imputations.cs import em
from impyute.imputations.cs import fast_knn
from impyute.imputations.cs import mice
from impyute.imputations.cs import random_imputation as random
from impyute.imputation.cs import mean
from impyute.imputation.cs import median
from impyute.imputation.cs import mode
from impyute.imputation.cs import em
from impyute.imputation.cs import fast_knn
from impyute.imputation.cs import mice
from impyute.imputation.cs import random

__all__.extend([
"mean",
Expand All @@ -37,16 +37,16 @@

### Time Series Imputations

from impyute.imputations.ts import locf
from impyute.imputations.ts import arima
from impyute.imputation.ts import locf
from impyute.imputation.ts import arima

__all__.extend([
"locf",
"arima"
])

### Deletions
from impyute.deletions import complete_case
from impyute.deletion import complete_case

__all__.extend([
"complete_case"
Expand Down
6 changes: 3 additions & 3 deletions impyute/datasets/__init__.py → impyute/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""
Real-world/mock datasets and missingness corruptors to experiment with.
"""
from .base import random_uniform
from .base import random_normal
from .base import randu
from .base import randn
from .base import test_data
from .base import mnist

__all__ = ["random_uniform", "random_normal", "test_data", "mnist"]
__all__ = ["randu", "randn", "test_data", "mnist"]
8 changes: 4 additions & 4 deletions impyute/datasets/base.py → impyute/dataset/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
""" impyute.datasets.base
""" impyute.dataset.base

Load/generate data

"""
import numpy as np
from impyute.datasets.corrupt import Corruptor
from impyute.dataset.corrupt import Corruptor


def random_uniform(bound=(0, 10), shape=(5, 5), missingness="mcar",
def randu(bound=(0, 10), shape=(5, 5), missingness="mcar",
thr=0.2, dtype="int"):
""" Return randomly generated dataset of numbers with uniformly
distributed values between bound[0] and bound[1]
Expand Down Expand Up @@ -40,7 +40,7 @@ def random_uniform(bound=(0, 10), shape=(5, 5), missingness="mcar",
return raw_data


def random_normal(theta=(0, 1), shape=(5, 5), missingness="mcar", thr=0.2,
def randn(theta=(0, 1), shape=(5, 5), missingness="mcar", thr=0.2,
dtype="float"):
""" Return randomly generated dataset of numbers with normally
distributed values with given mean and sigma.
Expand Down
2 changes: 1 addition & 1 deletion impyute/datasets/corrupt.py → impyute/dataset/corrupt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" impyute.datasets.corrupt """
""" impyute.dataset.corrupt """
import numpy as np


Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" impyute.deletions.complete_case """
""" impyute.deletion.complete_case """
import numpy as np

def complete_case(data):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 14 additions & 0 deletions impyute/imputation/cs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Imputations for cross-sectional data.
"""

from .random import random
from .central_tendency import mean
from .central_tendency import mode
from .central_tendency import median
from .mice import mice
from .em import em
from .fast_knn import fast_knn

__all__ = ["random", "mean", "mode",
"median", "mice", "em", "fast_knn"]
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
""" impyute.imputations.cs.averaging_imputations """
""" impyute.imputation.cs.central_tendency """
import numpy as np
from impyute.utils import find_null
from impyute.utils import checks
from impyute.utils import preprocess
from impyute.util import find_null
from impyute.util import checks
from impyute.util import preprocess
# pylint:disable=unused-argument
# pylint:disable=invalid-name

@preprocess
@checks
def mean_imputation(data, **kwargs):
def mean(data, **kwargs):
""" Substitute missing values with the mean of that column.

Parameters
Expand All @@ -31,7 +31,7 @@ def mean_imputation(data, **kwargs):

@preprocess
@checks
def median_imputation(data, **kwargs):
def median(data, **kwargs):
""" Substitute missing values with the median of that column(middle).

Parameters
Expand All @@ -58,7 +58,7 @@ def median_imputation(data, **kwargs):

@preprocess
@checks
def mode_imputation(data, **kwargs):
def mode(data, **kwargs):
""" Substitute missing values with the mode of that column(most frequent).

In the case that there is a tie (there are multiple, most frequent values)
Expand Down
13 changes: 6 additions & 7 deletions impyute/imputations/cs/em.py → impyute/imputation/cs/em.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
""" impyute.imputations.cs.em"""
import random
""" impyute.imputation.cs.em"""
import numpy as np
from impyute.utils import find_null
from impyute.utils import preprocess
from impyute.utils import checks
from impyute.util import find_null
from impyute.util import preprocess
from impyute.util import checks
# pylint:disable=invalid-name
# pylint:disable=unused-argument

Expand Down Expand Up @@ -36,14 +35,14 @@ def em(data, loops=50, **kwargs):
col = data[:, int(y_i)]
mu = col[~np.isnan(col)].mean()
std = col[~np.isnan(col)].std()
col[x_i] = random.gauss(mu, std)
col[x_i] = np.random.normal(loc=mu, scale=std)
previous, i = 1, 1
for i in range(loops):
# Expectation
mu = col[~np.isnan(col)].mean()
std = col[~np.isnan(col)].std()
# Maximization
col[x_i] = random.gauss(mu, std)
col[x_i] = np.random.normal(loc=mu, scale=std)
# Break out of loop if likelihood doesn't change at least 10%
# and has run at least 5 times
delta = (col[x_i]-previous)/previous
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
""" impyute.imputations.cs.knn"""
""" impyute.imputation.cs.knn """
import numpy as np
from impyute.utils import find_null
from impyute.utils import checks
from impyute.utils import preprocess
from impyute.imputations.cs import mean_imputation
from impyute.util import find_null
from impyute.util import checks
from impyute.util import preprocess
from impyute.imputation.cs import mean
from scipy.spatial import KDTree
# pylint: disable=invalid-name
# pylint:disable=unused-argument
Expand Down Expand Up @@ -32,7 +32,7 @@ def fast_knn(data, k=3, **kwargs):

"""
null_xy = find_null(data)
data_c = mean_imputation(data)
data_c = mean(data)
kdtree = KDTree(data_c)

for x_i, y_i in null_xy:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""imputations.cs.mice"""
""" impyute.imputation.cs.mice """
import numpy as np
from sklearn.linear_model import LinearRegression
from impyute.utils import find_null
from impyute.utils import checks
from impyute.utils import preprocess
from impyute.util import find_null
from impyute.util import checks
from impyute.util import preprocess
# pylint: disable=too-many-locals
# pylint:disable=invalid-name
# pylint:disable=unused-argument
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
""" impyute.imputations.cs.random_imputation"""
""" impyute.imputation.cs.random """
import numpy as np
from impyute.utils import find_null
from impyute.utils import preprocess
from impyute.utils import checks
from impyute.util import find_null
from impyute.util import preprocess
from impyute.util import checks
# pylint:disable=invalid-name
# pylint:disable=unused-argument

@preprocess
@checks
def random_imputation(data, **kwargs):
def random(data, **kwargs):
""" Fill missing values in with a randomly selected value from the same
column.

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Autoregressive Integrated Moving Average Imputation"""
import numpy as np
from impyute.utils import find_null
from impyute.utils import checks
""" impyute.imputation.ts.arima """
from impyute.util import find_null
from impyute.util import checks
# pylint: disable=invalid-name

@checks
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" impyute.imputations.ts.locf """
""" impyute.imputation.ts.locf """
import numpy as np
from impyute.utils import find_null
from impyute.utils import checks
from impyute.util import find_null
from impyute.util import checks


@checks
Expand Down
14 changes: 0 additions & 14 deletions impyute/imputations/cs/__init__.py

This file was deleted.

3 changes: 1 addition & 2 deletions impyute/utils/__init__.py → impyute/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""

from .find_null import find_null
from .loggers import print_io
from .describe import describe
# from .mcar_test import mcar_test
from .count_missing import count_missing
Expand All @@ -12,5 +11,5 @@
from .compare import compare
from .preprocess import preprocess

__all__ = ["find_null", "print_io", "describe", "count_missing",
__all__ = ["find_null", "describe", "count_missing",
"checks", "compare", "BadInputError", "preprocess"]
6 changes: 3 additions & 3 deletions impyute/utils/checks.py → impyute/util/checks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
""" impyute.utils.check """
""" impyute.util.check """
from functools import wraps
import numpy as np
from impyute.utils import find_null
from impyute.utils import BadInputError
from impyute.util import find_null
from impyute.util import BadInputError
# pylint:disable=invalid-name

def checks(fn):
Expand Down
2 changes: 1 addition & 1 deletion impyute/utils/compare.py → impyute/util/compare.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""impyute.utils.compare.py"""
"""impyute.util.compare.py"""
import importlib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" impyute.utils.count_missing.py """
""" impyute.util.count_missing.py """
import numpy as np
from impyute.utils import find_null
from impyute.util import find_null


def count_missing(data):
Expand Down
4 changes: 2 additions & 2 deletions impyute/utils/describe.py → impyute/util/describe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" impyute.utils.describe """
from impyute.utils import find_null
""" impyute.util.describe """
from impyute.util import find_null


def describe(data): # verbose=True):
Expand Down
2 changes: 1 addition & 1 deletion impyute/utils/errors.py → impyute/util/errors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" impyute.utils.errors """
""" impyute.util.errors """
class BadInputError(Exception):
def __init__(self, value):
self.value = value
Expand Down
2 changes: 1 addition & 1 deletion impyute/utils/find_null.py → impyute/util/find_null.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" impyute.utils.find_null """
""" impyute.util.find_null """
import numpy as np


Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion impyute/utils/preprocess.py → impyute/util/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" impyute.utils.preprocess """
""" impyute.util.preprocess """
from functools import wraps
# pylint:disable=invalid-name

Expand Down
26 changes: 0 additions & 26 deletions impyute/utils/loggers.py

This file was deleted.

File renamed without changes.
4 changes: 2 additions & 2 deletions test/datasets/test_mnist.py → test/dataset/test_mnist.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""test_mnist.py"""
import unittest
import numpy as np
from impyute.datasets import mnist
from impyute.utils import find_null
from impyute.dataset import mnist
from impyute.util import find_null


@unittest.skip("takes a long time, adds 30 sec for each test")
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""test_complete_case.py"""
import unittest
import numpy as np
from impyute.datasets import test_data
from impyute.deletions import complete_case
from impyute.utils import checks
from impyute.dataset import test_data
from impyute.deletion import complete_case
from impyute.util import checks

@checks
class TestCC(unittest.TestCase):
Expand Down
File renamed without changes.
File renamed without changes.
Loading