From e90dd2cb94fff274e4a595e40e747306253e1be0 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Tue, 22 Dec 2020 14:47:37 +0530 Subject: [PATCH 1/4] [mongoDB] update API docs --- .../experimental/mongodb_dataset_ops.py | 51 ++++++++++++++++++- .../python/experimental/mongodb_writer_ops.py | 49 +++++++++++++++++- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py index 7f0b963f3..82547dd56 100644 --- a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py +++ b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py @@ -35,7 +35,9 @@ def get_healthy_resource(self): """Retrieve the resource which is connected to a healthy node""" resource = core_ops.io_mongo_db_readable_init( - uri=self.uri, database=self.database, collection=self.collection, + uri=self.uri, + database=self.database, + collection=self.collection, ) print("Connection successful: {}".format(self.uri)) return resource @@ -54,9 +56,54 @@ def get_next_batch(self, resource): class MongoDBIODataset(tf.data.Dataset): - """Fetch records from mongoDB""" + """Fetch records from mongoDB + + The dataset aids in faster retrieval of data from MongoDB collections. + + To make a connection and read the documents from the mongo collections, + the `tfio.experimental.mongodb.MongoDBWriter` API can be used. + + Example: + + >>> URI = "mongodb://mongoadmin:default_password@localhost:27017" + >>> DATABASE = "tfiodb" + >>> COLLECTION = "test" + >>> dataset = tfio.experimental.mongodb.MongoDBIODataset( + uri=URI, database=DATABASE, collection=COLLECTION) + + Perform operations on the dataset as one would with any `tf.data.Dataset` + >>> dataset = dataset.map(transform_func) + >>> dataset = dataset.batch(batch_size) + + Assuming the user has already built a `tf.keras` model, the dataset can be directly + passed for training purposes. 
+ + >>> model.fit(dataset) # to train + >>> model.predict(dataset) # to infer + + """ def __init__(self, uri, database, collection): + """Initialize the dataset with the following parameters + + Args: + uri: The uri of the mongo server or replicaset to connect to. + - To connect to a MongoDB server with username and password + based authentication, the following uri pattern can be used. + Example: `"mongodb://mongoadmin:default_password@localhost:27017"`. + + - Connecting to a replica set is much like connecting to a + standalone MongoDB server. Simply specify the replica set name + using the `?replicaSet=myreplset` URI option. + Example: "mongodb://host01:27017,host02:27017,host03:27017/?replicaSet=myreplset" + + Additional information on writing URIs can be found here: + - [libmongoc uri docs](http://mongoc.org/libmongoc/current/mongoc_uri_t.html) + - [mongodb uri docs](https://docs.mongodb.com/manual/reference/connection-string/) + database: The database in the standalone MongoDB server or a replica set + to connect to. + collection: The collection from which the documents have to be retrieved. + """ handler = _MongoDBHandler(uri=uri, database=database, collection=collection) resource = handler.get_healthy_resource() dataset = tf.data.experimental.Counter() diff --git a/tensorflow_io/core/python/experimental/mongodb_writer_ops.py b/tensorflow_io/core/python/experimental/mongodb_writer_ops.py index 3af8a14bf..b6fc6d0fb 100644 --- a/tensorflow_io/core/python/experimental/mongodb_writer_ops.py +++ b/tensorflow_io/core/python/experimental/mongodb_writer_ops.py @@ -22,15 +22,60 @@ class MongoDBWriter: - """Write documents to mongoDB""" + """Write documents to mongoDB. + + The writer can be used to store documents in mongoDB while dealing with tensorflow + based models and inference outputs. Without loss of generality, consider an ML + model that is being used for inference. 
The outputs of inference can be modelled into + a structured record by enriching the schema with additional information (e.g. metadata + about input data and the semantics of the inference etc.) and can be stored in mongo + collections for persistence or future analysis. + + To make a connection and write the documents to the mongo collections, + the `tfio.experimental.mongodb.MongoDBWriter` API can be used. + + Example: + + >>> URI = "mongodb://mongoadmin:default_password@localhost:27017" + >>> DATABASE = "tfiodb" + >>> COLLECTION = "test" + >>> writer = tfio.experimental.mongodb.MongoDBWriter( + uri=URI, database=DATABASE, collection=COLLECTION + ) + >>> for i in range(1000): + ... data = {"key{}".format(i): "value{}".format(i)} + ... writer.write(data) + + """ def __init__(self, uri, database, collection): + """Initialize the writer with the following parameters + + Args: + uri: The uri of the mongo server or replicaset to connect to. + - To connect to a MongoDB server with username and password + based authentication, the following uri pattern can be used. + Example: `"mongodb://mongoadmin:default_password@localhost:27017"`. + + - Connecting to a replica set is much like connecting to a + standalone MongoDB server. Simply specify the replica set name + using the `?replicaSet=myreplset` URI option. + Example: "mongodb://host01:27017,host02:27017,host03:27017/?replicaSet=myreplset" + Additional information on writing URIs can be found here: + - [libmongoc uri docs](http://mongoc.org/libmongoc/current/mongoc_uri_t.html) + - [mongodb uri docs](https://docs.mongodb.com/manual/reference/connection-string/) + database: The database in the standalone MongoDB server or a replica set + to connect to. + collection: The collection to which the documents have to be written. 
+ """ self.uri = uri self.database = database self.collection = collection self.resource = core_ops.io_mongo_db_writable_init( - uri=self.uri, database=self.database, collection=self.collection, + uri=self.uri, + database=self.database, + collection=self.collection, ) def write(self, doc): From 992ccd92993fd8078a1b52d6d27c9b6aaee67a69 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Tue, 22 Dec 2020 14:50:34 +0530 Subject: [PATCH 2/4] lint fixes --- tensorflow_io/core/python/experimental/mongodb_dataset_ops.py | 4 +--- tensorflow_io/core/python/experimental/mongodb_writer_ops.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py index 82547dd56..941d0147a 100644 --- a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py +++ b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py @@ -35,9 +35,7 @@ def get_healthy_resource(self): """Retrieve the resource which is connected to a healthy node""" resource = core_ops.io_mongo_db_readable_init( - uri=self.uri, - database=self.database, - collection=self.collection, + uri=self.uri, database=self.database, collection=self.collection, ) print("Connection successful: {}".format(self.uri)) return resource diff --git a/tensorflow_io/core/python/experimental/mongodb_writer_ops.py b/tensorflow_io/core/python/experimental/mongodb_writer_ops.py index b6fc6d0fb..cb1fff29e 100644 --- a/tensorflow_io/core/python/experimental/mongodb_writer_ops.py +++ b/tensorflow_io/core/python/experimental/mongodb_writer_ops.py @@ -73,9 +73,7 @@ def __init__(self, uri, database, collection): self.database = database self.collection = collection self.resource = core_ops.io_mongo_db_writable_init( - uri=self.uri, - database=self.database, - collection=self.collection, + uri=self.uri, database=self.database, collection=self.collection, ) def write(self, doc): From 
e849b57eca19f84048d5a86cf526255097cf3926 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Tue, 22 Dec 2020 14:52:28 +0530 Subject: [PATCH 3/4] rename wrong API --- .../core/python/experimental/mongodb_dataset_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py index 941d0147a..dbf398702 100644 --- a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py +++ b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py @@ -35,7 +35,9 @@ def get_healthy_resource(self): """Retrieve the resource which is connected to a healthy node""" resource = core_ops.io_mongo_db_readable_init( - uri=self.uri, database=self.database, collection=self.collection, + uri=self.uri, + database=self.database, + collection=self.collection, ) print("Connection successful: {}".format(self.uri)) return resource @@ -59,7 +61,7 @@ class MongoDBIODataset(tf.data.Dataset): The dataset aids in faster retrieval of data from MongoDB collections. To make a connection and read the documents from the mongo collections, - the `tfio.experimental.mongodb.MongoDBWriter` API can be used. + the `tfio.experimental.mongodb.MongoDBIODataset` API can be used. 
Example: From e7b784b48bb13752ade95d34dcbcb8a45c75c535 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Tue, 22 Dec 2020 14:55:20 +0530 Subject: [PATCH 4/4] lint fixes --- tensorflow_io/core/python/experimental/mongodb_dataset_ops.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py index dbf398702..7331db65f 100644 --- a/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py +++ b/tensorflow_io/core/python/experimental/mongodb_dataset_ops.py @@ -35,9 +35,7 @@ def get_healthy_resource(self): """Retrieve the resource which is connected to a healthy node""" resource = core_ops.io_mongo_db_readable_init( - uri=self.uri, - database=self.database, - collection=self.collection, + uri=self.uri, database=self.database, collection=self.collection, ) print("Connection successful: {}".format(self.uri)) return resource