diff --git a/.gitignore b/.gitignore
index b72dc7014..3c6151425 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,9 @@ __pycache__
 *.pbxproj
 *.xcworkspacedata
 .ipynb_checkpoints
+
+# Auto-generated files by `R CMD check`
+tfio.Rcheck/
+tfio_*.tar.gz
+.Rproj.user
+
diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore
index fc28d4b80..31becb436 100644
--- a/R-package/.Rbuildignore
+++ b/R-package/.Rbuildignore
@@ -1,3 +1,4 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
 ^man-roxygen/
+scripts
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index d6a78cb3b..e80872668 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -24,6 +24,7 @@ Imports:
     reticulate (>= 1.10),
     tensorflow (>= 1.9),
     tfdatasets (>= 1.9),
+    forge,
    magrittr,
     rlang,
     tidyselect,
@@ -32,7 +33,5 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.0
 Suggests:
     testthat,
-    knitr,
-    tfestimators,
-    keras
+    knitr
 VignetteBuilder: knitr
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index af3467069..256a822c8 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -1,9 +1,36 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(dataset_batch)
+export(dataset_cache)
+export(dataset_concatenate)
+export(dataset_filter)
+export(dataset_flat_map)
+export(dataset_interleave)
+export(dataset_map)
+export(dataset_map_and_batch)
+export(dataset_padded_batch)
+export(dataset_prefetch)
+export(dataset_prefetch_to_device)
+export(dataset_prepare)
+export(dataset_repeat)
+export(dataset_shard)
+export(dataset_shuffle)
+export(dataset_shuffle_and_repeat)
+export(dataset_skip)
+export(dataset_take)
+export(ignite_dataset)
 export(install_tensorflow)
+export(kafka_dataset)
+export(kinesis_dataset)
+export(next_batch)
+export(sequence_file_dataset)
 export(tf)
 export(tf_config)
+export(tf_version)
+export(until_out_of_range)
+export(with_dataset)
+import(forge)
 import(rlang)
 import(tfdatasets)
 import(tidyselect)
@@ -16,3 +43,25 @@ importFrom(reticulate,tuple)
 importFrom(tensorflow,install_tensorflow)
 importFrom(tensorflow,tf)
 importFrom(tensorflow,tf_config)
+importFrom(tensorflow,tf_version)
+importFrom(tfdatasets,dataset_batch)
+importFrom(tfdatasets,dataset_cache)
+importFrom(tfdatasets,dataset_concatenate)
+importFrom(tfdatasets,dataset_filter)
+importFrom(tfdatasets,dataset_flat_map)
+importFrom(tfdatasets,dataset_interleave)
+importFrom(tfdatasets,dataset_map)
+importFrom(tfdatasets,dataset_map_and_batch)
+importFrom(tfdatasets,dataset_padded_batch)
+importFrom(tfdatasets,dataset_prefetch)
+importFrom(tfdatasets,dataset_prefetch_to_device)
+importFrom(tfdatasets,dataset_prepare)
+importFrom(tfdatasets,dataset_repeat)
+importFrom(tfdatasets,dataset_shard)
+importFrom(tfdatasets,dataset_shuffle)
+importFrom(tfdatasets,dataset_shuffle_and_repeat)
+importFrom(tfdatasets,dataset_skip)
+importFrom(tfdatasets,dataset_take)
+importFrom(tfdatasets,next_batch)
+importFrom(tfdatasets,until_out_of_range)
+importFrom(tfdatasets,with_dataset)
diff --git a/R-package/R/dataset_utils.R b/R-package/R/dataset_utils.R
index c8418a83e..37bbef49c 100644
--- a/R-package/R/dataset_utils.R
+++ b/R-package/R/dataset_utils.R
@@ -1,15 +1,15 @@
 as_tf_dataset <- function (dataset) {
-  if (!is_dataset(dataset)) 
+  if (!is_dataset(dataset))
     stop("Provided dataset is not a TensorFlow Dataset")
-  if (!inherits(dataset, "tf_dataset")) 
+  if (!inherits(dataset, "tf_dataset"))
     class(dataset) <- c("tf_dataset", class(dataset))
   dataset
 }
 
 is_dataset <- function (x) {
-  inherits(x, "tensorflow.python.data.ops.dataset_ops.Dataset") || is_tfio_dataset(X)
+  inherits(x, "tensorflow.python.data.ops.dataset_ops.Dataset") || is_tfio_dataset(x)
 }
 
 is_tfio_dataset <- function(x) {
-  "tensorflow_io" %in% class(x)
+  any(grepl("tensorflow_io", class(x)))
 }
diff --git a/R-package/R/hadoop_dataset.R b/R-package/R/hadoop_dataset.R
new file mode 100644
index 000000000..8386fbdbc
--- /dev/null
+++ b/R-package/R/hadoop_dataset.R
@@ -0,0 +1,14 @@
+#' Create a `SequenceFileDataset`.
+#'
+#' This function allows a user to read data from a Hadoop SequenceFile,
+#' which stores (key, value) pairs sequentially. At the moment,
+#' `org.apache.hadoop.io.Text` is the only supported serialization type,
+#' and there is no compression support.
+#'
+#' @param filenames A `tf.string` tensor containing one or more filenames.
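+#'
+#' @examples
+#' \dontrun{
+#' # A minimal sketch: reads the sample SequenceFile shipped with the
+#' # package tests, then repeats it twice.
+#' dataset <- sequence_file_dataset("testdata/string.seq") %>%
+#'   dataset_repeat(2)
+#' }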
+#'
+#' @export
+sequence_file_dataset <- function(filenames) {
+  dataset <- tfio_lib$hadoop$SequenceFileDataset(filenames = filenames)
+  as_tf_dataset(dataset)
+}
diff --git a/R-package/R/ignite_dataset.R b/R-package/R/ignite_dataset.R
new file mode 100644
index 000000000..3288c6fe8
--- /dev/null
+++ b/R-package/R/ignite_dataset.R
@@ -0,0 +1,56 @@
+#' Create an `IgniteDataset`.
+#'
+#' Apache Ignite is a memory-centric distributed database, caching, and
+#' processing platform for transactional, analytical, and streaming
+#' workloads, delivering in-memory speeds at petabyte scale. This contrib
+#' package contains an integration between Apache Ignite and TensorFlow,
+#' based on tf.data on the TensorFlow side and on the Binary Client Protocol
+#' on the Apache Ignite side. It allows using Apache Ignite as a data source
+#' for neural network training, inference, and all other computations
+#' supported by TensorFlow.
+#'
+#' @param cache_name Cache name to be used as data source.
+#' @param host Apache Ignite Thin Client host to connect to.
+#' @param port Apache Ignite Thin Client port to connect to.
+#' @param local Local flag that defines whether to query only local data.
+#' @param part Number of partitions to be queried.
+#' @param page_size Apache Ignite Thin Client page size.
+#' @param username Apache Ignite Thin Client authentication username.
+#' @param password Apache Ignite Thin Client authentication password.
+#' @param certfile File in PEM format containing the certificate as well as
+#'   any number of CA certificates needed to establish the certificate's
+#'   authenticity.
+#' @param keyfile File containing the private key (otherwise the private key
+#'   will be taken from certfile as well).
+#' @param cert_password Password to be used if the private key is encrypted
+#'   and a password is necessary.
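+#'
+#' @examples
+#' \dontrun{
+#' # A minimal sketch, assuming an Ignite node is reachable on
+#' # localhost:10800 and exposes a cache named "KITTEN_CACHE"
+#' # (both values are illustrative):
+#' dataset <- ignite_dataset(cache_name = "KITTEN_CACHE") %>%
+#'   dataset_repeat(1)
+#' }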
+#'
+#' @export
+ignite_dataset <- function(
+  cache_name,
+  host = "localhost",
+  port = 10800,
+  local = FALSE,
+  part = -1,
+  page_size = 100,
+  username = NULL,
+  password = NULL,
+  certfile = NULL,
+  keyfile = NULL,
+  cert_password = NULL) {
+  dataset <- tfio_lib$ignite$IgniteDataset(
+    cache_name = cache_name,
+    host = host,
+    port = cast_scalar_integer(port),
+    local = cast_logical(local),
+    part = cast_scalar_integer(part),
+    page_size = cast_scalar_integer(page_size),
+    username = cast_nullable_string(username),
+    password = cast_nullable_string(password),
+    certfile = cast_nullable_string(certfile),
+    keyfile = cast_nullable_string(keyfile),
+    cert_password = cast_nullable_string(cert_password)
+  )
+  as_tf_dataset(dataset)
+}
diff --git a/R-package/R/kafka_dataset.R b/R-package/R/kafka_dataset.R
new file mode 100644
index 000000000..895a3f767
--- /dev/null
+++ b/R-package/R/kafka_dataset.R
@@ -0,0 +1,27 @@
+#' Creates a `KafkaDataset`.
+#'
+#' @param topics A `tf.string` tensor containing one or more subscriptions,
+#'   in the format of `[topic:partition:offset:length]`; by default `length`
+#'   is -1 for unlimited.
+#' @param servers A list of bootstrap servers.
+#' @param group The consumer group id.
+#' @param eof If `TRUE`, the Kafka reader will stop on `EOF`.
+#' @param timeout The timeout value for the Kafka Consumer to wait (in
+#'   milliseconds).
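+#'
+#' @examples
+#' \dontrun{
+#' # A minimal sketch, assuming a Kafka broker on localhost with a topic
+#' # "test"; "test:0:0:4" subscribes to partition 0, offsets 0 through 4
+#' # (the topic spec is illustrative):
+#' dataset <- kafka_dataset(
+#'     topics = list("test:0:0:4"), group = "test", eof = TRUE) %>%
+#'   dataset_repeat(1)
+#' }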
+#'
+#' @export
+kafka_dataset <- function(
+  topics,
+  servers = "localhost",
+  group = "",
+  eof = FALSE,
+  timeout = 1000) {
+  dataset <- tfio_lib$kafka$KafkaDataset(
+    topics = topics,
+    servers = servers,
+    group = group,
+    eof = cast_logical(eof),
+    timeout = cast_scalar_integer(timeout)
+  )
+  as_tf_dataset(dataset)
+}
diff --git a/R-package/R/kinesis_dataset.R b/R-package/R/kinesis_dataset.R
new file mode 100644
index 000000000..967bf585c
--- /dev/null
+++ b/R-package/R/kinesis_dataset.R
@@ -0,0 +1,28 @@
+#' Creates a `KinesisDataset`.
+#'
+#' Kinesis is a managed service provided by AWS for data streaming.
+#' This dataset reads messages from Kinesis with each message presented
+#' as a `tf.string`.
+#'
+#' @param stream A `tf.string` tensor containing the name of the stream.
+#' @param shard A `tf.string` tensor containing the id of the shard.
+#' @param read_indefinitely If `TRUE`, the Kinesis dataset will keep retrying
+#'   on `EOF` after the `interval` period. If `FALSE`, the dataset will stop
+#'   on `EOF`. The default value is `TRUE`.
+#' @param interval The interval for the Kinesis Client to wait before it
+#'   tries to get records again (in milliseconds).
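+#'
+#' @examples
+#' \dontrun{
+#' # A minimal sketch, assuming AWS credentials are configured and a
+#' # Kinesis stream named "my-stream" already exists (the stream name
+#' # is illustrative):
+#' dataset <- kinesis_dataset("my-stream", read_indefinitely = FALSE)
+#' }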
+#'
+#' @export
+kinesis_dataset <- function(
+  stream,
+  shard = "",
+  read_indefinitely = TRUE,
+  interval = 100000) {
+  dataset <- tfio_lib$kinesis$KinesisDataset(
+    stream = stream,
+    shard = shard,
+    read_indefinitely = cast_logical(read_indefinitely),
+    interval = cast_scalar_integer(interval)
+  )
+  as_tf_dataset(dataset)
+}
diff --git a/R-package/R/package.R b/R-package/R/package.R
index 8f93111f2..69583bde4 100644
--- a/R-package/R/package.R
+++ b/R-package/R/package.R
@@ -12,6 +12,7 @@ NULL
 #' @import tidyselect
 #' @import rlang
 #' @import tfdatasets
+#' @import forge
 NULL
 
 tfio_lib <- NULL
@@ -35,8 +36,7 @@ tfio_lib <- NULL
     }
   )
 
-  # TODO: This is commented out for now until we add the wrappers.
-  # tfio_lib <<- import("tensorflow_io", delay_load = delay_load)
+  tfio_lib <<- import("tensorflow_io", delay_load = delay_load)
 
 }
@@ -63,25 +63,3 @@ check_tensorflow_version <- function(displayed_warning) {
 
 .onDetach <- function(libpath) {
 
 }
-
-# Reusable function for registering a set of methods with S3 manually. The
-# methods argument is a list of character vectors, each of which has the form
-# c(package, genname, class).
-registerMethods <- function(methods) {
-  lapply(methods, function(method) {
-    pkg <- method[[1]]
-    generic <- method[[2]]
-    class <- method[[3]]
-    func <- get(paste(generic, class, sep = "."))
-    if (pkg %in% loadedNamespaces()) {
-      registerS3method(generic, class, func, envir = asNamespace(pkg))
-    }
-    setHook(
-      packageEvent(pkg, "onLoad"),
-      function(...) {
-        registerS3method(generic, class, func, envir = asNamespace(pkg))
-      }
-    )
-  })
-}
-
diff --git a/R-package/R/reexports.R b/R-package/R/reexports.R
index acd6b6096..4e5c8ff6c 100644
--- a/R-package/R/reexports.R
+++ b/R-package/R/reexports.R
@@ -21,3 +21,97 @@ tensorflow::install_tensorflow
 
 #' @importFrom tensorflow tf_config
 #' @export
 tensorflow::tf_config
+
+#' @importFrom tensorflow tf_version
+#' @export
+tensorflow::tf_version
+
+
+# Re-exports from tfdatasets dataset_iterators
+
+#' @importFrom tfdatasets next_batch
+#' @export
+tfdatasets::next_batch
+
+#' @importFrom tfdatasets with_dataset
+#' @export
+tfdatasets::with_dataset
+
+#' @importFrom tfdatasets until_out_of_range
+#' @export
+tfdatasets::until_out_of_range
+
+
+# Re-exports from tfdatasets dataset_methods
+
+#' @importFrom tfdatasets dataset_repeat
+#' @export
+tfdatasets::dataset_repeat
+
+#' @importFrom tfdatasets dataset_shuffle
+#' @export
+tfdatasets::dataset_shuffle
+
+#' @importFrom tfdatasets dataset_shuffle_and_repeat
+#' @export
+tfdatasets::dataset_shuffle_and_repeat
+
+#' @importFrom tfdatasets dataset_batch
+#' @export
+tfdatasets::dataset_batch
+
+#' @importFrom tfdatasets dataset_cache
+#' @export
+tfdatasets::dataset_cache
+
+#' @importFrom tfdatasets dataset_concatenate
+#' @export
+tfdatasets::dataset_concatenate
+
+#' @importFrom tfdatasets dataset_take
+#' @export
+tfdatasets::dataset_take
+
+#' @importFrom tfdatasets dataset_map
+#' @export
+tfdatasets::dataset_map
+
+#' @importFrom tfdatasets dataset_map_and_batch
+#' @export
+tfdatasets::dataset_map_and_batch
+
+#' @importFrom tfdatasets dataset_flat_map
+#' @export
+tfdatasets::dataset_flat_map
+
+#' @importFrom tfdatasets dataset_prefetch
+#' @export
+tfdatasets::dataset_prefetch
+
+#' @importFrom tfdatasets dataset_prefetch_to_device
+#' @export
+tfdatasets::dataset_prefetch_to_device
+
+#' @importFrom tfdatasets dataset_filter
+#' @export
+tfdatasets::dataset_filter
+
+#' @importFrom tfdatasets dataset_skip
+#' @export
+tfdatasets::dataset_skip
+
+#' @importFrom tfdatasets dataset_interleave
+#' @export
+tfdatasets::dataset_interleave
+
+#' @importFrom tfdatasets dataset_shard
+#' @export
+tfdatasets::dataset_shard
+
+#' @importFrom tfdatasets dataset_padded_batch
+#' @export
+tfdatasets::dataset_padded_batch
+
+#' @importFrom tfdatasets dataset_prepare
+#' @export
+tfdatasets::dataset_prepare
diff --git a/R-package/man/ignite_dataset.Rd b/R-package/man/ignite_dataset.Rd
new file mode 100644
index 000000000..5b6f4234b
--- /dev/null
+++ b/R-package/man/ignite_dataset.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ignite_dataset.R
+\name{ignite_dataset}
+\alias{ignite_dataset}
+\title{Create an \code{IgniteDataset}.}
+\usage{
+ignite_dataset(cache_name, host = "localhost", port = 10800,
+  local = FALSE, part = -1, page_size = 100, username = NULL,
+  password = NULL, certfile = NULL, keyfile = NULL,
+  cert_password = NULL)
+}
+\arguments{
+\item{cache_name}{Cache name to be used as data source.}
+
+\item{host}{Apache Ignite Thin Client host to connect to.}
+
+\item{port}{Apache Ignite Thin Client port to connect to.}
+
+\item{local}{Local flag that defines whether to query only local data.}
+
+\item{part}{Number of partitions to be queried.}
+
+\item{page_size}{Apache Ignite Thin Client page size.}
+
+\item{username}{Apache Ignite Thin Client authentication username.}
+
+\item{password}{Apache Ignite Thin Client authentication password.}
+
+\item{certfile}{File in PEM format containing the certificate as well as any
+number of CA certificates needed to establish the certificate's
+authenticity.}
+
+\item{keyfile}{File containing the private key (otherwise the private key
+will be taken from certfile as well).}
+
+\item{cert_password}{Password to be used if the private key is encrypted and
+a password is necessary.}
+}
+\description{
+Apache Ignite is a memory-centric distributed database, caching, and
+processing platform for transactional, analytical, and streaming workloads,
+delivering in-memory speeds at petabyte scale. This contrib package
+contains an integration between Apache Ignite and TensorFlow, based on
+tf.data on the TensorFlow side and on the Binary Client Protocol on the
+Apache Ignite side. It allows using Apache Ignite as a data source for
+neural network training, inference, and all other computations supported
+by TensorFlow.
+}
diff --git a/R-package/man/kafka_dataset.Rd b/R-package/man/kafka_dataset.Rd
new file mode 100644
index 000000000..8ff10110f
--- /dev/null
+++ b/R-package/man/kafka_dataset.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/kafka_dataset.R
+\name{kafka_dataset}
+\alias{kafka_dataset}
+\title{Creates a \code{KafkaDataset}.}
+\usage{
+kafka_dataset(topics, servers = "localhost", group = "", eof = FALSE,
+  timeout = 1000)
+}
+\arguments{
+\item{topics}{A \code{tf.string} tensor containing one or more subscriptions,
+in the format of \code{[topic:partition:offset:length]}; by default
+\code{length} is -1 for unlimited.}
+
+\item{servers}{A list of bootstrap servers.}
+
+\item{group}{The consumer group id.}
+
+\item{eof}{If \code{TRUE}, the Kafka reader will stop on \code{EOF}.}
+
+\item{timeout}{The timeout value for the Kafka Consumer to wait (in
+milliseconds).}
+}
+\description{
+Creates a \code{KafkaDataset}.
+}
diff --git a/R-package/man/kinesis_dataset.Rd b/R-package/man/kinesis_dataset.Rd
new file mode 100644
index 000000000..986e2c119
--- /dev/null
+++ b/R-package/man/kinesis_dataset.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/kinesis_dataset.R
+\name{kinesis_dataset}
+\alias{kinesis_dataset}
+\title{Creates a \code{KinesisDataset}.}
+\usage{
+kinesis_dataset(stream, shard = "", read_indefinitely = TRUE,
+  interval = 1e+05)
+}
+\arguments{
+\item{stream}{A \code{tf.string} tensor containing the name of the stream.}
+
+\item{shard}{A \code{tf.string} tensor containing the id of the shard.}
+
+\item{read_indefinitely}{If \code{TRUE}, the Kinesis dataset will keep
+retrying on \code{EOF} after the \code{interval} period. If \code{FALSE}, the
+dataset will stop on \code{EOF}. The default value is \code{TRUE}.}
+
+\item{interval}{The interval for the Kinesis Client to wait before it
+tries to get records again (in milliseconds).}
+}
+\description{
+Kinesis is a managed service provided by AWS for data streaming.
+This dataset reads messages from Kinesis with each message presented
+as a \code{tf.string}.
+}
diff --git a/R-package/man/reexports.Rd b/R-package/man/reexports.Rd
index 8a1dd55e5..5f5f5725c 100644
--- a/R-package/man/reexports.Rd
+++ b/R-package/man/reexports.Rd
@@ -6,6 +6,28 @@
 \alias{tf}
 \alias{install_tensorflow}
 \alias{tf_config}
+\alias{tf_version}
+\alias{next_batch}
+\alias{with_dataset}
+\alias{until_out_of_range}
+\alias{dataset_repeat}
+\alias{dataset_shuffle}
+\alias{dataset_shuffle_and_repeat}
+\alias{dataset_batch}
+\alias{dataset_cache}
+\alias{dataset_concatenate}
+\alias{dataset_take}
+\alias{dataset_map}
+\alias{dataset_map_and_batch}
+\alias{dataset_flat_map}
+\alias{dataset_prefetch}
+\alias{dataset_prefetch_to_device}
+\alias{dataset_filter}
+\alias{dataset_skip}
+\alias{dataset_interleave}
+\alias{dataset_shard}
+\alias{dataset_padded_batch}
+\alias{dataset_prepare}
 \title{Objects exported from other packages}
 \keyword{internal}
 \description{
@@ -13,6 +35,8 @@ These objects are imported from other packages. Follow the links below to
 see their documentation.
 
 \describe{
-  \item{tensorflow}{\code{\link[tensorflow]{tf}}, \code{\link[tensorflow]{install_tensorflow}}, \code{\link[tensorflow]{tf_config}}}
+  \item{tensorflow}{\code{\link[tensorflow]{tf}}, \code{\link[tensorflow]{install_tensorflow}}, \code{\link[tensorflow]{tf_config}}, \code{\link[tensorflow]{tf_version}}}
+
+  \item{tfdatasets}{\code{\link[tfdatasets]{next_batch}}, \code{\link[tfdatasets]{with_dataset}}, \code{\link[tfdatasets]{until_out_of_range}}, \code{\link[tfdatasets]{dataset_repeat}}, \code{\link[tfdatasets]{dataset_shuffle}}, \code{\link[tfdatasets]{dataset_shuffle_and_repeat}}, \code{\link[tfdatasets]{dataset_batch}}, \code{\link[tfdatasets]{dataset_cache}}, \code{\link[tfdatasets]{dataset_concatenate}}, \code{\link[tfdatasets]{dataset_take}}, \code{\link[tfdatasets]{dataset_map}}, \code{\link[tfdatasets]{dataset_map_and_batch}}, \code{\link[tfdatasets]{dataset_flat_map}}, \code{\link[tfdatasets]{dataset_prefetch}}, \code{\link[tfdatasets]{dataset_prefetch_to_device}}, \code{\link[tfdatasets]{dataset_filter}}, \code{\link[tfdatasets]{dataset_skip}}, \code{\link[tfdatasets]{dataset_interleave}}, \code{\link[tfdatasets]{dataset_shard}}, \code{\link[tfdatasets]{dataset_padded_batch}}, \code{\link[tfdatasets]{dataset_prepare}}}
 }}
diff --git a/R-package/man/sequence_file_dataset.Rd b/R-package/man/sequence_file_dataset.Rd
new file mode 100644
index 000000000..5da68ad3b
--- /dev/null
+++ b/R-package/man/sequence_file_dataset.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hadoop_dataset.R
+\name{sequence_file_dataset}
+\alias{sequence_file_dataset}
+\title{Create a \code{SequenceFileDataset}.}
+\usage{
+sequence_file_dataset(filenames)
+}
+\arguments{
+\item{filenames}{A \code{tf.string} tensor containing one or more filenames.}
+}
+\description{
+This function allows a user to read data from a Hadoop SequenceFile,
+which stores (key, value) pairs sequentially. At the moment,
+\code{org.apache.hadoop.io.Text} is the only supported serialization type,
+and there is no compression support.
+}
diff --git a/R-package/scripts/Dockerfile b/R-package/scripts/Dockerfile
new file mode 100644
index 000000000..f7f0385be
--- /dev/null
+++ b/R-package/scripts/Dockerfile
@@ -0,0 +1,18 @@
+FROM r-base
+COPY . .
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+      build-essential \
+      python-dev \
+      python-setuptools \
+      python-pip && \
+    rm -rf /var/lib/apt/lists/*
+
+# Dependencies
+RUN pip install tensorflow-io
+RUN Rscript -e 'install.packages(c("Rcpp", "reticulate", "knitr", "tensorflow", "tfdatasets", "forge", "tidyselect"))'
+
+# tfio package installation
+RUN R CMD build R-package/
+RUN R CMD INSTALL tfio_*.tar.gz
diff --git a/R-package/tests/testthat/test-datasets-ops.R b/R-package/tests/testthat/test-datasets-ops.R
index f21796778..c3bdb0891 100644
--- a/R-package/tests/testthat/test-datasets-ops.R
+++ b/R-package/tests/testthat/test-datasets-ops.R
@@ -2,6 +2,7 @@ context("TensorFlow IO dataset ops")
 
 source("utils.R")
 
-test_succeeds("All TensorFlow IO dataset ops work", {
-  print("Placeholder for now")
+test_succeeds("sequence_file_dataset() works successfully", {
+  sequence_file_dataset("testdata/string.seq") %>%
+    dataset_repeat(2)
 })
diff --git a/R-package/tests/testthat/testdata/string.seq b/R-package/tests/testthat/testdata/string.seq
new file mode 100755
index 000000000..b7175338a
Binary files /dev/null and b/R-package/tests/testthat/testdata/string.seq differ
diff --git a/README.md b/README.md
index 1cb073033..d3c07fc05 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,22 @@ $ # In docker
 $ R -e "devtools::test()"
 ```
 
+Alternatively, we also provide a reference Dockerfile [here](R-package/scripts/Dockerfile)
+so that you can use the R package directly for testing. You can build it via:
+```
+docker build -t tfio-r-dev -f R-package/scripts/Dockerfile .
+```
+
+Inside the container, you can start an R session, instantiate a `SequenceFileDataset`
+from an example [Hadoop SequenceFile](https://wiki.apache.org/hadoop/SequenceFile)
+([string.seq](R-package/tests/testthat/testdata/string.seq)), and then apply any of the
+[transformation functions](https://tensorflow.rstudio.com/tools/tfdatasets/articles/introduction.html#transformations)
+provided by the [tfdatasets package](https://tensorflow.rstudio.com/tools/tfdatasets/)
+to the dataset, like the following:
+
+```{R}
+library(tfio)
+dataset <- sequence_file_dataset("R-package/tests/testthat/testdata/string.seq") %>%
+  dataset_repeat(2)
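+
+# A hypothetical follow-on sketch (TensorFlow 1.x graph mode): next_batch()
+# and until_out_of_range() are re-exported from tfdatasets and iterate the
+# dataset until it is exhausted.
+sess <- tf$Session()
+batch <- next_batch(dataset)
+until_out_of_range({
+  value <- sess$run(batch)
+  print(value)
+})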
+```
+
 ## License
 
 [Apache License 2.0](LICENSE)