Issue 49: added toRedisHASHes function (#49) #269

Merged: 1 commit, Sep 17, 2020
8 changes: 8 additions & 0 deletions doc/rdd.md
@@ -137,6 +137,14 @@ sc.toRedisHASH(hashRDD, hashName, ttl)

By default, Hashes won't have any expiry set.

Use the following to store an RDD into multiple hashes:

```scala
sc.toRedisHASHes(hashRDD, ttl)
```

The `hashRDD` is an RDD of tuples (`hash name`, `Map[field name, field value]`).
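For example, a minimal sketch of building such an RDD (the hash and field names are illustrative only):

```scala
// Each element is (hash name, Map(field -> value)); the names are examples only.
val hashRDD = sc.parallelize(Seq(
  ("hash1", Map("field1" -> "value1", "field2" -> "value2")),
  ("hash2", Map("field3" -> "value3"))
))

// ttl = 0 (the default) stores the hashes without an expiry
sc.toRedisHASHes(hashRDD, 0)
```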

#### Lists
Use the following to store an RDD in a Redis List:

41 changes: 41 additions & 0 deletions src/main/scala/com/redislabs/provider/redis/redisFunctions.scala
@@ -5,6 +5,7 @@ import com.redislabs.provider.redis.util.ConnectionUtils.withConnection
import com.redislabs.provider.redis.util.PipelineUtils._
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import scala.collection.JavaConversions.mapAsJavaMap // implicit Scala Map -> java.util.Map conversion needed by Jedis hmset

/**
* RedisContext extends sparkContext's functionality with redis functions
@@ -264,6 +265,19 @@ class RedisContext(@transient val sc: SparkContext) extends Serializable {
kvs.foreachPartition(partition => setHash(hashName, partition, ttl, redisConfig, readWriteConfig))
}

/**
* Write RDD of (hash name, hash KVs)
*
* @param kvs RDD of tuples (hash name, Map(hash field name, hash field value))
* @param ttl time to live
*/
def toRedisHASHes(kvs: RDD[(String, Map[String, String])], ttl: Int = 0)
(implicit
redisConfig: RedisConfig = RedisConfig.fromSparkConf(sc.getConf),
readWriteConfig: ReadWriteConfig = ReadWriteConfig.fromSparkConf(sc.getConf)) {
kvs.foreachPartition(partition => setHash(partition, ttl, redisConfig, readWriteConfig))
}

/**
* @param kvs Pair RDD of K/V
* @param zsetName target zset's name which hold all the kvs
@@ -401,6 +415,33 @@ object RedisContext extends Serializable {
conn.close()
}

/**
* @param hashes iterator of (hash name, map of field/value pairs) to be saved on the target host
* @param ttl time to live
*/
def setHash(hashes: Iterator[(String, Map[String, String])],
ttl: Int,
redisConfig: RedisConfig,
readWriteConfig: ReadWriteConfig) {
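// Group the incoming (hash name, fields) tuples by the Redis node that owns each key,
// then write each group over a single pipelined connection (HMSET plus optional EXPIRE).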
implicit val rwConf: ReadWriteConfig = readWriteConfig

hashes
.map { case (key, hashFields) =>
(redisConfig.getHost(key), (key, hashFields))
}
.toArray
.groupBy(_._1)
.foreach { case (node, arr) =>
withConnection(node.endpoint.connect()) { conn =>
foreachWithPipeline(conn, arr) { (pipeline, a) =>
val (key, hashFields) = a._2
pipeline.hmset(key, hashFields)
if (ttl > 0) pipeline.expire(key, ttl)
}
}
}
}

/**
* @param zsetName
* @param arr k/vs which should be saved in the target host
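For context (not part of this diff): the `ttl` argument makes each written hash expire after the given number of seconds, while `ttl = 0` leaves it persistent. A minimal sketch of inspecting the result with plain Jedis, assuming a local Redis on the default port and a hash previously written with `sc.toRedisHASHes(hashRDD, 60)`:

```scala
import redis.clients.jedis.Jedis

// Assumes Redis is reachable at localhost:6379 and that "hash1"
// was written by sc.toRedisHASHes(hashRDD, 60) in a Spark job.
val conn = new Jedis("localhost", 6379)
println(conn.hgetAll("hash1")) // the stored field -> value map
println(conn.ttl("hash1"))     // seconds until expiry; -1 means no TTL was set
conn.close()
```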
@@ -42,10 +42,30 @@ trait RedisRddExtraSuite extends SparkRedisSuite with Keys with Matchers {
verifyList("list2", list2)
}

test("toRedisHASHes") {
val map1 = Map("k1" -> "v1", "k2" -> "v2")
val map2 = Map("k3" -> "v3", "k4" -> "v4")
val hashes = Seq(
("hash1", map1),
("hash2", map2)
)
val rdd = sc.parallelize(hashes)
sc.toRedisHASHes(rdd)

verifyHash("hash1", map1)
verifyHash("hash2", map2)
}

def verifyList(list: String, vals: Seq[String]): Unit = {
withConnection(redisConfig.getHost(list).endpoint.connect()) { conn =>
conn.lrange(list, 0, vals.size).asScala should be(vals.toList)
}
}

def verifyHash(hash: String, vals: Map[String, String]): Unit = {
withConnection(redisConfig.getHost(hash).endpoint.connect()) { conn =>
conn.hgetAll(hash).asScala should be(vals)
}
}

}