Skip to content

Commit 2453ebe

Browse files
committed
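Check and adjust the sampling fraction: validate that TABLESAMPLE(X PERCENT) has X in [0, 100] and convert it to the [0, 1] fraction expected by Sample.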
1 parent d641fbb commit 2453ebe

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.ExplainCommand
4040
import org.apache.spark.sql.sources.DescribeCommand
4141
import org.apache.spark.sql.hive.execution.{HiveNativeCommand, DropTable, AnalyzeTable, HiveScriptIOSchema}
4242
import org.apache.spark.sql.types._
43+
import org.apache.spark.util.random.RandomSampler
4344

4445
/* Implicit conversions */
4546
import scala.collection.JavaConversions._
@@ -850,7 +851,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
850851
case Token("TOK_TABLESPLITSAMPLE",
851852
Token("TOK_PERCENT", Nil) ::
852853
Token(fraction, Nil) :: Nil) =>
853-
Sample(fraction.toDouble, withReplacement = false, (math.random * 1000).toInt, relation)
854+
// The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
855+
// function takes X PERCENT as the input and the range of X is [0, 100], we need to
856+
// adjust the fraction.
857+
require(
858+
fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
859+
&& fraction.toDouble <= (100.0 + RandomSampler.roundingEpsilon),
860+
s"Sampling fraction ($fraction) must be on interval [0, 100]")
861+
Sample(fraction.toDouble / 100, withReplacement = false, (math.random * 1000).toInt,
862+
relation)
854863
case Token("TOK_TABLEBUCKETSAMPLE",
855864
Token(numerator, Nil) ::
856865
Token(denominator, Nil) :: Nil) =>

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
467467

468468
test("sampling") {
469469
sql("SELECT * FROM src TABLESAMPLE(0.1 PERCENT) s")
470+
sql("SELECT * FROM src TABLESAMPLE(100 PERCENT) s")
470471
}
471472

472473
test("DataFrame toString") {

0 commit comments

Comments
 (0)