@@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.ExplainCommand
40
40
import org .apache .spark .sql .sources .DescribeCommand
41
41
import org .apache .spark .sql .hive .execution .{HiveNativeCommand , DropTable , AnalyzeTable , HiveScriptIOSchema }
42
42
import org .apache .spark .sql .types ._
43
+ import org .apache .spark .util .random .RandomSampler
43
44
44
45
/* Implicit conversions */
45
46
import scala .collection .JavaConversions ._
@@ -850,7 +851,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
850
851
case Token (" TOK_TABLESPLITSAMPLE" ,
851
852
Token (" TOK_PERCENT" , Nil ) ::
852
853
Token (fraction, Nil ) :: Nil ) =>
853
- Sample (fraction.toDouble, withReplacement = false , (math.random * 1000 ).toInt, relation)
854
+ // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
855
+ // function takes X PERCENT as the input and the range of X is [0, 100], we need to
856
+ // adjust the fraction.
857
+ require(
858
+ fraction.toDouble >= (0.0 - RandomSampler .roundingEpsilon)
859
+ && fraction.toDouble <= (100.0 + RandomSampler .roundingEpsilon),
860
+ s " Sampling fraction ( $fraction) must be on interval [0, 100] " )
861
+ Sample (fraction.toDouble / 100 , withReplacement = false , (math.random * 1000 ).toInt,
862
+ relation)
854
863
case Token (" TOK_TABLEBUCKETSAMPLE" ,
855
864
Token (numerator, Nil ) ::
856
865
Token (denominator, Nil ) :: Nil ) =>
0 commit comments