From 9a1bd8d52621eaa5a724f142d2bd52018f23d837 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 12 Mar 2026 16:17:06 +0900 Subject: [PATCH 1/8] HIVE-29503: Use the fallback of half the number of rows when estimating the join product row count with an NDV of 0 --- .../annotation/StatsRulesProcFactory.java | 7 +- .../ndv_zero_join_selectivity.q | 24 +++ .../llap/ndv_zero_join_selectivity.q.out | 164 ++++++++++++++++++ 3 files changed, 194 insertions(+), 1 deletion(-) create mode 100644 ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q create mode 100644 ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index abfe6170217e..8cfe3a7e08f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -2030,7 +2030,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, String col = joinKeys.get(i).get(idx); ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col); if (cs != null) { - perAttrDVs.add(cs.getCountDistint()); + long estimatedNdv = cs.getCountDistint(); + if (estimatedNdv == 0) { + // TOOD: explain here why + estimatedNdv = joinStats.get(i).getNumRows() / 2; + } + perAttrDVs.add(estimatedNdv); } } distinctVals.add(getDenominator(perAttrDVs)); diff --git a/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q b/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q new file mode 100644 index 000000000000..2b1ad6d34941 --- /dev/null +++ b/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q @@ -0,0 +1,24 @@ +-- Test: NDV=0 (unknown) causes incorrect join selectivity calculation +-- When column statistics have NDV=0, the join selectivity incorrectly becomes 1.0 +-- This leads to cross-product cardinality estimates: rows1 * rows2 +-- Bug location: HiveRelMdSelectivity.getMaxNDVFromProjections() + +CREATE TABLE ndv_zero_t1 (id BIGINT, data STRING); +CREATE TABLE ndv_zero_t2 (id BIGINT, value STRING); + +-- Set up large row counts but NDV=0 (unknown) for join columns +ALTER TABLE ndv_zero_t1 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000'); +ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0'); +ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50'); + +ALTER TABLE ndv_zero_t2 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000'); +ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0'); +ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN value SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50'); + +-- BUG: With NDV=0 on join columns, selectivity becomes 1.0 (cross product) +-- Expected cardinality should be reasonable (e.g., 100M if NDV=100M) +-- Actual cardinality will be 100M * 100M = 10 quadrillion (cross product) +EXPLAIN +SELECT t1.id, t2.value +FROM ndv_zero_t1 t1 +JOIN ndv_zero_t2 t2 ON t1.id = t2.id; diff --git a/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out b/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out new file mode 100644 index 000000000000..ae564cafc40e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out @@ -0,0 +1,164 @@ +PREHOOK: query: CREATE TABLE ndv_zero_t1 (id BIGINT, data STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ndv_zero_t1 +POSTHOOK: query: CREATE TABLE ndv_zero_t1 (id BIGINT, data STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ndv_zero_t1 +PREHOOK: query: CREATE TABLE ndv_zero_t2 (id BIGINT, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ndv_zero_t2 +POSTHOOK: query: CREATE TABLE ndv_zero_t2 (id BIGINT, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ndv_zero_t2 +PREHOOK: query: ALTER TABLE ndv_zero_t1 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@ndv_zero_t1 +PREHOOK: Output: default@ndv_zero_t1 +POSTHOOK: query: ALTER TABLE ndv_zero_t1 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@ndv_zero_t1 +POSTHOOK: Output: default@ndv_zero_t1 +PREHOOK: query: ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@ndv_zero_t1 +PREHOOK: Output: default@ndv_zero_t1 +POSTHOOK: query: ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@ndv_zero_t1 +POSTHOOK: Output: default@ndv_zero_t1 +PREHOOK: query: ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@ndv_zero_t1 +PREHOOK: Output: default@ndv_zero_t1 +POSTHOOK: query: ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@ndv_zero_t1 +POSTHOOK: Output: default@ndv_zero_t1 +PREHOOK: query: ALTER TABLE ndv_zero_t2 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@ndv_zero_t2 +PREHOOK: Output: default@ndv_zero_t2 +POSTHOOK: query: ALTER TABLE ndv_zero_t2 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@ndv_zero_t2 +POSTHOOK: Output: default@ndv_zero_t2 +PREHOOK: query: ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@ndv_zero_t2 +PREHOOK: Output: default@ndv_zero_t2 +POSTHOOK: query: ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@ndv_zero_t2 +POSTHOOK: Output: default@ndv_zero_t2 +PREHOOK: query: ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN value SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@ndv_zero_t2 +PREHOOK: Output: default@ndv_zero_t2 +POSTHOOK: query: ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN value SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@ndv_zero_t2 +POSTHOOK: Output: default@ndv_zero_t2 +PREHOOK: query: EXPLAIN +SELECT t1.id, t2.value +FROM ndv_zero_t1 t1 +JOIN ndv_zero_t2 t2 ON t1.id = t2.id +PREHOOK: type: QUERY +PREHOOK: Input: default@ndv_zero_t1 +PREHOOK: Input: default@ndv_zero_t2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT t1.id, t2.value +FROM ndv_zero_t1 t1 +JOIN ndv_zero_t2 t2 ON t1.id = t2.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ndv_zero_t1 +POSTHOOK: Input: default@ndv_zero_t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 100000000 Data size: 800000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 100000000 Data size: 800000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 100000000 Data size: 800000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 100000000 Data size: 800000000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 100000000 Data size: 10200000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 100000000 Data size: 10200000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: bigint), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100000000 Data size: 10200000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 100000000 Data size: 10200000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 200000000 Data size: 20400000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 200000000 Data size: 20400000000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 200000000 Data size: 20400000000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + From cac79326bb472857936779ca162187323e6d2e99 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Fri, 13 Mar 2026 08:16:19 +0900 Subject: [PATCH 2/8] HIVE-29503: trying to trigger the fallback logic differently --- .../annotation/StatsRulesProcFactory.java | 29 ++++-- .../llap/bucket_map_join_tez3.q.out | 92 +++++++++---------- .../llap/ndv_zero_join_selectivity.q.out | 6 +- 3 files changed, 72 insertions(+), 55 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 8cfe3a7e08f9..a58d9450a914 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1964,6 +1964,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } + // NDV=0 means join key statistics are unavailable - fall back to joinFactor heuristic + if (allSatisfyPreCondition) { + for (int pos = 0; pos < parents.size(); pos++) { + ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); + List keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf().getOutputKeyColumnNames()); + if (!satisfyPrecondition(parent.getStatistics(), keyExprs)) { + allSatisfyPreCondition = false; + break; + } + } + } + if (allSatisfyPreCondition) { // statistics object that is combination of statistics from all @@ -2030,12 +2042,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, String col = joinKeys.get(i).get(idx); ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col); if (cs != null) { - long estimatedNdv = cs.getCountDistint(); - if (estimatedNdv == 0) { - // TOOD: explain here why - estimatedNdv = joinStats.get(i).getNumRows() / 2; - } - perAttrDVs.add(estimatedNdv); + perAttrDVs.add(cs.getCountDistint()); } } distinctVals.add(getDenominator(perAttrDVs)); @@ -3181,6 +3188,16 @@ static boolean satisfyPrecondition(Statistics stats) { && !stats.getColumnStatsState().equals(Statistics.State.NONE); } + static boolean satisfyPrecondition(Statistics stats, List joinKeys) { + for (String col : joinKeys) { + ColStatistics cs = stats.getColumnStatisticsFromColName(col); + if (cs != null && cs.getCountDistint() == 0L) { + return false; + } + } + return true; + } + // check if all parent statistics are available private static boolean isAllParentsContainStatistics(Operator op) { for (Operator parent : op.getParentOperators()) { diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out index 56b18f1c9766..a3939ad5d3a2 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out @@ -151,19 +151,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -380,19 +380,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -467,10 +467,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -478,7 +478,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: llap @@ -533,7 +533,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -541,7 +541,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -628,18 +628,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -848,7 +848,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 3 + Estimated key counts: Reducer 3 => 1 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -856,18 +856,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -942,10 +942,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -953,7 +953,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: llap @@ -1008,7 +1008,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1016,7 +1016,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true @@ -1103,19 +1103,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1332,19 +1332,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1419,10 +1419,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1430,7 +1430,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: vectorized, llap @@ -1485,7 +1485,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1493,7 +1493,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -1580,18 +1580,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1800,7 +1800,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 3 + Estimated key counts: Reducer 3 => 1 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1808,18 +1808,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1894,10 +1894,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1905,7 +1905,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: vectorized, llap @@ -1960,7 +1960,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1968,7 +1968,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out b/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out index ae564cafc40e..c4aa752b1b6b 100644 --- a/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out +++ b/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out @@ -143,14 +143,14 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 - Statistics: Num rows: 200000000 Data size: 20400000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110000002 Data size: 880000019 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200000000 Data size: 20400000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110000002 Data size: 880000019 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 200000000 Data size: 20400000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110000002 Data size: 880000019 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat From ac637314ae4f4a49e88c2178f94c8faddcd93193 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Fri, 13 Mar 2026 11:30:14 +0900 Subject: [PATCH 3/8] HIVE-29503: don't trigger "NDV==0 fallback" on empty or super small tables --- .../ql/optimizer/stats/annotation/StatsRulesProcFactory.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index a58d9450a914..bfb796f2bd70 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -3189,6 +3189,9 @@ static boolean satisfyPrecondition(Statistics stats) { } static boolean satisfyPrecondition(Statistics stats, List joinKeys) { + if (stats.getNumRows() <= 1) { + return true; + } for (String col : joinKeys) { ColStatistics cs = stats.getColumnStatisticsFromColName(col); if (cs != null && cs.getCountDistint() == 0L) { From c0788bdbca1c0aeb1f06f5fe0f7f304a6e5594d9 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Mon, 23 Mar 2026 10:08:58 -0700 Subject: [PATCH 4/8] HIVE-29503: .out files impacted after rebasing onto latest master --- .../llap/iceberg_bucket_map_join_1.q.out | 48 +- .../clientpositive/llap/mapjoin_date.q.out | 4 +- .../llap/vector_binary_join_groupby.q.out | 14 +- .../llap/vector_full_outer_join_date.q.out | 8 +- .../llap/vector_interval_mapjoin.q.out | 6 +- .../llap/vector_outer_join_constants.q.out | 410 +++++------- ...ctorized_dynamic_semijoin_reduction2.q.out | 24 +- .../perf/tpcds30tb/cte/cbo_query58.q.out | 51 +- .../perf/tpcds30tb/json/query58.q.out | 139 ++--- .../perf/tpcds30tb/tez/cbo_query58.q.out | 39 +- .../perf/tpcds30tb/tez/query51.q.out | 46 +- .../perf/tpcds30tb/tez/query58.q.out | 588 +++++++----------- .../perf/tpcds30tb/tez/query83.q.out | 565 ++++++++--------- 13 files changed, 832 insertions(+), 1110 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index f8dfb22e5fa1..c190c147dfcb 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -111,9 +111,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=3 width=520) + Select Operator [SEL_9] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=3 width=336) + Map Join Operator [MAPJOIN_45] (rows=11 width=168) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -175,19 +175,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=3 width=520) + Select Operator [SEL_13] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Map Join Operator [MAPJOIN_49] (rows=11 width=168) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] PartitionCols:_col1 - Group By Operator [GBY_8] (rows=3 width=168) + Group By Operator [GBY_8] (rows=1 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=3 width=168) + Group By Operator [GBY_6] (rows=1 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -245,9 +245,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=3 width=520) + Select Operator [SEL_9] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=3 width=336) + Map Join Operator [MAPJOIN_45] (rows=11 width=168) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -309,19 +309,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=3 width=520) + Select Operator [SEL_13] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Map Join Operator [MAPJOIN_49] (rows=11 width=168) Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=3 width=168) + Group By Operator [GBY_8] (rows=1 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=3 width=168) + Group By Operator [GBY_6] (rows=1 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -379,9 +379,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=3 width=520) + Select Operator [SEL_53] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=3 width=336) + Map Join Operator [MAPJOIN_52] (rows=11 width=168) BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_49] @@ -443,19 +443,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=3 width=520) + Select Operator [SEL_60] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=3 width=336) + Map Join Operator [MAPJOIN_59] (rows=11 width=168) BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_56] PartitionCols:_col1 - Group By Operator [GBY_55] (rows=3 width=168) + Group By Operator [GBY_55] (rows=1 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=3 width=168) + Group By Operator [GBY_53] (rows=1 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -513,9 +513,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=3 width=520) + Select Operator [SEL_53] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=3 width=336) + Map Join Operator [MAPJOIN_52] (rows=11 width=168) Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_49] @@ -577,19 +577,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=3 width=520) + Select Operator [SEL_60] (rows=11 width=168) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=3 width=336) + Map Join Operator [MAPJOIN_59] (rows=11 width=168) Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=3 width=168) + Group By Operator [GBY_55] (rows=1 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=3 width=168) + Group By Operator [GBY_53] (rows=1 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out index c426f13591b6..73c48b0541e6 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out @@ -85,13 +85,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 41bc14e5e354..c6fa83e9a6e4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -137,7 +137,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: bin is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:1.1 Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -170,7 +170,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 3 - Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 37492 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int) outputColumnNames: _col0 @@ -179,7 +179,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [23] selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 23:int - Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 37492 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -192,7 +192,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: @@ -200,7 +200,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -278,13 +278,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out index b585e0adef4f..49c9b0a2b635 100644 --- a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -190,7 +190,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) @@ -202,7 +202,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:date, 3:date - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col3 (type: date) Reducer 3 Execution mode: vectorized, llap @@ -227,13 +227,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1, 3] - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 2bbf69ab5033..9bf081ac1ebd 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -240,7 +240,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 995 Data size: 105523 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -248,13 +248,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 8, 17] - Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 995 Data size: 105523 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 995 Data size: 105523 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out index 9ea03dbdcca3..6090bcafe9a6 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@lday POSTHOOK: Output: default@lday #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select * from (select item1.S_ID S_ID, @@ -272,112 +272,48 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) - Map 7 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) + Map 6 <- Map 7 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: item1 - filterExpr: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) - predicate: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: id (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 3 - includeColumns: [0, 1] - dataColumns: id:int, s_id:int, name:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 - Map Operator Tree: - TableScan - alias: lday2 - filterExpr: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + alias: od1 + filterExpr: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) - predicate: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) + predicate: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date (type: timestamp), ly_date (type: timestamp) + expressions: id (type: int), o_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: timestamp) - 1 _col0 (type: timestamp) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumns: 0:timestamp + bigTableKeyColumns: 0:int bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyMultiKeyOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -385,14 +321,14 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 6 + 1 Map 5 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: timestamp) - 1 _col1 (type: timestamp) + 1 _col0 (type: timestamp) Map Join Vectorization: bigTableKeyColumns: 1:timestamp bigTableRetainColumnNums: [] @@ -403,7 +339,7 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED input vertices: 1 Map 7 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -413,10 +349,51 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:boolean + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: timestamp) + 1 _col1 (type: timestamp) + Map Join Vectorization: + bigTableKeyColumns: 1:timestamp + bigTableRetainColumnNums: [] + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED + input vertices: + 0 Map 6 + Statistics: Num rows: 2 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConstantVectorExpression(val 1) -> 5:boolean + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: true (type: boolean) + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 193 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -427,7 +404,7 @@ STAGE PLANS: keyColumns: 0:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 193 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -442,55 +419,44 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: d_date:timestamp, ly_date:timestamp + dataColumns: id:int, o_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] - Map 6 + scratchColumnTypeNames: [bigint, bigint] + Map 5 Map Operator Tree: TableScan - alias: ytday2 - filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + alias: item1 + filterExpr: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) - predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) + predicate: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ytd_date (type: timestamp) + expressions: id (type: int) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: timestamp) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: timestamp) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -503,46 +469,46 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 + dataColumnCount: 3 includeColumns: [0, 1] - dataColumns: d_date:timestamp, ytd_date:timestamp + dataColumns: id:int, s_id:int, name:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 7 + Map 6 Map Operator Tree: TableScan - alias: od2 - filterExpr: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + alias: lday2 + filterExpr: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) - predicate: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) + predicate: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int), o_date (type: timestamp) + expressions: d_date (type: timestamp), ly_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: timestamp) + 1 _col0 (type: timestamp) Map Join Vectorization: - bigTableKeyColumns: 0:int + bigTableKeyColumns: 0:timestamp bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyLongOperator + className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -550,8 +516,8 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 1 - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 7 + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: timestamp) null sort order: z @@ -562,68 +528,7 @@ STAGE PLANS: keyColumns: 1:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: timestamp) - 1 _col0 (type: timestamp) - Map Join Vectorization: - bigTableKeyColumns: 1:timestamp - bigTableRetainColumnNums: [0] - bigTableValueColumns: 0:int - className: VectorMapJoinInnerBigOnlyMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - projectedOutput: 0:int - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0 - input vertices: - 1 Map 6 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumns: 0:int - bigTableRetainColumnNums: [] - className: VectorMapJoinInnerBigOnlyLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - hashTableImplementationType: OPTIMIZED - input vertices: - 1 Reducer 2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: true (type: boolean) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:boolean - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -632,50 +537,79 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: id:int, o_date:timestamp + dataColumns: d_date:timestamp, ly_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] - Reducer 2 + scratchColumnTypeNames: [] + Map 7 + Map Operator Tree: + TableScan + alias: ytday2 + filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) + predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ytd_date (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap - Reduce Vectorization: + LLAP IO: all inputs + Map Vectorization: enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true - reduceColumnNullOrder: z - reduceColumnSortOrder: + + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: d_date:timestamp, ytd_date:timestamp partitionColumnCount: 0 scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -702,7 +636,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22 (type: int), TIMESTAMP'2008-04-30 00:00:00' (type: timestamp) outputColumnNames: _col0, _col1 @@ -711,7 +645,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2] selectExpressions: ConstantVectorExpression(val 22) -> 1:int, ConstantVectorExpression(val 2008-04-30 00:00:00) -> 2:timestamp - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: @@ -720,9 +654,9 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:int, 2:timestamp - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: timestamp) - Reducer 5 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -735,10 +669,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 141 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 141 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -746,7 +680,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 8 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -773,7 +707,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 22 (type: int), TIMESTAMP'2008-04-30 00:00:00' (type: timestamp) outputColumnNames: _col0, _col1 @@ -782,7 +716,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2] selectExpressions: ConstantVectorExpression(val 22) -> 1:int, ConstantVectorExpression(val 2008-04-30 00:00:00) -> 2:timestamp - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: @@ -791,7 +725,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:int, 2:timestamp - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: timestamp) Stage: Stage-0 @@ -800,7 +734,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from (select item1.S_ID S_ID, ytday1.D_DATE D_DATE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 5bbdded348a2..557332cbe6cf 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -761,17 +761,17 @@ STAGE PLANS: keys: 0 _col0 (type: date) 1 _col0 (type: date) - Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 6160 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - minReductionHashAggr: 0.95238096 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -780,10 +780,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -910,17 +910,17 @@ STAGE PLANS: keys: 0 _col0 (type: timestamp) 1 _col0 (type: timestamp) - Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 4400 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - minReductionHashAggr: 0.95238096 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -929,10 +929,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out index 964a40da2c92..e31ef660eff0 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out @@ -1,24 +1,25 @@ CTE Suggestion: HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveFilter(condition=[sq_count_check($0)]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) CTE Suggestion: HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Reducer 11' is a cross product +Warning: Map Join MAPJOIN[385][bigTable=?] in task 'Map 15' is a cross product +Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Map 14' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -37,19 +38,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), <=(*(0.9:DECIMAL(1, 1), $1), $3), <=($3, *(1.1:DECIMAL(2, 1), $1)), <=(*(0.9:DECIMAL(1, 1), $3), $1), <=($1, *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -64,19 +65,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -90,19 +91,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out index ff9ac3ce98d7..12bd29013ecd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out @@ -1,5 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 8' is a cross product { "CBOPlan": { "rels": [ @@ -721,7 +720,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "d_date", - "ndv": 76511, + "ndv": 0, "minValue": -25566, "maxValue": 47482 }, @@ -1144,7 +1143,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "d_date", - "ndv": 76511, + "ndv": 0, "minValue": -25566, "maxValue": 47482 }, @@ -1556,7 +1555,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "d_date", - "ndv": 76511, + "ndv": 0, "minValue": -25566, "maxValue": 47482 }, @@ -1794,6 +1793,25 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "id": "16", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "9", + "15" + ], + "rowCount": 59169.69 + }, + { + "id": "17", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1843,7 +1861,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc "rowCount": 9861.615 }, { - "id": "17", + "id": "18", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -1856,25 +1874,6 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc ], "rowCount": 9861.615 }, - { - "id": "18", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "15", - "17" - ], - "rowCount": 9861.615 - }, { "id": "19", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -1899,7 +1898,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc "algorithm": "none", "cost": "not available", "inputs": [ - "9", + "16", "18" ], "rowCount": 8.75263053674025E7 @@ -2137,13 +2136,13 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "i_rec_start_date", - "ndv": 4, + "ndv": 0, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 3, + "ndv": 0, "minValue": 10891, "maxValue": 11621 }, @@ -3117,6 +3116,25 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "id": "40", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "34", + "39" + ], + "rowCount": 59169.69 + }, + { + "id": "41", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -3166,7 +3184,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc "rowCount": 9861.615 }, { - "id": "41", + "id": "42", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -3179,25 +3197,6 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc ], "rowCount": 9861.615 }, - { - "id": "42", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "39", - "41" - ], - "rowCount": 9861.615 - }, { "id": "43", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -3222,7 +3221,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc "algorithm": "none", "cost": "not available", "inputs": [ - "34", + "40", "42" ], "rowCount": 8.75263053674025E7 @@ -3980,6 +3979,25 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc }, { "id": "63", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "57", + "62" + ], + "rowCount": 59169.69 + }, + { + "id": "64", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -4029,7 +4047,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc "rowCount": 9861.615 }, { - "id": "64", + "id": "65", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -4042,25 +4060,6 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc ], "rowCount": 9861.615 }, - { - "id": "65", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "62", - "64" - ], - "rowCount": 9861.615 - }, { "id": "66", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -4085,7 +4084,7 @@ Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross produc "algorithm": "none", "cost": "not available", "inputs": [ - "57", + "63", "65" ], "rowCount": 8.75263053674025E7 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out index de98d243fa41..4ddd16adf023 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out @@ -1,5 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 8' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -17,19 +16,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -46,19 +45,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -74,19 +73,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out index 5091dfb25a49..f87adc1999a3 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out @@ -42,13 +42,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -78,13 +78,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -180,17 +180,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Reducer 3 Execution mode: llap @@ -202,13 +202,13 @@ STAGE PLANS: 0 _col0 (type: bigint), _col1 (type: date) 1 _col0 (type: bigint), _col1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: date) null sort order: az sort order: ++ Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint) - Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(27,2)), _col3 (type: bigint), _col4 (type: date), _col5 (type: decimal(27,2)) Reducer 4 Execution mode: vectorized, llap @@ -216,7 +216,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: date), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: bigint), VALUE._col4 (type: date), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -242,25 +242,25 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (max_window_0 > max_window_1) (type: boolean) - Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE Top N Key Operator sort order: ++ keys: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date) null sort order: zz - Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE top n: 100 Select Operator expressions: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ - Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) Reducer 5 Execution mode: vectorized, llap @@ -268,13 +268,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: date), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 17600 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 17600 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -287,7 +287,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -310,17 +310,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out index 31dc5e2d3eca..35a5cf030f3f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out @@ -1,5 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 8' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -9,21 +8,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 10 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) - Map 13 <- Reducer 6 (BROADCAST_EDGE) - Map 15 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) - Map 3 <- Reducer 14 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE) - Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) - Reducer 16 <- Map 15 (SIMPLE_EDGE) + Map 1 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 11 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 8 <- Map 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) + Map 9 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE) - Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE) - Reducer 7 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +38,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 3 + 1 Map 6 Statistics: Num rows: 43005109025 Data size: 7556852441408 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -54,7 +49,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 8 - Statistics: Num rows: 3532295 Data size: 28258472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 47305620952 Data size: 8312537865718 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -63,142 +58,25 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 17 - Statistics: Num rows: 3532295 Data size: 353229612 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 13 + Statistics: Num rows: 52036184175 Data size: 9143791850476 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.92992544 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52036184175 Data size: 9143791850476 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52036184175 Data size: 9143791850476 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 10 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 13 - Statistics: Num rows: 6777167 Data size: 54217448 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 17 - Statistics: Num rows: 6777167 Data size: 677716812 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.9634768 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 13 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date (type: date), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2 - input vertices: - 0 Reducer 6 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: date) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.8333333 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 15 + Map 11 Map Operator Tree: TableScan alias: web_sales @@ -215,7 +93,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 3 + 1 Map 6 Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -225,8 +103,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 13 - Statistics: Num rows: 1773711 Data size: 14189800 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 8 + Statistics: Num rows: 23754102805 Data size: 4180389225463 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -235,25 +113,25 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 17 - Statistics: Num rows: 1773711 Data size: 177371212 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 13 + Statistics: Num rows: 26129513651 Data size: 4598428247677 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.86044854 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26129513651 Data size: 4598428247677 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26129513651 Data size: 4598428247677 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 17 + Map 13 Map Operator Tree: TableScan alias: item @@ -285,14 +163,14 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 3 + Map 6 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) or (d_date BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))))) (type: boolean) + filterExpr: (d_date is not null or ((d_date = DATE'1998-02-19') and d_week_seq is not null) or (d_date = DATE'1998-02-19')) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) (type: boolean) + predicate: d_date is not null (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_date_sk (type: bigint), d_date (type: date) @@ -321,13 +199,6 @@ STAGE PLANS: Partition key expr: cs_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 1 - Filter Operator - predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))) (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -350,7 +221,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 10 + Target Vertex: Map 9 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -373,23 +244,28 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 15 - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 4 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_date = DATE'1998-02-19') or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 11 + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 2191440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_date = DATE'1998-02-19') (type: boolean) - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -398,30 +274,13 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Filter Operator - predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 8 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -434,139 +293,120 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2 + 0 + 1 + outputColumnNames: _col0, _col1 input vertices: - 0 Reducer 5 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: date) + 1 Reducer 7 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + input vertices: + 1 Map 6 + Statistics: Num rows: 236172 Data size: 13225632 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.8333333 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) - Filter Operator - predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 11 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) - Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE + Map 9 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 16 - Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: decimal(17,2)) - null sort order: zz - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 6 + Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 8 + Statistics: Num rows: 90761969900 Data size: 15734311200358 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 13 + Statistics: Num rows: 99838169053 Data size: 17307742695529 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 99838169053 Data size: 17307742695529 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 12 + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 99838169053 Data size: 17307742695529 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 14 + LLAP IO: may be used (ACID table) + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 49919084526 Data size: 8653871347677 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) - Reducer 16 + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 49919084526 Data size: 8653871347677 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -574,17 +414,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13064756825 Data size: 2299214123750 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (0.9 * _col1) (type: decimal(19,3)), (1.1 * _col1) (type: decimal(20,3)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 247524 Data size: 107920464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13064756825 Data size: 2299214123750 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 107920464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13064756825 Data size: 2299214123750 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) Reducer 2 Execution mode: vectorized, llap @@ -594,44 +434,86 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26018092087 Data size: 4571895925150 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26018092087 Data size: 4571895925150 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 54910994168 Data size: 9519258688769 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) + Statistics: Num rows: 677913508 Data size: 117521712164 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 677913508 Data size: 117521712164 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + input vertices: + 1 Reducer 12 + Statistics: Num rows: 14371232818 Data size: 2529135590942 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true Filter Operator - predicate: sq_count_check(_col0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - input vertices: - 1 Reducer 7 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) + Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col3 (type: decimal(17,2)) + null sort order: zz + Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + null sort order: zz + sort order: ++ + Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 17500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 17500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -644,46 +526,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - input vertices: - 1 Map 8 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int) - outputColumnNames: _col0 - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out index c25dea04819d..30f313668531 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out @@ -7,19 +7,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 15 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 13 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) - Map 15 <- Reducer 10 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Map 3 <- Map 9 (BROADCAST_EDGE) - Map 6 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) - Map 9 <- Map 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) - Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE) - Reducer 14 <- Map 13 (SIMPLE_EDGE) + Map 1 <- Map 10 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Map 10 <- Map 13 (BROADCAST_EDGE) + Map 14 <- Map 10 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE) + Map 3 <- Map 10 (BROADCAST_EDGE) + Map 5 <- Map 10 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Map 10 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -39,7 +39,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 15 + 1 Map 10 Statistics: Num rows: 4320980099 Data size: 293480294712 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -50,7 +50,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1183036 Data size: 9464292 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4753078211 Data size: 322828331180 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -59,120 +59,86 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 5 - Statistics: Num rows: 1183036 Data size: 118303604 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 4 + Statistics: Num rows: 5228386145 Data size: 355111171994 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.7907722 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5228386145 Data size: 355111171994 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5228386145 Data size: 355111171994 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 11 + Map 10 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: date), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 13 + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.690705 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 13 - Map Operator Tree: - TableScan - alias: web_returns - Statistics: Num rows: 2062802370 Data size: 41061626908 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: wr_item_sk (type: bigint), wr_return_quantity (type: int), wr_returned_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2062802370 Data size: 41061626908 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 15 - Statistics: Num rows: 2062802370 Data size: 140076140668 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 9 - Statistics: Num rows: 564772 Data size: 4518180 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 5 - Statistics: Num rows: 564772 Data size: 56477204 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.5617275 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 15 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) or (d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))))) (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) (type: boolean) + predicate: d_date is not null (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_date_sk (type: bigint), d_date (type: date) @@ -200,7 +166,7 @@ STAGE PLANS: Target Input: web_returns Partition key expr: wr_returned_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 13 + Target Vertex: Map 14 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -219,18 +185,11 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator - Target column: sr_returned_date_sk (bigint) - Target Input: store_returns - Partition key expr: sr_returned_date_sk + Target column: cr_returned_date_sk (bigint) + Target Input: catalog_returns + Partition key expr: cr_returned_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 6 - Filter Operator - predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))) (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -249,11 +208,93 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator - Target column: cr_returned_date_sk (bigint) - Target Input: catalog_returns - Partition key expr: cr_returned_date_sk + Target column: sr_returned_date_sk (bigint) + Target Input: store_returns + Partition key expr: sr_returned_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 + Target Vertex: Map 5 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.690705 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 14 + Map Operator Tree: + TableScan + alias: web_returns + Statistics: Num rows: 2062802370 Data size: 41061626908 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: wr_item_sk (type: bigint), wr_return_quantity (type: int), wr_returned_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2062802370 Data size: 41061626908 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Reducer 11 + Statistics: Num rows: 2062802370 Data size: 140076140668 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 10 + Statistics: Num rows: 2269082656 Data size: 154083758074 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 2495990975 Data size: 169492137555 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2495990975 Data size: 169492137555 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2495990975 Data size: 169492137555 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 3 @@ -261,7 +302,7 @@ STAGE PLANS: TableScan alias: date_dim filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:2.7378882667798324E-4 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:0.0 Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -278,38 +319,23 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 9 - Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 10 + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 4 Map Operator Tree: TableScan alias: item @@ -341,7 +367,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 + Map 5 Map Operator Tree: TableScan alias: store_returns @@ -358,7 +384,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 15 + 1 Reducer 12 Statistics: Num rows: 8332595709 Data size: 566008907392 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -368,8 +394,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 9 - Statistics: Num rows: 2281371 Data size: 18250972 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 10 + Statistics: Num rows: 9165855478 Data size: 622609811625 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -378,143 +404,51 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 5 - Statistics: Num rows: 2281371 Data size: 228137104 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 4 + Statistics: Num rows: 10082441244 Data size: 684870807631 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.8915021 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10082441244 Data size: 684870807631 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10082441244 Data size: 684870807631 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null)) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date (type: date), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Reducer 12 - Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: date) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 11 - Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: date) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: date) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.95 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) - Filter Operator - predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 10 + Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 14 + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -522,17 +456,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247995487 Data size: 84746068743 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247995487 Data size: 84746068743 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1247995487 Data size: 84746068743 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: double) Reducer 2 Execution mode: vectorized, llap @@ -542,28 +476,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2614193072 Data size: 177555585963 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2614193072 Data size: 177555585963 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) - Reducer 7 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -571,56 +492,78 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6 - input vertices: - 1 Reducer 14 - Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: bigint) - null sort order: zz - Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), (((UDFToDouble(_col3) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col1 (type: bigint), (((UDFToDouble(_col1) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col5 (type: bigint), (((_col6 / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), (CAST( ((_col3 + _col1) + _col5) AS decimal(19,0)) / 3) (type: decimal(25,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz - sort order: ++ - Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) + Statistics: Num rows: 5041220622 Data size: 342435403815 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5041220622 Data size: 342435403815 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 5545342804 Data size: 376678952360 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5545342804 Data size: 376678952360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col3 (type: bigint) Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6 + input vertices: + 1 Reducer 15 + Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col3 (type: bigint) + null sort order: zz + Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: bigint), (((UDFToDouble(_col3) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col1 (type: bigint), (((UDFToDouble(_col1) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col5 (type: bigint), (((_col6 / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), (CAST( ((_col3 + _col1) + _col5) AS decimal(19,0)) / 3) (type: decimal(25,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: double), VALUE._col1 (type: bigint), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: decimal(25,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 26000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 6700 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 26000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 6700 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat From b431bdb211b5c17bffe383a1c14c3609f233e893 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Mon, 23 Mar 2026 16:19:11 -0700 Subject: [PATCH 5/8] HIVE-29503: .q file comments' cleanup --- .../queries/clientpositive/ndv_zero_join_selectivity.q | 9 --------- 1 file changed, 9 deletions(-) diff --git a/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q b/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q index 2b1ad6d34941..195b7d74de28 100644 --- a/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q +++ b/ql/src/test/queries/clientpositive/ndv_zero_join_selectivity.q @@ -1,12 +1,6 @@ --- Test: NDV=0 (unknown) causes incorrect join selectivity calculation --- When column statistics have NDV=0, the join selectivity incorrectly becomes 1.0 --- This leads to cross-product cardinality estimates: rows1 * rows2 --- Bug location: HiveRelMdSelectivity.getMaxNDVFromProjections() - CREATE TABLE ndv_zero_t1 (id BIGINT, data STRING); CREATE TABLE ndv_zero_t2 (id BIGINT, value STRING); --- Set up large row counts but NDV=0 (unknown) for join columns ALTER TABLE ndv_zero_t1 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize'='1000000000'); ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0'); ALTER TABLE ndv_zero_t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50'); @@ -15,9 +9,6 @@ ALTER TABLE ndv_zero_t2 UPDATE STATISTICS SET('numRows'='100000000','rawDataSize ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN id SET('numDVs'='0','numNulls'='0'); ALTER TABLE ndv_zero_t2 UPDATE STATISTICS FOR COLUMN value SET('numDVs'='1000','numNulls'='0','avgColLen'='10','maxColLen'='50'); --- BUG: With NDV=0 on join columns, selectivity becomes 1.0 (cross product) --- Expected cardinality should be reasonable (e.g., 100M if NDV=100M) --- Actual cardinality will be 100M * 100M = 10 quadrillion (cross product) EXPLAIN SELECT t1.id, t2.value FROM ndv_zero_t1 t1 From 784084a1ac574b98806822c0c79f1bdbd5e0144b Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Tue, 24 Mar 2026 16:33:01 -0700 Subject: [PATCH 6/8] HIVE-29503: refactored ndv join estimation logic to preserve statistics --- .../llap/iceberg_bucket_map_join_1.q.out | 32 +- .../annotation/StatsRulesProcFactory.java | 64 ++- .../annotation/TestStatsRulesProcFactory.java | 357 ++++++++++--- .../llap/bucket_map_join_tez3.q.out | 48 +- .../clientpositive/llap/mapjoin_date.q.out | 6 +- .../llap/ndv_zero_join_selectivity.q.out | 6 +- .../llap/vector_binary_join_groupby.q.out | 14 +- .../llap/vector_full_outer_join_date.q.out | 8 +- .../llap/vector_interval_mapjoin.q.out | 6 +- .../llap/vector_outer_join_constants.q.out | 36 +- ...ctorized_dynamic_semijoin_reduction2.q.out | 20 +- .../perf/tpcds30tb/cte/cbo_query58.q.out | 4 +- .../perf/tpcds30tb/json/query58.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query58.q.out | 2 +- .../perf/tpcds30tb/tez/query51.q.out | 22 +- .../perf/tpcds30tb/tez/query58.q.out | 335 ++++++------ .../perf/tpcds30tb/tez/query83.q.out | 478 +++++++++--------- 17 files changed, 813 insertions(+), 627 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index c190c147dfcb..890cfaf2705e 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -111,9 +111,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=11 width=168) + Select Operator [SEL_9] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=11 width=168) + Map Join Operator [MAPJOIN_45] (rows=11 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -175,9 +175,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=11 width=168) + Select Operator [SEL_13] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=11 width=168) + Map Join Operator [MAPJOIN_49] (rows=11 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] @@ -245,9 +245,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=11 width=168) + Select Operator [SEL_9] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=11 width=168) + Map Join Operator [MAPJOIN_45] (rows=11 width=336) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -309,9 +309,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=11 width=168) + Select Operator [SEL_13] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=11 width=168) + Map Join Operator [MAPJOIN_49] (rows=11 width=336) Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] @@ -379,9 +379,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=11 width=168) + Select Operator [SEL_53] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=11 width=168) + Map Join Operator [MAPJOIN_52] (rows=11 width=336) BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_49] @@ -443,9 +443,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=11 width=168) + Select Operator [SEL_60] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=11 width=168) + Map Join Operator [MAPJOIN_59] (rows=11 width=336) BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_56] @@ -513,9 +513,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=11 width=168) + Select Operator [SEL_53] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=11 width=168) + Map Join Operator [MAPJOIN_52] (rows=11 width=336) Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_49] @@ -577,9 +577,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=11 width=168) + Select Operator [SEL_60] (rows=11 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=11 width=168) + Map Join Operator [MAPJOIN_59] (rows=11 width=336) Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_56] diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index bfb796f2bd70..1de391c06079 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1964,18 +1964,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } - // NDV=0 means join key statistics are unavailable - fall back to joinFactor heuristic - if (allSatisfyPreCondition) { - for (int pos = 0; pos < parents.size(); pos++) { - ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); - List keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf().getOutputKeyColumnNames()); - if (!satisfyPrecondition(parent.getStatistics(), keyExprs)) { - allSatisfyPreCondition = false; - break; - } - } - } - if (allSatisfyPreCondition) { // statistics object that is combination of statistics from all @@ -2025,6 +2013,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } + // If PK-FK inference failed, check for NDV=0 on join keys and use joinFactor heuristic + if (inferredRowCount == -1 && hasZeroNdvJoinKey(joinKeys, joinStats)) { + inferredRowCount = computeJoinFactorEstimate(conf, Collections.max(rowCounts), rowCounts.size()); + } + List distinctVals = Lists.newArrayList(); // these ndvs are later used to compute unmatched rows and num of nulls for outer joins @@ -2148,7 +2141,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { // worst case when there are no column statistics - float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR); int numParents = parents.size(); long crossRowCount = 1; long crossDataSize = 1; @@ -2194,14 +2186,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, newNumRows = crossRowCount; newDataSize = crossDataSize; } else { - if (numParents > 1) { - newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor); - newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor); - } else { - // MUX operator with 1 parent - newNumRows = StatsUtils.safeMult(maxRowCount, joinFactor); - newDataSize = StatsUtils.safeMult(maxDataSize, joinFactor); - } + newNumRows = computeJoinFactorEstimate(conf, maxRowCount, numParents); + newDataSize = computeJoinFactorEstimate(conf, maxDataSize, numParents); } Statistics wcStats = new Statistics(newNumRows, newDataSize, 0, 0); @@ -2760,6 +2746,29 @@ private long computeRowCountAssumingInnerJoin(List rowCountParents, long d return result; } + @VisibleForTesting + static long computeJoinFactorEstimate(HiveConf conf, long maxValue, int numParents) { + float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR); + if (numParents > 1) { + return StatsUtils.safeMult(StatsUtils.safeMult(maxValue, (numParents - 1)), joinFactor); + } + return StatsUtils.safeMult(maxValue, joinFactor); + } + + @VisibleForTesting + static boolean hasZeroNdvJoinKey(Map> joinKeys, + Map joinStats) { + return joinKeys.entrySet().stream().anyMatch(entry -> { + Statistics posStats = joinStats.get(entry.getKey()); + if (posStats.getNumRows() <= 1) { + return false; + } + return entry.getValue().stream() + .map(posStats::getColumnStatisticsFromColName) + .anyMatch(cs -> cs != null && cs.getCountDistint() == 0L); + }); + } + private void updateJoinColumnsNDV(Map> joinKeys, Map joinStats, int numAttr) { int joinColIdx = 0; @@ -3188,19 +3197,6 @@ static boolean satisfyPrecondition(Statistics stats) { && !stats.getColumnStatsState().equals(Statistics.State.NONE); } - static boolean satisfyPrecondition(Statistics stats, List joinKeys) { - if (stats.getNumRows() <= 1) { - return true; - } - for (String col : joinKeys) { - ColStatistics cs = stats.getColumnStatisticsFromColName(col); - if (cs != null && cs.getCountDistint() == 0L) { - return false; - } - } - return true; - } - // check if all parent statistics are available private static boolean isAllParentsContainStatistics(Operator op) { for (Operator parent : op.getParentOperators()) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java index 4d9d351af8f1..14a45b9a68f0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java @@ -37,24 +37,47 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; +import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; +import java.util.stream.Stream; -import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; -import org.apache.hadoop.hive.ql.plan.JoinCondDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; +import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.FilterStatsRule.extractFloatFromLiteralValue; +import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.JoinStatsRule.computeJoinFactorEstimate; +import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.JoinStatsRule.hasZeroNdvJoinKey; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.FilterStatsRule.extractFloatFromLiteralValue; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertThrows; - public class TestStatsRulesProcFactory { private final static String COL_NAME = "col1"; @@ -599,49 +622,6 @@ public void testNotBetweenRightLowerThanLeft() throws SemanticException { assertEquals(VALUES.length, numRows); } - private ExprNodeDesc createExprNodeConstantDesc(int value) { - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, value); - } - - private Statistics createStatistics(long[] values, long numNulls) { - long numDVs = Arrays.stream(values).distinct().count(); - Statistics stats = new Statistics(values.length + numNulls, 100, 100, 100); - - HyperLogLog hll = StatisticsTestUtils.createHll(values); - float[] val = new float[values.length]; - for (int i = 0; i < values.length; i++) { - val[i] = values[i]; - } - KllFloatsSketch kll = StatisticsTestUtils.createKll(val); - ColStatistics colStatistics = createColStatistics(COL_NAME, "int", numNulls, numDVs, hll, kll); - - stats.addToColumnStats(Collections.singletonList(colStatistics)); - - return stats; - } - - private static ColStatistics createColStatistics( - String colName, String colType, long numNulls, long numDVs, Object hll, KllFloatsSketch kll) { - ColStatistics colStatistics = new ColStatistics(colName, colType); - - colStatistics.setNumNulls(numNulls); - colStatistics.setCountDistint(numDVs); - if (hll != null) { - if (hll instanceof HyperLogLog) { - colStatistics.setBitVectors(((HyperLogLog) hll).serialize()); - } else if (hll instanceof FMSketch) { - colStatistics.setBitVectors(((FMSketch) hll).serialize()); - } else { - throw new IllegalArgumentException("Unsupported HLL class: " + hll.getClass().getName()); - } - } - if (kll != null) { - colStatistics.setHistogram(kll.toByteArray()); - } - - return colStatistics; - } - /** * Test that computeAggregateColumnMinMax properly handles numNulls=-1 (unknown). * With the fix, numNulls=-1 should be treated as 0, giving valuesCount = numRows. @@ -676,10 +656,10 @@ public void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws Semanti // Verify: With the fix, COUNT Range should be (0, 100) // numNulls=-1 is treated as 0, so valuesCount = 100 - 0 = 100 // Without the fix, valuesCount = 100 - (-1) = 101 (WRONG) - assertNotNull("Range should be set on COUNT column", cs.getRange()); - assertEquals("COUNT min should be 0", 0L, ((Number) cs.getRange().minValue).longValue()); - assertEquals("COUNT max should be 100 (numRows), not 101", - 100L, ((Number) cs.getRange().maxValue).longValue()); + assertNotNull(cs.getRange(), "Range should be set on COUNT column"); + assertEquals(0L, ((Number) cs.getRange().minValue).longValue(), "COUNT min should be 0"); + assertEquals(100L, ((Number) cs.getRange().maxValue).longValue(), + "COUNT max should be 100 (numRows), not 101"); } @Test @@ -708,10 +688,10 @@ public void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticE cs, conf, agg, "bigint", parentStats); // With known numNulls=20, valuesCount = 100 - 20 = 80 - assertNotNull("Range should be set", cs.getRange()); + assertNotNull(cs.getRange(), "Range should be set"); assertEquals(0L, ((Number) cs.getRange().minValue).longValue()); - assertEquals("COUNT max should be 80 (numRows - numNulls)", - 80L, ((Number) cs.getRange().maxValue).longValue()); + assertEquals(80L, ((Number) cs.getRange().maxValue).longValue(), + "COUNT max should be 80 (numRows - numNulls)"); } /** @@ -749,7 +729,262 @@ public void testUpdateNumNullsPreservesUnknownNumNulls() { joinStatsRule.updateNumNulls(colStats, 100L, 100L, 1000L, 0L, mockJop); // Assert that numNulls is still -1 (unchanged) - assertEquals("Unknown numNulls (-1) should be preserved after updateNumNulls", - -1L, colStats.getNumNulls()); + assertEquals(-1L, colStats.getNumNulls(), + "Unknown numNulls (-1) should be preserved after updateNumNulls"); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("joinFactorEstimateTestData") + void testComputeJoinFactorEstimate(String scenario, long maxValue, int numParents, long expected) { + HiveConf conf = new HiveConf(); + assertEquals(expected, computeJoinFactorEstimate(conf, maxValue, numParents)); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("hasZeroNdvJoinKeyTestData") + void testHasZeroNdvJoinKey(String scenario, Map> joinKeys, + Map joinStats, boolean expected) { + assertEquals(expected, hasZeroNdvJoinKey(joinKeys, joinStats)); + } + + @Test + @SuppressWarnings("unchecked") + void testJoinStatsRuleWithZeroNdv() throws SemanticException { + HiveConf conf = new HiveConf(); + PlanMapper planMapper = mock(PlanMapper.class); + StatsSource statsSource = mock(StatsSource.class); + Context context = mock(Context.class); + when(context.getConf()).thenReturn(conf); + when(context.getPlanMapper()).thenReturn(planMapper); + when(context.getStatsSource()).thenReturn(statsSource); + ParseContext pctx = mock(ParseContext.class); + when(pctx.getConf()).thenReturn(conf); + when(pctx.getContext()).thenReturn(context); + AnnotateStatsProcCtx ctx = new AnnotateStatsProcCtx(pctx); + + Statistics leftStats = new Statistics(1000L, 10000L, 0L, 0L); + leftStats.setBasicStatsState(Statistics.State.COMPLETE); + leftStats.setColumnStatsState(Statistics.State.COMPLETE); + ColStatistics leftColStats = new ColStatistics("KEY.key", "int"); + leftColStats.setCountDistint(0L); + leftColStats.setNumNulls(0L); + leftStats.addToColumnStats(Collections.singletonList(leftColStats)); + + Statistics rightStats = new Statistics(500L, 5000L, 0L, 0L); + rightStats.setBasicStatsState(Statistics.State.COMPLETE); + rightStats.setColumnStatsState(Statistics.State.COMPLETE); + ColStatistics rightColStats = new ColStatistics("KEY.key", "int"); + rightColStats.setCountDistint(100L); + rightColStats.setNumNulls(0L); + rightStats.addToColumnStats(Collections.singletonList(rightColStats)); + + ReduceSinkOperator leftRsOp = mock(ReduceSinkOperator.class); + ReduceSinkDesc leftRsDesc = mock(ReduceSinkDesc.class); + when(leftRsOp.getStatistics()).thenReturn(leftStats); + when(leftRsOp.getConf()).thenReturn(leftRsDesc); + when(leftRsDesc.getOutputKeyColumnNames()).thenReturn(Arrays.asList("key")); + + ReduceSinkOperator rightRsOp = mock(ReduceSinkOperator.class); + ReduceSinkDesc rightRsDesc = mock(ReduceSinkDesc.class); + when(rightRsOp.getStatistics()).thenReturn(rightStats); + when(rightRsOp.getConf()).thenReturn(rightRsDesc); + when(rightRsDesc.getOutputKeyColumnNames()).thenReturn(Arrays.asList("key")); + + List> parents = new ArrayList<>(); + parents.add(leftRsOp); + parents.add(rightRsOp); + + JoinOperator joinOp = mock(JoinOperator.class); + JoinDesc joinDesc = mock(JoinDesc.class); + JoinCondDesc joinCond = new JoinCondDesc(0, 1, JoinDesc.INNER_JOIN); + when(joinOp.getParentOperators()).thenReturn(parents); + when(joinOp.getConf()).thenReturn(joinDesc); + when(joinDesc.getConds()).thenReturn(new JoinCondDesc[]{joinCond}); + + RowSchema rowSchema = mock(RowSchema.class); + ColumnInfo colInfo = new ColumnInfo("key", TypeInfoFactory.intTypeInfo, "", false); + when(rowSchema.getSignature()).thenReturn(Arrays.asList(colInfo)); + when(joinOp.getSchema()).thenReturn(rowSchema); + + final Statistics[] capturedStats = new Statistics[1]; + doAnswer(invocation -> { + capturedStats[0] = invocation.getArgument(0); + return null; + }).when(joinOp).setStatistics(any(Statistics.class)); + + StatsRulesProcFactory.JoinStatsRule rule = new StatsRulesProcFactory.JoinStatsRule(); + rule.process(joinOp, new Stack<>(), ctx); + + assertNotNull(capturedStats[0], "Statistics should have been set on join operator"); + assertEquals(Statistics.State.COMPLETE, capturedStats[0].getColumnStatsState(), + "Column stats state should be COMPLETE when using NDV=0 fallback"); + assertEquals(1100L, capturedStats[0].getNumRows(), + "Row count should use joinFactor heuristic: max(1000,500) * 1.1 = 1100"); + } + + private ExprNodeDesc createExprNodeConstantDesc(int value) { + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, value); + } + + private Statistics createStatistics(long[] values, long numNulls) { + long numDVs = Arrays.stream(values).distinct().count(); + Statistics stats = new Statistics(values.length + numNulls, 100, 100, 100); + + HyperLogLog hll = StatisticsTestUtils.createHll(values); + float[] val = new float[values.length]; + for (int i = 0; i < values.length; i++) { + val[i] = values[i]; + } + KllFloatsSketch kll = StatisticsTestUtils.createKll(val); + ColStatistics colStatistics = createColStatistics(COL_NAME, "int", numNulls, numDVs, hll, kll); + + stats.addToColumnStats(Collections.singletonList(colStatistics)); + + return stats; + } + + private static ColStatistics createColStatistics( + String colName, String colType, long numNulls, long numDVs, Object hll, KllFloatsSketch kll) { + ColStatistics colStatistics = new ColStatistics(colName, colType); + + colStatistics.setNumNulls(numNulls); + colStatistics.setCountDistint(numDVs); + if (hll != null) { + if (hll instanceof HyperLogLog) { + colStatistics.setBitVectors(((HyperLogLog) hll).serialize()); + } else if (hll instanceof FMSketch) { + colStatistics.setBitVectors(((FMSketch) hll).serialize()); + } else { + throw new IllegalArgumentException("Unsupported HLL class: " + hll.getClass().getName()); + } + } + if (kll != null) { + colStatistics.setHistogram(kll.toByteArray()); + } + + return colStatistics; + } + + static Stream joinFactorEstimateTestData() { + return Stream.of( + Arguments.of("SingleParent", 1000L, 1, 1100L), + Arguments.of("TwoParents", 1000L, 2, 1100L), + Arguments.of("ThreeParents", 1000L, 3, 2200L), + Arguments.of("Overflow", Long.MAX_VALUE, 3, Long.MAX_VALUE) + ); + } + + static Stream hasZeroNdvJoinKeyTestData() { + // Helper to create Statistics with given row count + java.util.function.Function statsWithRows = rows -> { + Statistics s = new Statistics(rows, 100L, 0L, 0L); + s.setColumnStatsState(Statistics.State.COMPLETE); + return s; + }; + + // Helper to create ColStatistics with given NDV + java.util.function.BiConsumer addColWithNdv = (stats, ndv) -> { + ColStatistics cs = new ColStatistics("col", "int"); + cs.setCountDistint(ndv); + stats.addToColumnStats(Collections.singletonList(cs)); + }; + + // Empty joinKeys + Map> emptyKeys = new HashMap<>(); + Map emptyStats = new HashMap<>(); + + // Single table with <=1 row (should skip) + Map> singleRowKeys = new HashMap<>(); + singleRowKeys.put(0, Arrays.asList("col")); + Map singleRowStats = new HashMap<>(); + Statistics singleRowStat = statsWithRows.apply(1L); + addColWithNdv.accept(singleRowStat, 0L); + singleRowStats.put(0, singleRowStat); + + // Table with rows but no zero NDV + Map> noZeroNdvKeys = new HashMap<>(); + noZeroNdvKeys.put(0, Arrays.asList("col")); + Map noZeroNdvStats = new HashMap<>(); + Statistics noZeroNdvStat = statsWithRows.apply(100L); + addColWithNdv.accept(noZeroNdvStat, 50L); + noZeroNdvStats.put(0, noZeroNdvStat); + + // Table with rows and zero NDV + Map> zeroNdvKeys = new HashMap<>(); + zeroNdvKeys.put(0, Arrays.asList("col")); + Map zeroNdvStats = new HashMap<>(); + Statistics zeroNdvStat = statsWithRows.apply(100L); + addColWithNdv.accept(zeroNdvStat, 0L); + zeroNdvStats.put(0, zeroNdvStat); + + // Column not found (null ColStatistics) + Map> nullColKeys = new HashMap<>(); + nullColKeys.put(0, Arrays.asList("nonexistent")); + Map nullColStats = new HashMap<>(); + nullColStats.put(0, statsWithRows.apply(100L)); + + // Two tables: first has non-zero NDV, second has zero NDV + Map> mixedKeys = new HashMap<>(); + mixedKeys.put(0, Arrays.asList("col")); + mixedKeys.put(1, Arrays.asList("col")); + Map mixedStats = new HashMap<>(); + Statistics mixedStat0 = statsWithRows.apply(100L); + addColWithNdv.accept(mixedStat0, 50L); + mixedStats.put(0, mixedStat0); + Statistics mixedStat1 = statsWithRows.apply(100L); + addColWithNdv.accept(mixedStat1, 0L); + mixedStats.put(1, mixedStat1); + + // Two tables: first has zero NDV, second has non-zero NDV + Map> firstZeroKeys = new HashMap<>(); + firstZeroKeys.put(0, Arrays.asList("col")); + firstZeroKeys.put(1, Arrays.asList("col")); + Map firstZeroStats = new HashMap<>(); + Statistics firstZeroStat0 = statsWithRows.apply(100L); + addColWithNdv.accept(firstZeroStat0, 0L); + firstZeroStats.put(0, firstZeroStat0); + Statistics firstZeroStat1 = statsWithRows.apply(100L); + addColWithNdv.accept(firstZeroStat1, 50L); + firstZeroStats.put(1, firstZeroStat1); + + // Three tables: first two have non-zero NDV, third has zero NDV + Map> threeTableKeys = new HashMap<>(); + threeTableKeys.put(0, Arrays.asList("col")); + threeTableKeys.put(1, Arrays.asList("col")); + threeTableKeys.put(2, Arrays.asList("col")); + Map threeTableStats = new HashMap<>(); + Statistics threeStat0 = statsWithRows.apply(100L); + addColWithNdv.accept(threeStat0, 50L); + threeTableStats.put(0, threeStat0); + Statistics threeStat1 = statsWithRows.apply(100L); + addColWithNdv.accept(threeStat1, 25L); + threeTableStats.put(1, threeStat1); + Statistics threeStat2 = statsWithRows.apply(100L); + addColWithNdv.accept(threeStat2, 0L); + threeTableStats.put(2, threeStat2); + + // Two tables: first has 1 row (skipped), second has zero NDV + Map> skipFirstKeys = new HashMap<>(); + skipFirstKeys.put(0, Arrays.asList("col")); + skipFirstKeys.put(1, Arrays.asList("col")); + Map skipFirstStats = new HashMap<>(); + Statistics skipStat0 = statsWithRows.apply(1L); + addColWithNdv.accept(skipStat0, 0L); + skipFirstStats.put(0, skipStat0); + Statistics skipStat1 = statsWithRows.apply(100L); + addColWithNdv.accept(skipStat1, 0L); + skipFirstStats.put(1, skipStat1); + + return Stream.of( + Arguments.of("EmptyJoinKeys", emptyKeys, emptyStats, false), + Arguments.of("SingleRowTable", singleRowKeys, singleRowStats, false), + Arguments.of("NoZeroNdv", noZeroNdvKeys, noZeroNdvStats, false), + Arguments.of("HasZeroNdv", zeroNdvKeys, zeroNdvStats, true), + Arguments.of("NullColStatistics", nullColKeys, nullColStats, false), + Arguments.of("TwoTablesFirstHasZero", firstZeroKeys, firstZeroStats, true), + Arguments.of("TwoTablesSecondHasZero", mixedKeys, mixedStats, true), + Arguments.of("ThreeTablesThirdHasZero", threeTableKeys, threeTableStats, true), + Arguments.of("FirstSkippedSecondHasZero", skipFirstKeys, skipFirstStats, true) + ); } } diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out index a3939ad5d3a2..1c3ed1cc54ab 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out @@ -151,19 +151,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -380,19 +380,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -628,18 +628,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -856,18 +856,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1103,19 +1103,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1332,19 +1332,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1580,18 +1580,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1808,18 +1808,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 1848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out index 73c48b0541e6..a5888fe4265c 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: p1 filterExpr: birthdate is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:1.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:0.0 Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -85,13 +85,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out b/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out index c4aa752b1b6b..64c9b3e2355b 100644 --- a/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out +++ b/ql/src/test/results/clientpositive/llap/ndv_zero_join_selectivity.q.out @@ -143,14 +143,14 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 - Statistics: Num rows: 110000002 Data size: 880000019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110000002 Data size: 11220000204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 110000002 Data size: 880000019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110000002 Data size: 11220000204 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 110000002 Data size: 880000019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110000002 Data size: 11220000204 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index c6fa83e9a6e4..6de2c9fbf920 100644 --- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -137,7 +137,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: bin is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:1.1 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:0.0 Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -170,7 +170,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 3 - Statistics: Num rows: 110 Data size: 37492 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 74988 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int) outputColumnNames: _col0 @@ -179,7 +179,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [23] selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 23:int - Statistics: Num rows: 110 Data size: 37492 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 74988 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -192,7 +192,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: @@ -200,7 +200,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -278,13 +278,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out index 49c9b0a2b635..8c00b44c3ae2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -190,7 +190,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) @@ -202,7 +202,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:date, 3:date - Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col3 (type: date) Reducer 3 Execution mode: vectorized, llap @@ -227,13 +227,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1, 3] - Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 9bf081ac1ebd..8307c418ffaa 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -240,7 +240,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 995 Data size: 105523 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 995 Data size: 199000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -248,13 +248,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 8, 17] - Statistics: Num rows: 995 Data size: 105523 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 995 Data size: 199000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 995 Data size: 105523 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 995 Data size: 199000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out index 6090bcafe9a6..32c5a76385f7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out @@ -339,7 +339,7 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED input vertices: 1 Map 7 - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -349,10 +349,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -363,7 +363,7 @@ STAGE PLANS: keyColumns: 0:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -380,7 +380,7 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED input vertices: 0 Map 6 - Statistics: Num rows: 2 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -390,10 +390,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -404,7 +404,7 @@ STAGE PLANS: keyColumns: 0:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -517,7 +517,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 7 - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: timestamp) null sort order: z @@ -528,7 +528,7 @@ STAGE PLANS: keyColumns: 1:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -636,7 +636,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22 (type: int), TIMESTAMP'2008-04-30 00:00:00' (type: timestamp) outputColumnNames: _col0, _col1 @@ -645,7 +645,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2] selectExpressions: ConstantVectorExpression(val 22) -> 1:int, ConstantVectorExpression(val 2008-04-30 00:00:00) -> 2:timestamp - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: @@ -654,7 +654,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:int, 2:timestamp - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: timestamp) Reducer 3 Execution mode: llap @@ -669,10 +669,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 141 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 141 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -707,7 +707,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22 (type: int), TIMESTAMP'2008-04-30 00:00:00' (type: timestamp) outputColumnNames: _col0, _col1 @@ -716,7 +716,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2] selectExpressions: ConstantVectorExpression(val 22) -> 1:int, ConstantVectorExpression(val 2008-04-30 00:00:00) -> 2:timestamp - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: @@ -725,7 +725,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:int, 2:timestamp - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: timestamp) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 557332cbe6cf..b6682d8667ff 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -761,17 +761,17 @@ STAGE PLANS: keys: 0 _col0 (type: date) 1 _col0 (type: date) - Statistics: Num rows: 110 Data size: 6160 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -780,10 +780,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -910,17 +910,17 @@ STAGE PLANS: keys: 0 _col0 (type: timestamp) 1 _col0 (type: timestamp) - Statistics: Num rows: 110 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -929,10 +929,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out index e31ef660eff0..e470e4ea7aa1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out @@ -18,8 +18,8 @@ HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[385][bigTable=?] in task 'Map 15' is a cross product -Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Map 14' is a cross product +Warning: Map Join MAPJOIN[385][bigTable=?] in task 'Map 13' is a cross product +Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Map 12' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out index 12bd29013ecd..1cb25790654e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 8' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product { "CBOPlan": { "rels": [ diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out index 4ddd16adf023..9664e4762b89 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 8' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out index f87adc1999a3..2effa2e3433c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out @@ -202,13 +202,13 @@ STAGE PLANS: 0 _col0 (type: bigint), _col1 (type: date) 1 _col0 (type: bigint), _col1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: date) null sort order: az sort order: ++ Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint) - Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(27,2)), _col3 (type: bigint), _col4 (type: date), _col5 (type: decimal(27,2)) Reducer 4 Execution mode: vectorized, llap @@ -216,7 +216,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: date), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: bigint), VALUE._col4 (type: date), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -242,25 +242,25 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 4460994065 Data size: 785134955614 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (max_window_0 > max_window_1) (type: boolean) - Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1486998021 Data size: 523423303392 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date) null sort order: zz - Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1486998021 Data size: 523423303392 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator expressions: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1486998021 Data size: 761342986752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ - Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1486998021 Data size: 761342986752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) Reducer 5 Execution mode: vectorized, llap @@ -268,13 +268,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: date), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1486998021 Data size: 261711651754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1486998021 Data size: 761342986752 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 17600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 17600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out index 35a5cf030f3f..a4158d0ee7f9 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 8' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8,17 +8,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 11 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 8 <- Map 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) - Map 9 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 1 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 6 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -38,7 +36,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 6 + 1 Map 3 Statistics: Num rows: 43005109025 Data size: 7556852441408 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -48,8 +46,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 47305620952 Data size: 8312537865718 Basic stats: COMPLETE Column stats: NONE + 1 Map 5 + Statistics: Num rows: 47305620952 Data size: 5664627767248 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -58,80 +56,25 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 13 - Statistics: Num rows: 52036184175 Data size: 9143791850476 Basic stats: COMPLETE Column stats: NONE + 1 Map 11 + Statistics: Num rows: 47305620952 Data size: 10016744894832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 52036184175 Data size: 9143791850476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9685119072 Data size: 2053245243264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 52036184175 Data size: 9143791850476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9685119072 Data size: 2053245243264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 - Map Operator Tree: - TableScan - alias: web_sales - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 6 - Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 8 - Statistics: Num rows: 23754102805 Data size: 4180389225463 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 13 - Statistics: Num rows: 26129513651 Data size: 4598428247677 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26129513651 Data size: 4598428247677 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26129513651 Data size: 4598428247677 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 13 Map Operator Tree: TableScan alias: item @@ -163,7 +106,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 + Map 3 Map Operator Tree: TableScan alias: date_dim @@ -221,7 +164,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 9 + Target Vertex: Map 6 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -244,7 +187,7 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 11 + Target Vertex: Map 9 Filter Operator predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 2191440 Basic stats: COMPLETE Column stats: COMPLETE @@ -276,7 +219,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 8 + Map 5 Map Operator Tree: TableScan alias: date_dim @@ -297,7 +240,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 7 + 1 Reducer 4 Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -307,7 +250,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 6 + 1 Map 3 Statistics: Num rows: 236172 Data size: 13225632 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) @@ -335,7 +278,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 + Map 6 Map Operator Tree: TableScan alias: store_sales @@ -352,7 +295,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 6 + 1 Map 3 Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -362,8 +305,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 90761969900 Data size: 15734311200358 Basic stats: COMPLETE Column stats: NONE + 1 Map 5 + Statistics: Num rows: 90761969900 Data size: 10673440481760 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -372,41 +315,80 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 13 - Statistics: Num rows: 99838169053 Data size: 17307742695529 Basic stats: COMPLETE Column stats: NONE + 1 Map 11 + Statistics: Num rows: 90761969900 Data size: 19023541712560 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99838169053 Data size: 17307742695529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18393755964 Data size: 3899476264368 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 99838169053 Data size: 17307742695529 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18393755964 Data size: 3899476264368 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 10 + Map 9 + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 23754102805 Data size: 2850189729064 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 11 + Statistics: Num rows: 23754102805 Data size: 5035567187124 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4869044604 Data size: 1032237456048 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4869044604 Data size: 1032237456048 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 49919084526 Data size: 8653871347677 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 49919084526 Data size: 8653871347677 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 12 + LLAP IO: may be used (ACID table) + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -414,17 +396,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 13064756825 Data size: 2299214123750 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (0.9 * _col1) (type: decimal(19,3)), (1.1 * _col1) (type: decimal(20,3)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13064756825 Data size: 2299214123750 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 107920464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13064756825 Data size: 2299214123750 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 107920464 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) Reducer 2 Execution mode: vectorized, llap @@ -434,86 +416,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 26018092087 Data size: 4571895925150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26018092087 Data size: 4571895925150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 54910994168 Data size: 9519258688769 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) - Statistics: Num rows: 677913508 Data size: 117521712164 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 677913508 Data size: 117521712164 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col3 (type: decimal(17,2)) Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: string) - 1 KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 - input vertices: - 1 Reducer 12 - Statistics: Num rows: 14371232818 Data size: 2529135590942 Basic stats: COMPLETE Column stats: NONE - DynamicPartitionHashJoin: true - Filter Operator - predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) - Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: decimal(17,2)) - null sort order: zz - Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) - null sort order: zz - sort order: ++ - Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 2190402 Data size: 385480057 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 17500 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 17500 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -530,6 +441,74 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) + Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + input vertices: + 1 Reducer 10 + Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col3 (type: decimal(17,2)) + null sort order: zz + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out index 30f313668531..6eaa80a5a0fe 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out @@ -7,19 +7,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 10 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Map 10 <- Map 13 (BROADCAST_EDGE) - Map 14 <- Map 10 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE) - Map 3 <- Map 10 (BROADCAST_EDGE) - Map 5 <- Map 10 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 12 <- Map 10 (SIMPLE_EDGE) - Reducer 15 <- Map 14 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 12 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 3 <- Map 8 (BROADCAST_EDGE) + Map 5 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) + Map 8 <- Map 11 (BROADCAST_EDGE) + Reducer 10 <- Map 8 (SIMPLE_EDGE) + Reducer 13 <- Map 12 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -39,7 +37,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 10 + 1 Map 8 Statistics: Num rows: 4320980099 Data size: 293480294712 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -50,7 +48,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 4753078211 Data size: 322828331180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4753078211 Data size: 56690586512 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -60,162 +58,24 @@ STAGE PLANS: outputColumnNames: _col1, _col6 input vertices: 1 Map 4 - Statistics: Num rows: 5228386145 Data size: 355111171994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4753078211 Data size: 493973781924 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5228386145 Data size: 355111171994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 477721320 Data size: 51593902560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5228386145 Data size: 355111171994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 477721320 Data size: 51593902560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date (type: date), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 13 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: date) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.690705 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: d_date is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: wr_returned_date_sk (bigint) - Target Input: web_returns - Partition key expr: wr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 14 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cr_returned_date_sk (bigint) - Target Input: catalog_returns - Partition key expr: cr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: sr_returned_date_sk (bigint) - Target Input: store_returns - Partition key expr: sr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 13 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -242,7 +102,7 @@ STAGE PLANS: Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 14 + Map 12 Map Operator Tree: TableScan alias: web_returns @@ -259,7 +119,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 11 + 1 Reducer 9 Statistics: Num rows: 2062802370 Data size: 140076140668 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -269,8 +129,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 10 - Statistics: Num rows: 2269082656 Data size: 154083758074 Basic stats: COMPLETE Column stats: NONE + 1 Map 8 + Statistics: Num rows: 2269082656 Data size: 27034571380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -280,20 +140,20 @@ STAGE PLANS: outputColumnNames: _col1, _col6 input vertices: 1 Map 4 - Statistics: Num rows: 2495990975 Data size: 169492137555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2269082656 Data size: 235790175732 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2495990975 Data size: 169492137555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228217128 Data size: 24647449824 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2495990975 Data size: 169492137555 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 228217128 Data size: 24647449824 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -319,7 +179,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 10 + 1 Map 8 Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) @@ -384,7 +244,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 12 + 1 Reducer 10 Statistics: Num rows: 8332595709 Data size: 566008907392 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -394,8 +254,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 10 - Statistics: Num rows: 9165855478 Data size: 622609811625 Basic stats: COMPLETE Column stats: NONE + 1 Map 8 + Statistics: Num rows: 9165855478 Data size: 109382664916 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -405,37 +265,162 @@ STAGE PLANS: outputColumnNames: _col1, _col6 input vertices: 1 Map 4 - Statistics: Num rows: 10082441244 Data size: 684870807631 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9165855478 Data size: 952641368892 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10082441244 Data size: 684870807631 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 921284328 Data size: 99498707424 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10082441244 Data size: 684870807631 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 921284328 Data size: 99498707424 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 11 + Map 8 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: date), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 11 + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.690705 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: d_date is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: wr_returned_date_sk (bigint) + Target Input: web_returns + Partition key expr: wr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 12 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cr_returned_date_sk (bigint) + Target Input: catalog_returns + Partition key expr: cr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: sr_returned_date_sk (bigint) + Target Input: store_returns + Partition key expr: sr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 5 Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Reducer 12 + LLAP IO: may be used (ACID table) + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -448,7 +433,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date) - Reducer 15 + Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -456,17 +441,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1247995487 Data size: 84746068743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1247995487 Data size: 84746068743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1247995487 Data size: 84746068743 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: double) Reducer 2 Execution mode: vectorized, llap @@ -476,13 +461,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2614193072 Data size: 177555585963 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2614193072 Data size: 177555585963 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 6 Execution mode: vectorized, llap @@ -492,82 +477,73 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5041220622 Data size: 342435403815 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5041220622 Data size: 342435403815 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: string) - 1 KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 5545342804 Data size: 376678952360 Basic stats: COMPLETE Column stats: NONE - DynamicPartitionHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5545342804 Data size: 376678952360 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col3 (type: bigint) - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: string) - 1 KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6 - input vertices: - 1 Reducer 15 - Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE - DynamicPartitionHashJoin: true - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: bigint) - null sort order: zz - Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), (((UDFToDouble(_col3) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col1 (type: bigint), (((UDFToDouble(_col1) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col5 (type: bigint), (((_col6 / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), (CAST( ((_col3 + _col1) + _col5) AS decimal(19,0)) / 3) (type: decimal(25,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - null sort order: zz + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6 + input vertices: + 1 Reducer 13 + Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator sort order: ++ - Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 9 + keys: _col0 (type: string), _col3 (type: bigint) + null sort order: zz + Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: bigint), (((UDFToDouble(_col3) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col1 (type: bigint), (((UDFToDouble(_col1) / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), _col5 (type: bigint), (((_col6 / UDFToDouble(((_col3 + _col1) + _col5))) / 3.0D) * 100.0D) (type: double), (CAST( ((_col3 + _col1) + _col5) AS decimal(19,0)) / 3) (type: decimal(25,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: double), VALUE._col1 (type: bigint), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: decimal(25,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 6099877216 Data size: 414346856576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 6700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 26000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 6700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 26000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) Stage: Stage-0 Fetch Operator From 01b2998332fefdb1108cb54c55a9422eddb67201 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Tue, 31 Mar 2026 17:05:34 -0700 Subject: [PATCH 7/8] HIVE-29503; SonarQube feedback --- .../annotation/TestStatsRulesProcFactory.java | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java index 14a45b9a68f0..11b6e78b8e58 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java @@ -68,8 +68,8 @@ import java.util.stream.Stream; import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.FilterStatsRule.extractFloatFromLiteralValue; -import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.JoinStatsRule.computeJoinFactorEstimate; -import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.JoinStatsRule.hasZeroNdvJoinKey; + +import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.JoinStatsRule; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -89,7 +89,7 @@ public class TestStatsRulesProcFactory { private final static long[] VALUES = { 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 5L, 6L, 7L }; @Test - public void testComparisonRowCountZeroNonNullValues() throws SemanticException { + void testComparisonRowCountZeroNonNullValues() throws SemanticException { long numNulls = 2; long[] values = {}; Statistics stats = createStatistics(values, numNulls); @@ -103,7 +103,7 @@ public void testComparisonRowCountZeroNonNullValues() throws SemanticException { } @Test - public void testComparisonRowCountInvalidKll() throws SemanticException { + void testComparisonRowCountInvalidKll() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); stats.getColumnStats().get(0).setHistogram(null); @@ -126,7 +126,7 @@ public void testComparisonRowCountInvalidKll() throws SemanticException { } @Test - public void testComparisonRowCountLessThan() throws SemanticException { + void testComparisonRowCountLessThan() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -139,7 +139,7 @@ public void testComparisonRowCountLessThan() throws SemanticException { } @Test - public void testComparisonRowCountLessThanMin() throws SemanticException { + void testComparisonRowCountLessThanMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -152,7 +152,7 @@ public void testComparisonRowCountLessThanMin() throws SemanticException { } @Test - public void testComparisonRowCountLessThanBelowMin() throws SemanticException { + void testComparisonRowCountLessThanBelowMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -165,7 +165,7 @@ public void testComparisonRowCountLessThanBelowMin() throws SemanticException { } @Test - public void testComparisonRowCountLessThanMax() throws SemanticException { + void testComparisonRowCountLessThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -178,7 +178,7 @@ public void testComparisonRowCountLessThanMax() throws SemanticException { } @Test - public void testComparisonRowCountLessThanAboveMax() throws SemanticException { + void testComparisonRowCountLessThanAboveMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -191,7 +191,7 @@ public void testComparisonRowCountLessThanAboveMax() throws SemanticException { } @Test - public void testComparisonRowCountEqualOrLessThan() throws SemanticException { + void testComparisonRowCountEqualOrLessThan() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -203,7 +203,7 @@ public void testComparisonRowCountEqualOrLessThan() throws SemanticException { } @Test - public void testComparisonRowCountEqualOrLessThanMin() throws SemanticException { + void testComparisonRowCountEqualOrLessThanMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -215,7 +215,7 @@ public void testComparisonRowCountEqualOrLessThanMin() throws SemanticException } @Test - public void testComparisonRowCountEqualOrLessThanBelowMin() throws SemanticException { + void testComparisonRowCountEqualOrLessThanBelowMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -227,7 +227,7 @@ public void testComparisonRowCountEqualOrLessThanBelowMin() throws SemanticExcep } @Test - public void testComparisonRowCountEqualOrLessThanMax() throws SemanticException { + void testComparisonRowCountEqualOrLessThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -239,7 +239,7 @@ public void testComparisonRowCountEqualOrLessThanMax() throws SemanticException } @Test - public void testComparisonRowCountEqualOrLessThanAboveMax() throws SemanticException { + void testComparisonRowCountEqualOrLessThanAboveMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -251,7 +251,7 @@ public void testComparisonRowCountEqualOrLessThanAboveMax() throws SemanticExcep } @Test - public void testComparisonRowCountGreaterThan() throws SemanticException { + void testComparisonRowCountGreaterThan() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -263,7 +263,7 @@ public void testComparisonRowCountGreaterThan() throws SemanticException { } @Test - public void testComparisonRowCountGreaterThanMin() throws SemanticException { + void testComparisonRowCountGreaterThanMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -275,7 +275,7 @@ public void testComparisonRowCountGreaterThanMin() throws SemanticException { } @Test - public void testComparisonRowCountGreaterThanBelowMin() throws SemanticException { + void testComparisonRowCountGreaterThanBelowMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -287,7 +287,7 @@ public void testComparisonRowCountGreaterThanBelowMin() throws SemanticException } @Test - public void testComparisonRowCountGreaterThanMax() throws SemanticException { + void testComparisonRowCountGreaterThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -299,7 +299,7 @@ public void testComparisonRowCountGreaterThanMax() throws SemanticException { } @Test - public void testComparisonRowCountGreaterThanAboveMax() throws SemanticException { + void testComparisonRowCountGreaterThanAboveMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -311,7 +311,7 @@ public void testComparisonRowCountGreaterThanAboveMax() throws SemanticException } @Test - public void testComparisonRowCountEqualOrGreaterThan() throws SemanticException { + void testComparisonRowCountEqualOrGreaterThan() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -323,7 +323,7 @@ public void testComparisonRowCountEqualOrGreaterThan() throws SemanticException } @Test - public void testComparisonRowCountEqualOrGreaterThanMin() throws SemanticException { + void testComparisonRowCountEqualOrGreaterThanMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -335,7 +335,7 @@ public void testComparisonRowCountEqualOrGreaterThanMin() throws SemanticExcepti } @Test - public void testComparisonRowCountEqualOrGreaterThanBelowMin() throws SemanticException { + void testComparisonRowCountEqualOrGreaterThanBelowMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -347,7 +347,7 @@ public void testComparisonRowCountEqualOrGreaterThanBelowMin() throws SemanticEx } @Test - public void testComparisonRowCountEqualOrGreaterThanMax() throws SemanticException { + void testComparisonRowCountEqualOrGreaterThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -359,7 +359,7 @@ public void testComparisonRowCountEqualOrGreaterThanMax() throws SemanticExcepti } @Test - public void testComparisonRowCountEqualOrGreaterThanBeyondMax() throws SemanticException { + void testComparisonRowCountEqualOrGreaterThanBeyondMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, @@ -371,7 +371,7 @@ public void testComparisonRowCountEqualOrGreaterThanBeyondMax() throws SemanticE } @Test - public void testComparisonRowCountEqualOrLessThanWhenMinEqualMax() throws SemanticException { + void testComparisonRowCountEqualOrLessThanWhenMinEqualMax() throws SemanticException { long[] values = { 1L, 1L }; long numNulls = 2; Statistics stats = createStatistics(values, numNulls); @@ -385,7 +385,7 @@ public void testComparisonRowCountEqualOrLessThanWhenMinEqualMax() throws Semant } @Test - public void testComparisonRowCountEqualOrGreaterThanWhenMinEqualMax() throws SemanticException { + void testComparisonRowCountEqualOrGreaterThanWhenMinEqualMax() throws SemanticException { long[] values = { 1L, 1L }; long numNulls = 2; Statistics stats = createStatistics(values, numNulls); @@ -399,7 +399,7 @@ public void testComparisonRowCountEqualOrGreaterThanWhenMinEqualMax() throws Sem } @Test - public void testBetween() throws SemanticException { + void testBetween() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -413,7 +413,7 @@ public void testBetween() throws SemanticException { } @Test - public void testLiteralExtraction() { + void testLiteralExtraction() { final double DELTA = 1e-5; assertEquals((float) 100, @@ -443,7 +443,7 @@ public void testLiteralExtraction() { } @Test - public void testLiteralExtractionFailures() { + void testLiteralExtractionFailures() { // make sure the correct exceptions are raised so that we can default to standard computation String[] types = {"int", "tinyint", "smallint", "bigint", "date", "timestamp", "float", "double"}; for (String type : types) { @@ -460,7 +460,7 @@ public void testLiteralExtractionFailures() { } @Test - public void testBetweenLeftLowerThanMin() throws SemanticException { + void testBetweenLeftLowerThanMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -480,7 +480,7 @@ public void testBetweenLeftLowerThanMin() throws SemanticException { } @Test - public void testBetweenLeftLowerThanMinRightHigherThanMax() throws SemanticException { + void testBetweenLeftLowerThanMinRightHigherThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -494,7 +494,7 @@ public void testBetweenLeftLowerThanMinRightHigherThanMax() throws SemanticExcep } @Test - public void testBetweenRightHigherThanMax() throws SemanticException { + void testBetweenRightHigherThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -514,7 +514,7 @@ public void testBetweenRightHigherThanMax() throws SemanticException { } @Test - public void testBetweenRightLowerThanMin() throws SemanticException { + void testBetweenRightLowerThanMin() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -528,7 +528,7 @@ public void testBetweenRightLowerThanMin() throws SemanticException { } @Test - public void testBetweenLeftHigherThanMax() throws SemanticException { + void testBetweenLeftHigherThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -542,7 +542,7 @@ public void testBetweenLeftHigherThanMax() throws SemanticException { } @Test - public void testBetweenLeftEqualMax() throws SemanticException { + void testBetweenLeftEqualMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -556,7 +556,7 @@ public void testBetweenLeftEqualMax() throws SemanticException { } @Test - public void testNotBetween() throws SemanticException { + void testNotBetween() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -581,7 +581,7 @@ public void testNotBetween() throws SemanticException { } @Test - public void testNotBetweenLowerThanMinHigherThanMax() throws SemanticException { + void testNotBetweenLowerThanMinHigherThanMax() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -595,7 +595,7 @@ public void testNotBetweenLowerThanMinHigherThanMax() throws SemanticException { } @Test - public void testNotBetweenLeftEqualsRight() throws SemanticException { + void testNotBetweenLeftEqualsRight() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -609,7 +609,7 @@ public void testNotBetweenLeftEqualsRight() throws SemanticException { } @Test - public void testNotBetweenRightLowerThanLeft() throws SemanticException { + void testNotBetweenRightLowerThanLeft() throws SemanticException { long numNulls = 2; Statistics stats = createStatistics(VALUES, numNulls); @@ -628,7 +628,7 @@ public void testNotBetweenRightLowerThanLeft() throws SemanticException { * Without the fix, valuesCount = numRows - (-1) = numRows + 1 (wrong). */ @Test - public void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws SemanticException { + void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws SemanticException { ColStatistics cs = new ColStatistics("_col0", "bigint"); HiveConf conf = new HiveConf(); @@ -663,7 +663,7 @@ public void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws Semanti } @Test - public void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticException { + void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticException { ColStatistics cs = new ColStatistics("_col0", "bigint"); HiveConf conf = new HiveConf(); @@ -700,7 +700,7 @@ public void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticE * Without the fix, LEFT_OUTER_JOIN would calculate: newNumNulls = oldNumNulls + leftUnmatchedRows = -1 + 100 = 99 */ @Test - public void testUpdateNumNullsPreservesUnknownNumNulls() { + void testUpdateNumNullsPreservesUnknownNumNulls() { StatsRulesProcFactory.JoinStatsRule joinStatsRule = new StatsRulesProcFactory.JoinStatsRule(); // Create ColStatistics with numNulls = -1 (unknown) @@ -737,14 +737,14 @@ public void testUpdateNumNullsPreservesUnknownNumNulls() { @MethodSource("joinFactorEstimateTestData") void testComputeJoinFactorEstimate(String scenario, long maxValue, int numParents, long expected) { HiveConf conf = new HiveConf(); - assertEquals(expected, computeJoinFactorEstimate(conf, maxValue, numParents)); + assertEquals(expected, JoinStatsRule.computeJoinFactorEstimate(conf, maxValue, numParents)); } @ParameterizedTest(name = "{0}") @MethodSource("hasZeroNdvJoinKeyTestData") void testHasZeroNdvJoinKey(String scenario, Map> joinKeys, Map joinStats, boolean expected) { - assertEquals(expected, hasZeroNdvJoinKey(joinKeys, joinStats)); + assertEquals(expected, JoinStatsRule.hasZeroNdvJoinKey(joinKeys, joinStats)); } @Test From c34c234566e20615ebfb4ae8645c1bdd4c029022 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 9 Apr 2026 08:46:52 -0700 Subject: [PATCH 8/8] HIVE-29503; .out files impacted after rebasing onto latest master --- .../llap/iceberg_bucket_map_join_1.q.out | 48 +- .../llap/bucket_map_join_tez3.q.out | 92 +-- .../clientpositive/llap/mapjoin_date.q.out | 2 +- .../llap/vector_full_outer_join_date.q.out | 8 +- .../llap/vector_interval_mapjoin.q.out | 6 +- .../llap/vector_outer_join_constants.q.out | 382 ++++++----- ...ctorized_dynamic_semijoin_reduction2.q.out | 8 +- .../perf/tpcds30tb/cte/cbo_query58.q.out | 51 +- .../perf/tpcds30tb/json/query58.q.out | 139 ++-- .../perf/tpcds30tb/tez/cbo_query58.q.out | 39 +- .../perf/tpcds30tb/tez/query51.q.out | 42 +- .../perf/tpcds30tb/tez/query58.q.out | 639 +++++++++++------- .../perf/tpcds30tb/tez/query83.q.out | 399 ++++++----- 13 files changed, 1089 insertions(+), 766 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index 890cfaf2705e..f8dfb22e5fa1 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -111,9 +111,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=11 width=520) + Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=11 width=336) + Map Join Operator [MAPJOIN_45] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -175,19 +175,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=11 width=520) + Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=11 width=336) + Map Join Operator [MAPJOIN_49] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] PartitionCols:_col1 - Group By Operator [GBY_8] (rows=1 width=168) + Group By Operator [GBY_8] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=168) + Group By Operator [GBY_6] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -245,9 +245,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=11 width=520) + Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=11 width=336) + Map Join Operator [MAPJOIN_45] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -309,19 +309,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=11 width=520) + Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=11 width=336) + Map Join Operator [MAPJOIN_49] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=1 width=168) + Group By Operator [GBY_8] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=168) + Group By Operator [GBY_6] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -379,9 +379,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=11 width=520) + Select Operator [SEL_53] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=11 width=336) + Map Join Operator [MAPJOIN_52] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_49] @@ -443,19 +443,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=11 width=520) + Select Operator [SEL_60] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=11 width=336) + Map Join Operator [MAPJOIN_59] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_56] PartitionCols:_col1 - Group By Operator [GBY_55] (rows=1 width=168) + Group By Operator [GBY_55] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=1 width=168) + Group By Operator [GBY_53] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -513,9 +513,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=11 width=520) + Select Operator [SEL_53] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=11 width=336) + Map Join Operator [MAPJOIN_52] (rows=3 width=336) Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_49] @@ -577,19 +577,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=11 width=520) + Select Operator [SEL_60] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=11 width=336) + Map Join Operator [MAPJOIN_59] (rows=3 width=336) Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1 width=168) + Group By Operator [GBY_55] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=1 width=168) + Group By Operator [GBY_53] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out index 1c3ed1cc54ab..56b18f1c9766 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out @@ -151,19 +151,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -380,19 +380,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -467,10 +467,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -478,7 +478,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: llap @@ -533,7 +533,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -541,7 +541,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -628,18 +628,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -848,7 +848,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 1 + Estimated key counts: Reducer 3 => 3 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -856,18 +856,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -942,10 +942,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -953,7 +953,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: llap @@ -1008,7 +1008,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1016,7 +1016,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true @@ -1103,19 +1103,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1332,19 +1332,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1419,10 +1419,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1430,7 +1430,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: vectorized, llap @@ -1485,7 +1485,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1493,7 +1493,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -1580,18 +1580,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1800,7 +1800,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 1 + Estimated key counts: Reducer 3 => 3 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1808,18 +1808,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 3696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 11 Data size: 5720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1894,10 +1894,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1905,7 +1905,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: vectorized, llap @@ -1960,7 +1960,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1968,7 +1968,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out index a5888fe4265c..c426f13591b6 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: p1 filterExpr: birthdate is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:1.0 Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out index 8c00b44c3ae2..b585e0adef4f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -190,7 +190,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) @@ -202,7 +202,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:date, 3:date - Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col3 (type: date) Reducer 3 Execution mode: vectorized, llap @@ -227,13 +227,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1, 3] - Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 8307c418ffaa..2bbf69ab5033 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -240,7 +240,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 995 Data size: 199000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -248,13 +248,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 8, 17] - Statistics: Num rows: 995 Data size: 199000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 995 Data size: 199000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out index 32c5a76385f7..9ea03dbdcca3 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@lday POSTHOOK: Output: default@lday #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select * from (select item1.S_ID S_ID, @@ -272,48 +272,112 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) - Map 6 <- Map 7 (BROADCAST_EDGE) + Map 3 <- Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) + Map 7 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: od1 - filterExpr: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + alias: item1 + filterExpr: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) - predicate: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) + predicate: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int), o_date (type: timestamp) + expressions: id (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1] + dataColumns: id:int, s_id:int, name:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: lday2 + filterExpr: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) + predicate: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: timestamp), ly_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: timestamp) + 1 _col0 (type: timestamp) Map Join Vectorization: - bigTableKeyColumns: 0:int + bigTableKeyColumns: 0:timestamp bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyLongOperator + className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -321,14 +385,14 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: timestamp) - 1 _col0 (type: timestamp) + 1 _col1 (type: timestamp) Map Join Vectorization: bigTableKeyColumns: 1:timestamp bigTableRetainColumnNums: [] @@ -364,47 +428,6 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: timestamp) - 1 _col1 (type: timestamp) - Map Join Vectorization: - bigTableKeyColumns: 1:timestamp - bigTableRetainColumnNums: [] - className: VectorMapJoinInnerBigOnlyMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - hashTableImplementationType: OPTIMIZED - input vertices: - 0 Map 6 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: ConstantVectorExpression(val 1) -> 5:boolean - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: true (type: boolean) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:boolean - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -419,44 +442,55 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: id:int, o_date:timestamp + dataColumns: d_date:timestamp, ly_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] - Map 5 + scratchColumnTypeNames: [bigint] + Map 6 Map Operator Tree: TableScan - alias: item1 - filterExpr: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + alias: ytday2 + filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) - predicate: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) + predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int) + expressions: ytd_date (type: timestamp) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1] + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: timestamp) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: timestamp) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -469,46 +503,46 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 3 + dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: id:int, s_id:int, name:string + dataColumns: d_date:timestamp, ytd_date:timestamp partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 6 + Map 7 Map Operator Tree: TableScan - alias: lday2 - filterExpr: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + alias: od2 + filterExpr: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) - predicate: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) + predicate: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date (type: timestamp), ly_date (type: timestamp) + expressions: id (type: int), o_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: timestamp) - 1 _col0 (type: timestamp) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumns: 0:timestamp + bigTableKeyColumns: 0:int bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyMultiKeyOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -516,7 +550,7 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 7 + 1 Map 1 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: timestamp) @@ -529,6 +563,67 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: timestamp) + 1 _col0 (type: timestamp) + Map Join Vectorization: + bigTableKeyColumns: 1:timestamp + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED + input vertices: + 1 Reducer 2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: true (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:boolean + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -537,79 +632,50 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: d_date:timestamp, ly_date:timestamp + dataColumns: id:int, o_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 7 - Map Operator Tree: - TableScan - alias: ytday2 - filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) - predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ytd_date (type: timestamp) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + scratchColumnTypeNames: [bigint] + Reducer 2 Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: + Reduce Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: d_date:timestamp, ytd_date:timestamp + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -656,7 +722,7 @@ STAGE PLANS: valueColumns: 1:int, 2:timestamp Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: timestamp) - Reducer 3 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -680,7 +746,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -734,7 +800,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: select * from (select item1.S_ID S_ID, ytday1.D_DATE D_DATE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index b6682d8667ff..5bbdded348a2 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -761,10 +761,10 @@ STAGE PLANS: keys: 0 _col0 (type: date) 1 _col0 (type: date) - Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.95238096 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -910,10 +910,10 @@ STAGE PLANS: keys: 0 _col0 (type: timestamp) 1 _col0 (type: timestamp) - Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.95238096 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out index e470e4ea7aa1..964a40da2c92 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out @@ -1,25 +1,24 @@ CTE Suggestion: HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveFilter(condition=[sq_count_check($0)]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) CTE Suggestion: HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[385][bigTable=?] in task 'Map 13' is a cross product -Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Map 12' is a cross product +Warning: Map Join MAPJOIN[393][bigTable=?] in task 'Reducer 11' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -38,19 +37,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), <=(*(0.9:DECIMAL(1, 1), $1), $3), <=($3, *(1.1:DECIMAL(2, 1), $1)), <=(*(0.9:DECIMAL(1, 1), $3), $1), <=($1, *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -65,19 +64,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -91,19 +90,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[cte, cte_suggestion_1]], table:alias=[cte_suggestion_1]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out index 1cb25790654e..ff9ac3ce98d7 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product { "CBOPlan": { "rels": [ @@ -720,7 +721,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1143,7 +1144,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1555,7 +1556,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1793,25 +1794,6 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "id": "16", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "9", - "15" - ], - "rowCount": 59169.69 - }, - { - "id": "17", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1861,7 +1843,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "rowCount": 9861.615 }, { - "id": "18", + "id": "17", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -1874,6 +1856,25 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product ], "rowCount": 9861.615 }, + { + "id": "18", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "15", + "17" + ], + "rowCount": 9861.615 + }, { "id": "19", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -1898,7 +1899,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "algorithm": "none", "cost": "not available", "inputs": [ - "16", + "9", "18" ], "rowCount": 8.75263053674025E7 @@ -2136,13 +2137,13 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -3116,25 +3117,6 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "id": "40", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "34", - "39" - ], - "rowCount": 59169.69 - }, - { - "id": "41", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -3184,7 +3166,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "rowCount": 9861.615 }, { - "id": "42", + "id": "41", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -3197,6 +3179,25 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product ], "rowCount": 9861.615 }, + { + "id": "42", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "39", + "41" + ], + "rowCount": 9861.615 + }, { "id": "43", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -3221,7 +3222,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "algorithm": "none", "cost": "not available", "inputs": [ - "40", + "34", "42" ], "rowCount": 8.75263053674025E7 @@ -3979,25 +3980,6 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "id": "63", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "57", - "62" - ], - "rowCount": 59169.69 - }, - { - "id": "64", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -4047,7 +4029,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "rowCount": 9861.615 }, { - "id": "65", + "id": "64", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -4060,6 +4042,25 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product ], "rowCount": 9861.615 }, + { + "id": "65", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "62", + "64" + ], + "rowCount": 9861.615 + }, { "id": "66", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -4084,7 +4085,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "algorithm": "none", "cost": "not available", "inputs": [ - "63", + "57", "65" ], "rowCount": 8.75263053674025E7 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out index 9664e4762b89..de98d243fa41 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -16,19 +17,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -45,19 +46,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -73,19 +74,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out index 2effa2e3433c..5091dfb25a49 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out @@ -42,13 +42,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -78,13 +78,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -180,17 +180,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Reducer 3 Execution mode: llap @@ -202,13 +202,13 @@ STAGE PLANS: 0 _col0 (type: bigint), _col1 (type: date) 1 _col0 (type: bigint), _col1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: date) null sort order: az sort order: ++ Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint) - Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(27,2)), _col3 (type: bigint), _col4 (type: date), _col5 (type: decimal(27,2)) Reducer 4 Execution mode: vectorized, llap @@ -216,7 +216,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: date), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: bigint), VALUE._col4 (type: date), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -242,25 +242,25 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 4460994065 Data size: 1570269910880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (max_window_0 > max_window_1) (type: boolean) - Statistics: Num rows: 1486998021 Data size: 523423303392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date) null sort order: zz - Statistics: Num rows: 1486998021 Data size: 523423303392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator expressions: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1486998021 Data size: 761342986752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ - Statistics: Num rows: 1486998021 Data size: 761342986752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) Reducer 5 Execution mode: vectorized, llap @@ -268,7 +268,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: date), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1486998021 Data size: 761342986752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE @@ -287,7 +287,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -310,17 +310,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out index a4158d0ee7f9..31dc5e2d3eca 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8,15 +9,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Map 6 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 9 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 10 <- Map 9 (SIMPLE_EDGE) + Map 1 <- Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 10 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 13 <- Reducer 6 (BROADCAST_EDGE) + Map 15 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 3 <- Reducer 14 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 7 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,8 +53,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 5 - Statistics: Num rows: 47305620952 Data size: 5664627767248 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 8 + Statistics: Num rows: 3532295 Data size: 28258472 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -56,25 +63,197 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 11 - Statistics: Num rows: 47305620952 Data size: 10016744894832 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 3532295 Data size: 353229612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.92992544 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 9685119072 Data size: 2053245243264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9685119072 Data size: 2053245243264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 11 + Map 10 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 6777167 Data size: 54217448 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 17 + Statistics: Num rows: 6777167 Data size: 677716812 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.9634768 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: date), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 + input vertices: + 0 Reducer 6 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.8333333 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 15 + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 1773711 Data size: 14189800 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 17 + Statistics: Num rows: 1773711 Data size: 177371212 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.86044854 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 17 Map Operator Tree: TableScan alias: item @@ -110,10 +289,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date is not null or ((d_date = DATE'1998-02-19') and d_week_seq is not null) or (d_date = DATE'1998-02-19')) (type: boolean) + filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) or (d_date BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))))) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: d_date is not null (type: boolean) + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_date_sk (type: bigint), d_date (type: date) @@ -142,6 +321,13 @@ STAGE PLANS: Partition key expr: cs_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 1 + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -164,7 +350,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 6 + Target Vertex: Map 10 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -187,28 +373,23 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 9 - Filter Operator - predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 2191440 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 15 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_date = DATE'1998-02-19') or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_date = DATE'1998-02-19') (type: boolean) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -217,13 +398,30 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 8 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -236,159 +434,139 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 input vertices: - 1 Reducer 4 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 Reducer 5 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: date) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 236172 Data size: 13225632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.8333333 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 90761969900 Data size: 10673440481760 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 90761969900 Data size: 19023541712560 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 18393755964 Data size: 3899476264368 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18393755964 Data size: 3899476264368 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + Reducer 11 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: web_sales - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) + Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 input vertices: - 1 Map 3 - Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 23754102805 Data size: 2850189729064 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 23754102805 Data size: 5035567187124 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4869044604 Data size: 1032237456048 Basic stats: COMPLETE Column stats: COMPLETE + 1 Reducer 16 + Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col3 (type: decimal(17,2)) + null sort order: zz + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4869044604 Data size: 1032237456048 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) + Reducer 12 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 10 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 14 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 16 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -424,7 +602,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -437,78 +615,75 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + input vertices: + 1 Reducer 7 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) - Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: sq_count_check(_col0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + 0 + 1 + outputColumnNames: _col1 input vertices: - 1 Reducer 10 - Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: decimal(17,2)) - null sort order: zz - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 8 + 1 Map 8 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out index 6eaa80a5a0fe..c25dea04819d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out @@ -7,17 +7,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 12 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 3 <- Map 8 (BROADCAST_EDGE) - Map 5 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Map 8 <- Map 11 (BROADCAST_EDGE) - Reducer 10 <- Map 8 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) + Map 1 <- Map 15 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Map 13 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 15 <- Reducer 10 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 3 <- Map 9 (BROADCAST_EDGE) + Map 6 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -37,7 +39,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 8 + 1 Map 15 Statistics: Num rows: 4320980099 Data size: 293480294712 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -48,7 +50,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 4753078211 Data size: 56690586512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1183036 Data size: 9464292 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -57,21 +59,21 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 4753078211 Data size: 493973781924 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 1183036 Data size: 118303604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.7907722 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 477721320 Data size: 51593902560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 477721320 Data size: 51593902560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -83,26 +85,32 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.690705 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 12 + Map 13 Map Operator Tree: TableScan alias: web_returns @@ -119,7 +127,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 9 + 1 Map 15 Statistics: Num rows: 2062802370 Data size: 140076140668 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -129,8 +137,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 2269082656 Data size: 27034571380 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 564772 Data size: 4518180 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -139,30 +147,121 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 2269082656 Data size: 235790175732 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 564772 Data size: 56477204 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.5617275 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 228217128 Data size: 24647449824 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 228217128 Data size: 24647449824 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 15 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) or (d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: wr_returned_date_sk (bigint) + Target Input: web_returns + Partition key expr: wr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 13 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: sr_returned_date_sk (bigint) + Target Input: store_returns + Partition key expr: sr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 6 + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cr_returned_date_sk (bigint) + Target Input: catalog_returns + Partition key expr: cr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 3 Map Operator Tree: TableScan alias: date_dim filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:2.7378882667798324E-4 Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -179,23 +278,38 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 8 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.95 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: item @@ -227,7 +341,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 6 Map Operator Tree: TableScan alias: store_returns @@ -244,7 +358,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 10 + 1 Map 15 Statistics: Num rows: 8332595709 Data size: 566008907392 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -254,8 +368,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 9165855478 Data size: 109382664916 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 2281371 Data size: 18250972 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -264,29 +378,29 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 9165855478 Data size: 952641368892 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 2281371 Data size: 228137104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.8915021 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 921284328 Data size: 99498707424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 921284328 Data size: 99498707424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 8 + Map 9 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null)) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -303,137 +417,104 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 11 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + 1 Reducer 12 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 11 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.95 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.690705 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: d_date is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: wr_returned_date_sk (bigint) - Target Input: web_returns - Partition key expr: wr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 12 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cr_returned_date_sk (bigint) - Target Input: catalog_returns - Partition key expr: cr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: sr_returned_date_sk (bigint) - Target Input: store_returns - Partition key expr: sr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Reducer 13 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -469,7 +550,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -496,7 +590,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col5, _col6 input vertices: - 1 Reducer 13 + 1 Reducer 14 Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ @@ -514,7 +608,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -531,19 +625,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) Stage: Stage-0 Fetch Operator