diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java index d2948cb7057a..6ffc21a8a497 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java @@ -28,6 +28,7 @@ import java.util.IdentityHashMap; import java.util.Iterator; import java.util.LinkedHashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -1195,7 +1196,7 @@ private static boolean areMergeableExtendedCheck(ParseContext pctx, SharedWorkOp Operator op = dppsOp1.get(i); if (op instanceof ReduceSinkOperator) { Set> ascendants = - findAscendantWorkOperators(pctx, optimizerCache, op); + findAscendantOperators(optimizerCache, op); if (ascendants.contains(tsOp2)) { // This should not happen, we cannot merge return false; @@ -1206,7 +1207,7 @@ private static boolean areMergeableExtendedCheck(ParseContext pctx, SharedWorkOp Operator op = dppsOp2.get(i); if (op instanceof ReduceSinkOperator) { Set> ascendants = - findAscendantWorkOperators(pctx, optimizerCache, op); + findAscendantOperators(optimizerCache, op); if (ascendants.contains(tsOp1)) { // This should not happen, we cannot merge return false; @@ -1633,8 +1634,7 @@ private static Set> gatherDPPBranchOps(ParseContext pctx, Collection> c = optimizerCache.tableScanToDPPSource .get((TableScanOperator) op); for (Operator dppSource : c) { - Set> ascendants = - findAscendantWorkOperators(pctx, optimizerCache, dppSource); + Set> ascendants = findAscendantOperators(optimizerCache, dppSource); if (!Collections.disjoint(ascendants, discardedOps)) { // Remove branch removeBranch(dppSource, dppBranches, ops, optimizerCache); @@ -1938,97 +1938,28 @@ public boolean accept(Operator s, Operator t, OpEdge opEdge) { } - private static Set> findParentWorkOperators(ParseContext pctx, - SharedWorkOptimizerCache optimizerCache, Operator start) { - return findParentWorkOperators(pctx, optimizerCache, start, ImmutableSet.of()); - } - - private static Set> findParentWorkOperators(ParseContext pctx, - SharedWorkOptimizerCache optimizerCache, Operator start, - Set> excludeOps) { - // Find operators in work - Set> workOps = findWorkOperators(optimizerCache, start); - // Gather input works operators - Set> set = new HashSet>(); - for (Operator op : workOps) { - if (op.getParentOperators() != null) { - for (Operator parent : op.getParentOperators()) { - if (parent instanceof ReduceSinkOperator && !excludeOps.contains(parent)) { - set.addAll(findWorkOperators(optimizerCache, parent)); - } - } - } - if (op instanceof TableScanOperator) { - // Check for DPP and semijoin DPP - for (Operator parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) { - if (!excludeOps.contains(parent)) { - set.addAll(findWorkOperators(optimizerCache, parent)); - } - } - } - } - return set; - } - - private static Set> findAscendantWorkOperators(ParseContext pctx, - SharedWorkOptimizerCache optimizerCache, Operator start) { - // Find operators in work - Set> workOps = findWorkOperators(optimizerCache, start); - // Gather input works operators - Set> result = new HashSet>(); - Set> set; - while (!workOps.isEmpty()) { - set = new HashSet>(); - for (Operator op : workOps) { + private static Set> findAscendantOperators(SharedWorkOptimizerCache optimizerCache, + Operator start) { + Set> visited = new HashSet<>(); + visited.add(start); + + // Gather input operators + Queue> remaining = new LinkedList<>(start.getParentOperators()); + while (!remaining.isEmpty()) { + Operator op = remaining.poll(); + if (!visited.contains(op)) { + visited.add(op); if (op.getParentOperators() != null) { - for (Operator parent : op.getParentOperators()) { - if (parent instanceof ReduceSinkOperator) { - set.addAll(findWorkOperators(optimizerCache, parent)); - } - } - } else if (op instanceof TableScanOperator) { + remaining.addAll(op.getParentOperators()); + } + if (op instanceof TableScanOperator) { // Check for DPP and semijoin DPP - for (Operator parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) { - set.addAll(findWorkOperators(optimizerCache, parent)); - } + remaining.addAll(optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)); } } - workOps = set; - result.addAll(set); } - return result; - } - private static Set> findChildWorkOperators(ParseContext pctx, - SharedWorkOptimizerCache optimizerCache, Operator start, boolean traverseEventOperators) { - // Find operators in work - Set> workOps = findWorkOperators(optimizerCache, start); - // Gather output works operators - Set> set = new HashSet>(); - for (Operator op : workOps) { - if (op instanceof ReduceSinkOperator) { - if (op.getChildOperators() != null) { - // All children of RS are descendants - for (Operator child : op.getChildOperators()) { - set.addAll(findWorkOperators(optimizerCache, child)); - } - } - // Semijoin DPP work is considered a child because work needs - // to finish for it to execute - SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op); - if (sjbi != null) { - set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp())); - } - } else if(op.getConf() instanceof DynamicPruningEventDesc) { - // DPP work is considered a child because work needs - // to finish for it to execute - if (traverseEventOperators) { - set.addAll(findWorkOperators( - optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan())); - } - } - } - return set; + return visited; } private static Set> findDescendantWorkOperators(ParseContext pctx, diff --git a/ql/src/test/queries/clientpositive/sharedwork_dpp_removal_hive_28490.q b/ql/src/test/queries/clientpositive/sharedwork_dpp_removal_hive_28490.q new file mode 100644 index 000000000000..1988b8b220d9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/sharedwork_dpp_removal_hive_28490.q @@ -0,0 +1,79 @@ +create table x2_date_dim (d_date_sk bigint, d_week_seq string, d_date string); +create table x2_item (i_item_sk bigint, i_item_id string); +create table x2_store_returns +(sr_returned_date_sk bigint, sr_item_sk bigint, sr_return_quantity int, sr_some_field string, sr_other_field string); +create table x2_catalog_returns +(cr_returned_date_sk bigint, cr_item_sk bigint, cr_return_quantity int, cr_some_field string, cr_other_field string); + +alter table x2_date_dim update statistics set('numRows'='35', 'rawDataSize'='81449'); +alter table x2_item update statistics set('numRows'='12345', 'rawDataSize'='123456'); +alter table x2_store_returns update statistics set('numRows'='123456', 'rawDataSize'='1234567'); +alter table x2_catalog_returns update statistics set('numRows'='123456', 'rawDataSize'='1234567'); + +set hive.auto.convert.join=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.tez.bigtable.minsize.semijoin.reduction=30; -- This should be less than numRows of x2_date_dim +set hive.tez.dynamic.semijoin.reduction.threshold=0.0; -- In order not to remove any SemiJoin branch +set hive.tez.dynamic.semijoin.reduction.for.mapjoin=true; -- In order not to remove any SemiJoin branch + +-- To check whether the original query plan contains the following pattern: +-- date_dim ─┐ +-- date_dim ─┴ MapJoin ─(DPP)─ date_dim ─ (... catalog_returns) +-- date_dim ─┐ +-- date_dim ─┴ MapJoin ─(DPP)─ date_dim ─ (... store_returns) + +set hive.optimize.shared.work=false; +explain +with sr_items as ( + select i_item_id item_id, sum(sr_return_quantity) sr_item_qty + from x2_store_returns, x2_item, x2_date_dim + where + sr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + sr_returned_date_sk = d_date_sk group by i_item_id +), +cr_items as ( + select i_item_id item_id, sum(cr_return_quantity) cr_item_qty + from x2_catalog_returns, x2_item, x2_date_dim + where + cr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + cr_returned_date_sk = d_date_sk group by i_item_id +) +select sr_items.item_id, sr_item_qty, cr_item_qty +from sr_items, cr_items +where sr_items.item_id=cr_items.item_id; + +set hive.optimize.shared.work=true; +explain +with sr_items as ( + select i_item_id item_id, sum(sr_return_quantity) sr_item_qty + from x2_store_returns, x2_item, x2_date_dim + where + sr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + sr_returned_date_sk = d_date_sk group by i_item_id +), +cr_items as ( + select i_item_id item_id, sum(cr_return_quantity) cr_item_qty + from x2_catalog_returns, x2_item, x2_date_dim + where + cr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + cr_returned_date_sk = d_date_sk group by i_item_id +) +select sr_items.item_id, sr_item_qty, cr_item_qty +from sr_items, cr_items +where sr_items.item_id=cr_items.item_id; diff --git a/ql/src/test/results/clientpositive/llap/sharedwork_dpp_removal_hive_28490.q.out b/ql/src/test/results/clientpositive/llap/sharedwork_dpp_removal_hive_28490.q.out new file mode 100644 index 000000000000..000ccd2f8da9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/sharedwork_dpp_removal_hive_28490.q.out @@ -0,0 +1,1433 @@ +PREHOOK: query: create table x2_date_dim (d_date_sk bigint, d_week_seq string, d_date string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x2_date_dim +POSTHOOK: query: create table x2_date_dim (d_date_sk bigint, d_week_seq string, d_date string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x2_date_dim +PREHOOK: query: create table x2_item (i_item_sk bigint, i_item_id string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x2_item +POSTHOOK: query: create table x2_item (i_item_sk bigint, i_item_id string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x2_item +PREHOOK: query: create table x2_store_returns +(sr_returned_date_sk bigint, sr_item_sk bigint, sr_return_quantity int, sr_some_field string, sr_other_field string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x2_store_returns +POSTHOOK: query: create table x2_store_returns +(sr_returned_date_sk bigint, sr_item_sk bigint, sr_return_quantity int, sr_some_field string, sr_other_field string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x2_store_returns +PREHOOK: query: create table x2_catalog_returns +(cr_returned_date_sk bigint, cr_item_sk bigint, cr_return_quantity int, cr_some_field string, cr_other_field string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x2_catalog_returns +POSTHOOK: query: create table x2_catalog_returns +(cr_returned_date_sk bigint, cr_item_sk bigint, cr_return_quantity int, cr_some_field string, cr_other_field string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x2_catalog_returns +PREHOOK: query: alter table x2_date_dim update statistics set('numRows'='35', 'rawDataSize'='81449') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@x2_date_dim +PREHOOK: Output: default@x2_date_dim +POSTHOOK: query: alter table x2_date_dim update statistics set('numRows'='35', 'rawDataSize'='81449') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@x2_date_dim +POSTHOOK: Output: default@x2_date_dim +PREHOOK: query: alter table x2_item update statistics set('numRows'='12345', 'rawDataSize'='123456') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@x2_item +PREHOOK: Output: default@x2_item +POSTHOOK: query: alter table x2_item update statistics set('numRows'='12345', 'rawDataSize'='123456') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@x2_item +POSTHOOK: Output: default@x2_item +PREHOOK: query: alter table x2_store_returns update statistics set('numRows'='123456', 'rawDataSize'='1234567') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@x2_store_returns +PREHOOK: Output: default@x2_store_returns +POSTHOOK: query: alter table x2_store_returns update statistics set('numRows'='123456', 'rawDataSize'='1234567') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@x2_store_returns +POSTHOOK: Output: default@x2_store_returns +PREHOOK: query: alter table x2_catalog_returns update statistics set('numRows'='123456', 'rawDataSize'='1234567') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@x2_catalog_returns +PREHOOK: Output: default@x2_catalog_returns +POSTHOOK: query: alter table x2_catalog_returns update statistics set('numRows'='123456', 'rawDataSize'='1234567') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@x2_catalog_returns +POSTHOOK: Output: default@x2_catalog_returns +PREHOOK: query: explain +with sr_items as ( + select i_item_id item_id, sum(sr_return_quantity) sr_item_qty + from x2_store_returns, x2_item, x2_date_dim + where + sr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + sr_returned_date_sk = d_date_sk group by i_item_id +), +cr_items as ( + select i_item_id item_id, sum(cr_return_quantity) cr_item_qty + from x2_catalog_returns, x2_item, x2_date_dim + where + cr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + cr_returned_date_sk = d_date_sk group by i_item_id +) +select sr_items.item_id, sr_item_qty, cr_item_qty +from sr_items, cr_items +where sr_items.item_id=cr_items.item_id +PREHOOK: type: QUERY +PREHOOK: Input: default@x2_catalog_returns +PREHOOK: Input: default@x2_date_dim +PREHOOK: Input: default@x2_item +PREHOOK: Input: default@x2_store_returns +#### A masked pattern was here #### +POSTHOOK: query: explain +with sr_items as ( + select i_item_id item_id, sum(sr_return_quantity) sr_item_qty + from x2_store_returns, x2_item, x2_date_dim + where + sr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + sr_returned_date_sk = d_date_sk group by i_item_id +), +cr_items as ( + select i_item_id item_id, sum(cr_return_quantity) cr_item_qty + from x2_catalog_returns, x2_item, x2_date_dim + where + cr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + cr_returned_date_sk = d_date_sk group by i_item_id +) +select sr_items.item_id, sr_item_qty, cr_item_qty +from sr_items, cr_items +where sr_items.item_id=cr_items.item_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x2_catalog_returns +POSTHOOK: Input: default@x2_date_dim +POSTHOOK: Input: default@x2_item +POSTHOOK: Input: default@x2_store_returns +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Map 11 <- Map 14 (BROADCAST_EDGE), Map 16 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) + Map 14 <- Reducer 19 (BROADCAST_EDGE) + Map 18 <- Map 20 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) + Map 3 <- Reducer 8 (BROADCAST_EDGE) + Map 5 <- Reducer 13 (BROADCAST_EDGE) + Map 7 <- Map 9 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE) + Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) + Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) + Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE) + Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x2_catalog_returns + filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null and cr_returned_date_sk BETWEEN DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN DynamicValue(RS_13_x2_item_i_item_sk_min) AND DynamicValue(RS_13_x2_item_i_item_sk_max) and in_bloom_filter(cr_returned_date_sk, DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item_sk_bloom_filter))) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_215_container, bigKeyColName:cr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709 + Statistics: Num rows: 123456 Data size: 2345700 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cr_item_sk is not null and cr_returned_date_sk is not null and cr_returned_date_sk BETWEEN DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN DynamicValue(RS_13_x2_item_i_item_sk_min) AND DynamicValue(RS_13_x2_item_i_item_sk_max) and in_bloom_filter(cr_returned_date_sk, DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cr_returned_date_sk (type: bigint), cr_item_sk (type: bigint), cr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 122223 Data size: 2322276 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col4, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col6 (type: string), _col4 (type: string) + outputColumnNames: _col2, _col4, _col6 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col6 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + input vertices: + 1 Map 7 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: x2_store_returns + filterExpr: (sr_item_sk is not null and sr_returned_date_sk is not null and sr_returned_date_sk BETWEEN DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN DynamicValue(RS_51_x2_item_i_item_sk_min) AND DynamicValue(RS_51_x2_item_i_item_sk_max) and in_bloom_filter(sr_returned_date_sk, DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item_sk_bloom_filter))) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_218_container, bigKeyColName:sr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709 + Statistics: Num rows: 123456 Data size: 2345700 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_returned_date_sk is not null and sr_returned_date_sk BETWEEN DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN DynamicValue(RS_51_x2_item_i_item_sk_min) AND DynamicValue(RS_51_x2_item_i_item_sk_max) and in_bloom_filter(sr_returned_date_sk, DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: bigint), sr_item_sk (type: bigint), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Map 14 + Statistics: Num rows: 122223 Data size: 2322276 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col4, _col6 + input vertices: + 1 Map 16 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col6 (type: string), _col4 (type: string) + outputColumnNames: _col2, _col4, _col6 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col6 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + input vertices: + 1 Map 18 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 14 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: (d_date_sk is not null and d_date is not null and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 35 Data size: 6720 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_date is not null and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 16 + Map Operator Tree: + TableScan + alias: x2_item + filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 12345 Data size: 2251968 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 18 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: (d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_63_x2_date_dim_d_week_seq_min) AND DynamicValue(RS_63_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_63_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_220_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:1.0285714285714285 + Statistics: Num rows: 35 Data size: 12880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_63_x2_date_dim_d_week_seq_min) AND DynamicValue(RS_63_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_63_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean) + Statistics: Num rows: 33 Data size: 12144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: string), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 12144 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 20 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 20 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 35 Data size: 12880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: (d_date_sk is not null and d_date is not null and d_date BETWEEN DynamicValue(RS_31_x2_date_dim_d_date_min) AND DynamicValue(RS_31_x2_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_31_x2_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 35 Data size: 6720 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_date is not null and d_date BETWEEN DynamicValue(RS_31_x2_date_dim_d_date_min) AND DynamicValue(RS_31_x2_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_31_x2_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: x2_item + filterExpr: (i_item_sk is not null and i_item_id is not null and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id, DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean) + Statistics: Num rows: 12345 Data size: 2251968 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_item_id is not null and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id, DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: (d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_217_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:1.0285714285714285 + Statistics: Num rows: 35 Data size: 12880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean) + Statistics: Num rows: 33 Data size: 12144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: string), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 12144 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 9 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 9 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 35 Data size: 12880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 12 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 13 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 15 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 17 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 19 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Reducer 12 + Statistics: Num rows: 81338 Data size: 1545462 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 81338 Data size: 1545462 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 81338 Data size: 1545462 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 21 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +with sr_items as ( + select i_item_id item_id, sum(sr_return_quantity) sr_item_qty + from x2_store_returns, x2_item, x2_date_dim + where + sr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + sr_returned_date_sk = d_date_sk group by i_item_id +), +cr_items as ( + select i_item_id item_id, sum(cr_return_quantity) cr_item_qty + from x2_catalog_returns, x2_item, x2_date_dim + where + cr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + cr_returned_date_sk = d_date_sk group by i_item_id +) +select sr_items.item_id, sr_item_qty, cr_item_qty +from sr_items, cr_items +where sr_items.item_id=cr_items.item_id +PREHOOK: type: QUERY +PREHOOK: Input: default@x2_catalog_returns +PREHOOK: Input: default@x2_date_dim +PREHOOK: Input: default@x2_item +PREHOOK: Input: default@x2_store_returns +#### A masked pattern was here #### +POSTHOOK: query: explain +with sr_items as ( + select i_item_id item_id, sum(sr_return_quantity) sr_item_qty + from x2_store_returns, x2_item, x2_date_dim + where + sr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + sr_returned_date_sk = d_date_sk group by i_item_id +), +cr_items as ( + select i_item_id item_id, sum(cr_return_quantity) cr_item_qty + from x2_catalog_returns, x2_item, x2_date_dim + where + cr_item_sk = i_item_sk and + d_date in ( + select d_date from x2_date_dim + where d_week_seq in ( + select d_week_seq from x2_date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and + cr_returned_date_sk = d_date_sk group by i_item_id +) +select sr_items.item_id, sr_item_qty, cr_item_qty +from sr_items, cr_items +where sr_items.item_id=cr_items.item_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x2_catalog_returns +POSTHOOK: Input: default@x2_date_dim +POSTHOOK: Input: default@x2_item +POSTHOOK: Input: default@x2_store_returns +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 12 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 12 <- Reducer 6 (BROADCAST_EDGE) + Map 3 <- Reducer 11 (BROADCAST_EDGE) + Map 5 <- Map 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Map 9 <- Map 12 (BROADCAST_EDGE), Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) + Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) + Reducer 14 <- Map 12 (CUSTOM_SIMPLE_EDGE) + Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x2_catalog_returns + filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null and cr_returned_date_sk BETWEEN DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN DynamicValue(RS_13_x2_item_i_item_sk_min) AND DynamicValue(RS_13_x2_item_i_item_sk_max) and in_bloom_filter(cr_returned_date_sk, DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item_sk_bloom_filter))) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_215_container, bigKeyColName:cr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709 + Statistics: Num rows: 123456 Data size: 2345700 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cr_item_sk is not null and cr_returned_date_sk is not null and cr_returned_date_sk BETWEEN DynamicValue(RS_10_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_10_x2_date_dim_d_date_sk_max) and cr_item_sk BETWEEN DynamicValue(RS_13_x2_item_i_item_sk_min) AND DynamicValue(RS_13_x2_item_i_item_sk_max) and in_bloom_filter(cr_returned_date_sk, DynamicValue(RS_10_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(cr_item_sk, DynamicValue(RS_13_x2_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cr_returned_date_sk (type: bigint), cr_item_sk (type: bigint), cr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Map 12 + Statistics: Num rows: 122223 Data size: 2322276 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col4, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col6 (type: string), _col4 (type: string) + outputColumnNames: _col2, _col4, _col6 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col6 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + input vertices: + 1 Map 5 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 12 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: (d_date_sk is not null and d_date is not null and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 35 Data size: 6720 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_date is not null and d_date BETWEEN DynamicValue(RS_69_x2_date_dim_d_date_min) AND DynamicValue(RS_69_x2_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_69_x2_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 6336 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 15 + Map Operator Tree: + TableScan + alias: x2_item + filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 12345 Data size: 2251968 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: x2_item + filterExpr: (i_item_sk is not null and i_item_id is not null and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id, DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean) + Statistics: Num rows: 12345 Data size: 2251968 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_item_id is not null and i_item_id BETWEEN DynamicValue(RS_77_x2_item_i_item_id_min) AND DynamicValue(RS_77_x2_item_i_item_id_max) and in_bloom_filter(i_item_id, DynamicValue(RS_77_x2_item_i_item_id_bloom_filter))) (type: boolean) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11111 Data size: 2026862 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: (d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean) + Statistics: Num rows: 35 Data size: 12880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_25_x2_date_dim_d_week_seq_min) AND DynamicValue(RS_25_x2_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_25_x2_date_dim_d_week_seq_bloom_filter))) (type: boolean) + Statistics: Num rows: 33 Data size: 12144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: string), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 12144 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 7 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 36 Data size: 13358 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: x2_date_dim + filterExpr: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 35 Data size: 12880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 5152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 9 + Map Operator Tree: + TableScan + alias: x2_store_returns + filterExpr: (sr_item_sk is not null and sr_returned_date_sk is not null and sr_returned_date_sk BETWEEN DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN DynamicValue(RS_51_x2_item_i_item_sk_min) AND DynamicValue(RS_51_x2_item_i_item_sk_max) and in_bloom_filter(sr_returned_date_sk, DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item_sk_bloom_filter))) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_218_container, bigKeyColName:sr_returned_date_sk, smallTablePos:1, keyRatio:0.9900126360808709 + Statistics: Num rows: 123456 Data size: 2345700 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_returned_date_sk is not null and sr_returned_date_sk BETWEEN DynamicValue(RS_48_x2_date_dim_d_date_sk_min) AND DynamicValue(RS_48_x2_date_dim_d_date_sk_max) and sr_item_sk BETWEEN DynamicValue(RS_51_x2_item_i_item_sk_min) AND DynamicValue(RS_51_x2_item_i_item_sk_max) and in_bloom_filter(sr_returned_date_sk, DynamicValue(RS_48_x2_date_dim_d_date_sk_bloom_filter)) and in_bloom_filter(sr_item_sk, DynamicValue(RS_51_x2_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: bigint), sr_item_sk (type: bigint), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 111112 Data size: 2111160 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Map 12 + Statistics: Num rows: 122223 Data size: 2322276 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col4, _col6 + input vertices: + 1 Map 15 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col6 (type: string), _col4 (type: string) + outputColumnNames: _col2, _col4, _col6 + Statistics: Num rows: 134445 Data size: 2554503 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col6 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + input vertices: + 1 Map 5 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 147889 Data size: 2809953 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 11 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 13 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 14 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 16 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73944 Data size: 1404966 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Reducer 10 + Statistics: Num rows: 81338 Data size: 1545462 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 81338 Data size: 1545462 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 81338 Data size: 1545462 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +