HIVE-24167: Compilation fails due to equivalence mapping violation when CTE materialization is enabled #5452

Merged: 6 commits, Nov 15, 2024
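Background for the diff that follows: PlanMapper keeps related plan objects (operator tree signatures, runtime statistics, and probably RelNodes, per its javadoc) in equivalence groups, and link() can be called with merging of pre-existing groups disallowed. When two objects that already sit in different groups are linked in that mode, a situation the PR title attributes to CTE materialization, compilation previously aborted with the RuntimeException visible in the PlanMapper hunk further down. The sketch below models that bookkeeping with simplified stand-in types rather than Hive's classes; only the quoted error message, the mayMerge flag, and the warn-instead-of-throw direction of the fix are taken from the diff.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Simplified model of the equivalence-group bookkeeping this fix touches.
 * The types here are stand-ins, not Hive's PlanMapper; only the condition
 * (two existing groups, merging not allowed) and the error message mirror the diff.
 */
public class EquivalenceMappingSketch {

  static final class Group {
    final Set<Object> members = new HashSet<>();
  }

  private final Map<Object, Group> groupOf = new HashMap<>();

  void link(Object o1, Object o2, boolean mayMerge) {
    Group g1 = groupOf.get(o1);
    Group g2 = groupOf.get(o2);
    if (g1 != null && g2 != null && g1 != g2 && !mayMerge) {
      // Old Hive behavior: throw new RuntimeException("equivalence mapping violation");
      // The patch instead logs a warning and marks both groups (marking omitted in this sketch).
      System.err.println("Illegally linking " + o1 + " and " + o2);
      return;
    }
    Group target = (g1 != null) ? g1 : (g2 != null) ? g2 : new Group();
    if (g2 != null && g2 != target) {
      // Merging is allowed here: fold g2 into the surviving group.
      target.members.addAll(g2.members);
      for (Object member : g2.members) {
        groupOf.put(member, target);
      }
    }
    target.members.add(o1);
    target.members.add(o2);
    groupOf.put(o1, target);
    groupOf.put(o2, target);
  }

  public static void main(String[] args) {
    EquivalenceMappingSketch sketch = new EquivalenceMappingSketch();
    sketch.link("filter-signature-A", "stats-1", true);  // creates group {A, stats-1}
    sketch.link("filter-signature-B", "stats-2", true);  // creates group {B, stats-2}
    // Linking across two existing groups with mayMerge == false is the case that
    // previously aborted compilation; after the patch it only degrades stats quality.
    sketch.link("filter-signature-A", "filter-signature-B", false);
  }
}

In the actual patch the hard failure is replaced by a LOG.warn plus an OperatorStats.IncorrectRuntimeStatsMarker added to each affected group, so compilation proceeds and only the runtime statistics attached to those groups are flagged as unreliable.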
Changes from all commits
ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (0 additions, 33 deletions)
@@ -26,7 +26,6 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
-import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
@@ -100,7 +99,6 @@
import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
@@ -117,7 +115,6 @@
import org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
-import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
@@ -139,7 +136,6 @@
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
-import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
@@ -991,35 +987,6 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
ogw.startWalking(topNodes, null);
}

-  private static class CollectAll implements SemanticNodeProcessor {
-    private PlanMapper planMapper;
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
-        throws SemanticException {
-      ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
-      planMapper = pCtx.getContext().getPlanMapper();
-      FilterOperator fop = (FilterOperator) nd;
-      OpTreeSignature sig = planMapper.getSignatureOf(fop);
-      List<EquivGroup> ar = getGroups(planMapper, HiveFilter.class);
-
-
-      return nd;
-    }
-
-    private List<EquivGroup> getGroups(PlanMapper planMapper2, Class<HiveFilter> class1) {
-      Iterator<EquivGroup> it = planMapper.iterateGroups();
-      List<EquivGroup> ret = new ArrayList<PlanMapper.EquivGroup>();
-      while (it.hasNext()) {
-        EquivGroup g = it.next();
-        if (g.getAll(class1).size() > 0) {
-          ret.add(g);
-        }
-      }
-      return ret;
-    }
-  }
-
private static class MarkRuntimeStatsAsIncorrect implements SemanticNodeProcessor {

private PlanMapper planMapper;
ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java
@@ -37,13 +37,17 @@
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.ql.stats.OperatorStats;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

/**
* Enables to connect related objects to eachother.
*
* Most importantly it aids to connect Operators to OperatorStats and probably RelNodes.
*/
public class PlanMapper {
+  private static final Logger LOG = LoggerFactory.getLogger(PlanMapper.class);

Set<EquivGroup> groups = new HashSet<>();
private Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
@@ -217,7 +221,9 @@ private void link(Object o1, Object o2, boolean mayMerge) {
}
if (mGroups.size() > 1) {
if (!mayMerge) {
-        throw new RuntimeException("equivalence mapping violation");
+        LOG.warn("Illegally linking {} and {}", o1, o2);
+        mGroups.forEach(g -> g.add(new OperatorStats.IncorrectRuntimeStatsMarker()));
+        return;
}
EquivGroup newGrp = new EquivGroup();
newGrp.add(o1);
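The hunk above turns the hard failure into a soft one: each group involved in the illegal link receives an OperatorStats.IncorrectRuntimeStatsMarker, so code that later reads runtime statistics from such a group can tell they are suspect. Below is a hedged sketch of what such a check could look like, built only from methods visible elsewhere in this diff (PlanMapper.iterateGroups and EquivGroup.getAll); the class and method names are illustrative and not part of the patch.

import java.util.Iterator;

import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
import org.apache.hadoop.hive.ql.stats.OperatorStats;

/** Illustrative helper, not part of the patch: counts groups flagged by the new link() path. */
public final class IncorrectStatsProbe {

  private IncorrectStatsProbe() {
  }

  public static int countFlaggedGroups(PlanMapper planMapper) {
    int flagged = 0;
    Iterator<EquivGroup> it = planMapper.iterateGroups();
    while (it.hasNext()) {
      EquivGroup group = it.next();
      // The marker is what link() now adds instead of throwing "equivalence mapping violation".
      if (!group.getAll(OperatorStats.IncorrectRuntimeStatsMarker.class).isEmpty()) {
        flagged++;
      }
    }
    return flagged;
  }
}

Marking per group keeps the damage local: a single bad link degrades the statistics of the groups it touches instead of failing the whole compilation.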
@@ -256,20 +262,15 @@ public <T> List<T> getAll(Class<T> clazz) {
return ret;
}

-  public void runMapper(GroupTransformer mapper) {
-    for (EquivGroup equivGroup : groups) {
-      mapper.map(equivGroup);
-    }
-  }
-
-  public <T> List<T> lookupAll(Class<T> clazz, Object key) {
+  private <T> List<T> lookupAll(Class<T> clazz, Object key) {
EquivGroup group = objectMap.get(key);
if (group == null) {
throw new NoSuchElementException(Objects.toString(key));
}
return group.getAll(clazz);
}

+  @VisibleForTesting
public <T> T lookup(Class<T> clazz, Object key) {
List<T> all = lookupAll(clazz, key);
if (all.size() != 1) {
@@ -279,7 +280,6 @@ public <T> T lookup(Class<T> clazz, Object key) {
return all.get(0);
}

-  @VisibleForTesting
public Iterator<EquivGroup> iterateGroups() {
return groups.iterator();
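With lookupAll now private, callers outside PlanMapper resolve linked objects through lookup (or by iterating groups), which keeps the exactly-one-match guard seen in the all.size() != 1 check above. A small usage sketch, assuming the Hive ql module on the classpath; the wrapper class and method are hypothetical, while PlanMapper.lookup, OpTreeSignature, and OperatorStats are the existing Hive types referenced in this diff.

import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.stats.OperatorStats;

/** Hypothetical convenience wrapper; only PlanMapper.lookup itself is real Hive API. */
public final class StatsLookupSketch {

  private StatsLookupSketch() {
  }

  /**
   * Resolves the single OperatorStats linked to a signature; PlanMapper.lookup
   * rejects keys that map to zero or multiple matches of the requested class.
   */
  public static OperatorStats statsFor(PlanMapper planMapper, OpTreeSignature signature) {
    return planMapper.lookup(OperatorStats.class, signature);
  }
}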

ql/src/test/queries/clientpositive/perf/cbo_query14.q (0 additions, 1 deletion)
@@ -1,5 +1,4 @@
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionPrintSuggester;
---! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain cbo
ql/src/test/queries/clientpositive/perf/query14.q (0 additions, 1 deletion)
@@ -1,4 +1,3 @@
---! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain