Skip to content

Commit

Permalink
HIVE-24167: TPC-DS query 14 fails while generating plan for the filter
Browse files Browse the repository at this point in the history
  • Loading branch information
okumin committed Sep 18, 2024
1 parent 6e1f1cc commit d35216b
Show file tree
Hide file tree
Showing 18 changed files with 1,542 additions and 897 deletions.
4 changes: 4 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,10 @@ public static enum ConfVars {
HIVE_IN_TEST_REPL("hive.in.repl.test", false, "internal usage only, true in replication test mode", true),
HIVE_IN_TEST_IDE("hive.in.ide.test", false, "internal usage only, true if test running in ide",
true),
HIVE_IN_TEST_PLANMAPPER_STRICT_VALIDATION("hive.in.test.planmapper.strict.validation", false,
"internal use only, whether to raise an error when unexpected links are found. We ignore equivalence mapping "
+ "violation because it introduces only minor problems. But we want to strictly check it in qtest so that we "
+ "can prevent further degradations"),
HIVE_TESTING_SHORT_LOGS("hive.testing.short.logs", false,
"internal usage only, used only in test mode. If set true, when requesting the " +
"operation logs the short version (generated by LogDivertAppenderForTest) will be " +
Expand Down
5 changes: 5 additions & 0 deletions data/conf/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>true</value>
</property>

<!-- Properties for test folders -->
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
Expand Down
5 changes: 5 additions & 0 deletions data/conf/iceberg/llap/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>true</value>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
Expand Down
5 changes: 5 additions & 0 deletions data/conf/iceberg/tez/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>true</value>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
Expand Down
5 changes: 5 additions & 0 deletions data/conf/llap/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>true</value>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
Expand Down
7 changes: 6 additions & 1 deletion data/conf/perf/tpcds30tb/tez/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
<value>true</value>
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>
<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>false</value>
<description>We can enable it once we resolve the problem of query14.q</description>
</property>
<property>
<name>hive.rpc.query.plan</name>
<value>true</value>
Expand Down Expand Up @@ -211,4 +216,4 @@
<name>hive.explain.user</name>
<value>false</value>
</property>
</configuration>
</configuration>
5 changes: 5 additions & 0 deletions data/conf/rlist/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>true</value>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
Expand Down
5 changes: 5 additions & 0 deletions data/conf/tez/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
<description>Internal marker for test. Used for masking env-dependent values</description>
</property>

<property>
<name>hive.in.test.planmapper.strict.validation</name>
<value>true</value>
</property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
Expand Down
5 changes: 4 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/Context.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public class Context {
private WmContext wmContext;

private boolean isExplainPlan = false;
private PlanMapper planMapper = new PlanMapper();
private PlanMapper planMapper;
private StatsSource statsSource;
private int executionIndex;

Expand Down Expand Up @@ -423,6 +423,7 @@ private Context(Configuration conf, String executionId) {
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SQL) ||
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SUBQUERY_SQL);
scheduledQuery = false;
planMapper = new PlanMapper(conf);
}

protected Context(Context ctx) {
Expand Down Expand Up @@ -470,6 +471,8 @@ protected Context(Context ctx) {
this.opContext = new CompilationOpContext();
this.enableUnparse = ctx.enableUnparse;
this.scheduledQuery = ctx.scheduledQuery;
// Don't inherit the original plan mapper
this.planMapper = new PlanMapper(ctx.conf);
}

public Map<String, Path> getFsScratchDirs() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ public class RuntimeStatsPersistenceCheckerHook implements ExecuteWithHookContex
public void run(HookContext hookContext) throws Exception {

PlanMapper pm = ((PrivateHookContext) hookContext).getContext().getPlanMapper();
if (pm.isBroken()) {
LOG.warn("Skip checking signatures. The PlanMapper is broken");
return;
}

List<OpTreeSignature> sigs = pm.getAll(OpTreeSignature.class);

Expand Down
33 changes: 0 additions & 33 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
Expand Down Expand Up @@ -100,7 +99,6 @@
import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
Expand All @@ -117,7 +115,6 @@
import org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
Expand All @@ -139,7 +136,6 @@
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
Expand Down Expand Up @@ -991,35 +987,6 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
ogw.startWalking(topNodes, null);
}

/**
 * Graph-walker node processor that, for each {@link FilterOperator}, computes the
 * operator's tree signature and collects the equivalence groups containing a
 * {@link HiveFilter}. The collected values are not propagated anywhere visible here;
 * NOTE(review): this looks like leftover debugging scaffolding — confirm before reuse.
 */
private static class CollectAll implements SemanticNodeProcessor {
  private PlanMapper planMapper;

  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
      throws SemanticException {
    ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
    planMapper = pCtx.getContext().getPlanMapper();
    FilterOperator fop = (FilterOperator) nd;
    // getSignatureOf may register the signature with the mapper as a side effect,
    // so the call is kept even though the result is unused here.
    OpTreeSignature sig = planMapper.getSignatureOf(fop);
    List<EquivGroup> ar = getGroups(planMapper, HiveFilter.class);
    return nd;
  }

  /**
   * Returns the equivalence groups of {@code pm} containing at least one node of the given class.
   * Fixed: the original ignored its mapper parameter and silently read the {@code planMapper}
   * field instead; the supplied argument is now used.
   *
   * @param pm    the plan mapper whose groups are scanned
   * @param clazz the node type a group must contain to be returned
   */
  private List<EquivGroup> getGroups(PlanMapper pm, Class<HiveFilter> clazz) {
    List<EquivGroup> ret = new ArrayList<>();
    Iterator<EquivGroup> it = pm.iterateGroups();
    while (it.hasNext()) {
      EquivGroup g = it.next();
      if (!g.getAll(clazz).isEmpty()) {
        ret.add(g);
      }
    }
    return ret;
  }
}

private static class MarkRuntimeStatsAsIncorrect implements SemanticNodeProcessor {

private PlanMapper planMapper;
Expand Down
37 changes: 26 additions & 11 deletions ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,34 @@
import java.util.Objects;
import java.util.Set;

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Enables connecting related objects to each other.
 *
 * Most importantly it helps to connect Operators to OperatorStats and possibly RelNodes.
 */
public class PlanMapper {
private static final Logger LOG = LoggerFactory.getLogger(PlanMapper.class);

Set<EquivGroup> groups = new HashSet<>();
private Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
private final Set<EquivGroup> groups = new HashSet<>();
private final Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
private final boolean failsWithIllegalLink;
private final AtomicBoolean isBroken = new AtomicBoolean(false);

/**
 * Creates a PlanMapper.
 *
 * @param conf configuration used to read HIVE_IN_TEST_PLANMAPPER_STRICT_VALIDATION;
 *             when that flag is true, an equivalence-mapping violation during linking
 *             raises a RuntimeException instead of only marking this mapper as broken.
 */
public PlanMapper(Configuration conf) {
  failsWithIllegalLink = HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST_PLANMAPPER_STRICT_VALIDATION);
}

/**
* Specialized class which can compare by identity or value; based on the key type.
Expand Down Expand Up @@ -217,7 +230,11 @@ private void link(Object o1, Object o2, boolean mayMerge) {
}
if (mGroups.size() > 1) {
if (!mayMerge) {
throw new RuntimeException("equivalence mapping violation");
LOG.warn("Illegally linking {} and {}", o1, o2);
if (failsWithIllegalLink) {
throw new RuntimeException("equivalence mapping violation");
}
isBroken.set(true);
}
EquivGroup newGrp = new EquivGroup();
newGrp.add(o1);
Expand Down Expand Up @@ -248,6 +265,10 @@ private Object getKeyFor(Object o) {
return o;
}

/**
 * Whether an equivalence-mapping violation was detected (and tolerated) while linking.
 * Consumers use this to skip signature validation, runtime-stats extraction, and
 * plan re-execution when the mapper's contents can no longer be trusted.
 *
 * @return true if an illegal link was recorded on this mapper
 */
public boolean isBroken() {
  return isBroken.get();
}

public <T> List<T> getAll(Class<T> clazz) {
List<T> ret = new ArrayList<>();
for (EquivGroup g : groups) {
Expand All @@ -256,20 +277,15 @@ public <T> List<T> getAll(Class<T> clazz) {
return ret;
}

public void runMapper(GroupTransformer mapper) {
for (EquivGroup equivGroup : groups) {
mapper.map(equivGroup);
}
}

public <T> List<T> lookupAll(Class<T> clazz, Object key) {
/**
 * Returns every object of the given type stored in the equivalence group of {@code key}.
 *
 * @param clazz type of the objects to return
 * @param key   object whose equivalence group is consulted
 * @throws NoSuchElementException if the key belongs to no group
 */
private <T> List<T> lookupAll(Class<T> clazz, Object key) {
  final EquivGroup matched = objectMap.get(key);
  if (matched != null) {
    return matched.getAll(clazz);
  }
  throw new NoSuchElementException(Objects.toString(key));
}

@VisibleForTesting
public <T> T lookup(Class<T> clazz, Object key) {
List<T> all = lookupAll(clazz, key);
if (all.size() != 1) {
Expand All @@ -279,7 +295,6 @@ public <T> T lookup(Class<T> clazz, Object key) {
return all.get(0);
}

@VisibleForTesting
public Iterator<EquivGroup> iterateGroups() {
return groups.iterator();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ public static StatsSource getStatsSourceContaining(StatsSource currentStatsSourc

private static ImmutableList<PersistedRuntimeStats> extractStatsFromPlanMapper(PlanMapper pm) {
Builder<PersistedRuntimeStats> li = ImmutableList.builder();
if (pm.isBroken()) {
LOG.warn("Don't generate any stats. This PlanMapper is broken");
return li.build();
}

Iterator<EquivGroup> it = pm.iterateGroups();
while (it.hasNext()) {
EquivGroup e = it.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ public void prepareToReExecute() {

@Override
public boolean shouldReExecuteAfterCompile(int executionNum, PlanMapper oldPlanMapper, PlanMapper newPlanMapper) {
if (oldPlanMapper.isBroken() || newPlanMapper.isBroken()) {
LOG.warn(
"Giving up a re-execution. The old plan mapper is {}, and the new one is {}",
oldPlanMapper.isBroken() ? "broken" : "not broken",
newPlanMapper.isBroken() ? "broken" : "not broken");
return false;
}

boolean planDidChange = !planEquals(oldPlanMapper, newPlanMapper);
LOG.info("planDidChange: {}", planDidChange);
return planDidChange;
Expand Down
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/cbo_query14.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
--! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain cbo
Expand Down
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/query14.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
--! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain
Expand Down
Loading

0 comments on commit d35216b

Please sign in to comment.