From 9022bab2e6305ae7a0d04a9bdc8a7af436b561e4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 28 Oct 2022 19:04:09 -0600 Subject: [PATCH] Generate expected benchmark query results --- benchmarks/entrypoint.sh | 22 ---------------------- benchmarks/tpch-gen.sh | 17 ++++++++++++----- benchmarks/tpchgen.dockerfile | 32 -------------------------------- 3 files changed, 12 insertions(+), 59 deletions(-) delete mode 100755 benchmarks/entrypoint.sh delete mode 100644 benchmarks/tpchgen.dockerfile diff --git a/benchmarks/entrypoint.sh b/benchmarks/entrypoint.sh deleted file mode 100755 index 312376fed35a..000000000000 --- a/benchmarks/entrypoint.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -cd /tpch-dbgen -./dbgen -vf -s $1 -mv *.tbl /data \ No newline at end of file diff --git a/benchmarks/tpch-gen.sh b/benchmarks/tpch-gen.sh index 2f79ade769fd..90230e666e4c 100755 --- a/benchmarks/tpch-gen.sh +++ b/benchmarks/tpch-gen.sh @@ -16,20 +16,27 @@ # specific language governing permissions and limitations # under the License. -#set -e +mkdir -p data/answers 2>/dev/null + +set -e pushd .. . ./dev/build-set-env.sh popd -docker build -f tpchgen.dockerfile -t datafusion-tpchgen:$DATAFUSION_VERSION . - # Generate data into the ./data directory if it does not already exist FILE=./data/supplier.tbl if test -f "$FILE"; then echo "$FILE exists." else - mkdir data 2>/dev/null - docker run -v `pwd`/data:/data -it --rm datafusion-tpchgen:$DATAFUSION_VERSION $1 + docker run -v `pwd`/data:/data -it --rm ghcr.io/databloom-ai/tpch-docker:main -vf -s $1 ls -l data +fi + +# Copy expected answers (at SF=1) into the ./data/answers directory if it does not already exist +FILE=./data/answers/q1.out +if test -f "$FILE"; then + echo "$FILE exists." +else + docker run -v `pwd`/data:/data -it --entrypoint /bin/bash --rm ghcr.io/databloom-ai/tpch-docker:main -c "cp /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/" fi \ No newline at end of file diff --git a/benchmarks/tpchgen.dockerfile b/benchmarks/tpchgen.dockerfile deleted file mode 100644 index 30acdead7cec..000000000000 --- a/benchmarks/tpchgen.dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -FROM ubuntu - -RUN apt-get update && \ - apt-get install -y git build-essential - -RUN git clone https://github.com/databricks/tpch-dbgen.git && \ - cd tpch-dbgen && \ - make - -WORKDIR /tpch-dbgen -ADD entrypoint.sh /tpch-dbgen/ - -VOLUME /data - -ENTRYPOINT [ "bash", "./entrypoint.sh" ]