From 47b172179728c6889abe46c5939112611f9440c3 Mon Sep 17 00:00:00 2001 From: co0lster Date: Sun, 26 Mar 2023 12:42:38 +0200 Subject: [PATCH 1/4] Move protoc generation to binary crate (#5718) --- Cargo.toml | 1 + datafusion/proto/Cargo.toml | 5 --- datafusion/proto/README.md | 11 ++++++ datafusion/proto/gen/Cargo.toml | 34 +++++++++++++++++++ .../proto/{build.rs => gen/src/main.rs} | 31 ++++++----------- datafusion/proto/regen.sh | 21 ++++++++++++ 6 files changed, 77 insertions(+), 26 deletions(-) create mode 100644 datafusion/proto/gen/Cargo.toml rename datafusion/proto/{build.rs => gen/src/main.rs} (68%) create mode 100644 datafusion/proto/regen.sh diff --git a/Cargo.toml b/Cargo.toml index e53865a36881..728a82468985 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "datafusion/optimizer", "datafusion/physical-expr", "datafusion/proto", + "datafusion/proto/gen", "datafusion/row", "datafusion/sql", "datafusion/substrait", diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 4129980bf703..f174aa90d594 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -54,8 +54,3 @@ serde_json = { version = "1.0", optional = true } [dev-dependencies] doc-comment = "0.3" tokio = "1.18" - -[build-dependencies] -# Pin these dependencies so that the generated output is deterministic -pbjson-build = { version = "=0.5.1" } -prost-build = { version = "=0.11.7" } diff --git a/datafusion/proto/README.md b/datafusion/proto/README.md index 4b9afd3c3405..015ded8b0d70 100644 --- a/datafusion/proto/README.md +++ b/datafusion/proto/README.md @@ -94,4 +94,15 @@ async fn main() -> Result<()> { ``` +## Generated Code + +The prost/tonic code can be generated by running, which in turn invokes the Rust binary located in [gen](./gen) + +This is necessary after modifying the protobuf definitions or altering the dependencies of [gen](./gen), and requires a +valid installation of [protoc](https://github.com/protocolbuffers/protobuf#protocol-compiler-installation). + +```bash +./regen.sh +``` + [df]: https://crates.io/crates/datafusion diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml new file mode 100644 index 000000000000..af88eb049206 --- /dev/null +++ b/datafusion/proto/gen/Cargo.toml @@ -0,0 +1,34 @@ +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "gen" +description = "Code generation for proto" +version = "0.1.0" +edition = "2021" +rust-version = "1.62" +authors = ["Apache Arrow "] +homepage = "https://github.com/apache/arrow-datafusion" +repository = "https://github.com/apache/arrow-datafusion" +license = "Apache-2.0" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +# Pin these dependencies so that the generated output is deterministic +pbjson-build = { version = "=0.5.1" } +prost-build = { version = "=0.11.7" } diff --git a/datafusion/proto/build.rs b/datafusion/proto/gen/src/main.rs similarity index 68% rename from datafusion/proto/build.rs rename to datafusion/proto/gen/src/main.rs index 7d3582d2b224..a8936a2cb9cc 100644 --- a/datafusion/proto/build.rs +++ b/datafusion/proto/gen/src/main.rs @@ -15,42 +15,31 @@ // specific language governing permissions and limitations // under the License. -use std::path::{Path, PathBuf}; +use std::path::Path; type Error = Box; type Result = std::result::Result; fn main() -> Result<(), String> { - // for use in docker build where file changes can be wonky - println!("cargo:rerun-if-env-changed=FORCE_REBUILD"); + let proto_dir = Path::new("proto"); + let proto_path = Path::new("proto/datafusion.proto"); - // We don't include the proto files in releases so that downstreams - // do not need to have PROTOC included - if Path::new("proto/datafusion.proto").exists() { - println!("cargo:rerun-if-changed=proto/datafusion.proto"); - build()? - } - - Ok(()) -} - -fn build() -> Result<(), String> { - let out: PathBuf = std::env::var("OUT_DIR") - .expect("Cannot find OUT_DIR environment variable") - .into(); - let descriptor_path = out.join("proto_descriptor.bin"); + // proto definitions has to be there + let descriptor_path = proto_dir.join("proto_descriptor.bin"); prost_build::Config::new() .file_descriptor_set_path(&descriptor_path) + .out_dir("src") .compile_well_known_types() .extern_path(".google.protobuf", "::pbjson_types") - .compile_protos(&["proto/datafusion.proto"], &["proto"]) + .compile_protos(&[proto_path], &["proto"]) .map_err(|e| format!("protobuf compilation failed: {e}"))?; let descriptor_set = std::fs::read(&descriptor_path) .unwrap_or_else(|e| panic!("Cannot read {:?}: {}", &descriptor_path, e)); pbjson_build::Builder::new() + .out_dir("src") .register_descriptors(&descriptor_set) .unwrap_or_else(|e| { panic!("Cannot register descriptors {:?}: {}", &descriptor_set, e) @@ -58,8 +47,8 @@ fn build() -> Result<(), String> { .build(&[".datafusion"]) .map_err(|e| format!("pbjson compilation failed: {e}"))?; - let prost = out.join("datafusion.rs"); - let pbjson = out.join("datafusion.serde.rs"); + let prost = Path::new("src/datafusion.rs"); + let pbjson = Path::new("src/datafusion.serde.rs"); std::fs::copy(prost, "src/generated/prost.rs").unwrap(); std::fs::copy(pbjson, "src/generated/pbjson.rs").unwrap(); diff --git a/datafusion/proto/regen.sh b/datafusion/proto/regen.sh new file mode 100644 index 000000000000..4b7ad4d58533 --- /dev/null +++ b/datafusion/proto/regen.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR && cargo run --manifest-path gen/Cargo.toml \ No newline at end of file From 7df2c3d9a53fee8b290995c9c8d5c8578e7b5327 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz?= Date: Sun, 26 Mar 2023 16:32:34 +0200 Subject: [PATCH 2/4] Update license in Cargo.toml --- datafusion/proto/gen/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml index af88eb049206..a792eb790423 100644 --- a/datafusion/proto/gen/Cargo.toml +++ b/datafusion/proto/gen/Cargo.toml @@ -1,3 +1,4 @@ +# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file From ba21d649286ac9655a42f0f9f6fa2e77d7cc9de7 Mon Sep 17 00:00:00 2001 From: co0lster Date: Mon, 27 Mar 2023 17:09:26 +0200 Subject: [PATCH 3/4] Generate proto in CI --- .github/workflows/rust.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8a59cae01967..fab054eef0a0 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -320,6 +320,20 @@ jobs: - name: Run datafusion-common tests run: cargo test -p datafusion-common --features=pyarrow + vendor: + name: Verify Vendored Code + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v3 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + - name: Run gen + run: ./regen.sh + working-directory: ./datafusion/proto + - name: Verify workspace clean (if this fails, run ./datafusion/proto/regen.sh and check in results) + run: git diff --exit-code check-fmt: name: Check cargo fmt From caf175496b3ebd1d725dd6fcb80a439f86157df7 Mon Sep 17 00:00:00 2001 From: co0lster Date: Mon, 27 Mar 2023 17:17:09 +0200 Subject: [PATCH 4/4] Make regen.sh executable by CI --- datafusion/proto/regen.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 datafusion/proto/regen.sh diff --git a/datafusion/proto/regen.sh b/datafusion/proto/regen.sh old mode 100644 new mode 100755