From 06b48c9706b228ec869a11c9a7768328ce8da770 Mon Sep 17 00:00:00 2001 From: Qingping Hou Date: Sun, 19 Apr 2020 12:24:42 -0700 Subject: [PATCH] initial commit --- .gitignore | 1 + Cargo.toml | 22 + LICENSE | 21 + TODO | 8 + src/delta.rs | 757 ++++++++++++++++++ src/lib.rs | 8 + src/main.rs | 19 + src/storage.rs | 185 +++++ ...-a43c-3eda0d2a499d-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a8f0-e65b746382dd-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9074-a278c24c4449-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-b38a-6ee7e24456b1-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-adae-ce66d1fcaef6-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-88a6-abcfb049d3b4-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9c85-f34969ad3aa9-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a42b-9731b2e490ae-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a923-f6f89930a5c1-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-8d07-599a21197296-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-98f6-5e6cfa3ae45d-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-861f-5a649e3d9674-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-b3cc-84502b0c314f-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a080-73e02491c643-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-8498-7bfb2940713b-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-b9a1-7e717b67f294-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-81ef-5223cf40f025-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9e83-e31021a93cf9-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-98f5-2fccfa1b123f-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-8d34-a0018cf73b70-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a888-81565a40161d-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-8475-e21d2a2935f8-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9403-53e33b3778ac-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-816f-cbd30a3f8c1b-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-93f6-0acf11199a0d-c000.snappy.parquet.crc | Bin 0 -> 12 bytes 
...-be15-135e15b398f4-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9acd-623e740be992-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-8cd4-6688aad8585d-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9909-78da7294ffbd-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a8b4-578c9e9a218d-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-b07f-975d2226b800-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-aa43-993cdf937fd3-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-9613-f5ad1940b689-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a03d-e356fcd1564a-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-befa-90f056c2d77a-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a3d3-8dc112766ff5-c000.snappy.parquet.crc | Bin 0 -> 12 bytes .../_delta_log/00000000000000000000.json | 9 + .../_delta_log/00000000000000000001.json | 27 + .../_delta_log/00000000000000000002.json | 29 + .../_delta_log/00000000000000000003.json | 5 + .../_delta_log/00000000000000000004.json | 4 + ...4768-a43c-3eda0d2a499d-c000.snappy.parquet | Bin 0 -> 262 bytes ...41fe-a8f0-e65b746382dd-c000.snappy.parquet | Bin 0 -> 262 bytes ...4ca1-9074-a278c24c4449-c000.snappy.parquet | Bin 0 -> 262 bytes ...4790-b38a-6ee7e24456b1-c000.snappy.parquet | Bin 0 -> 262 bytes ...40ba-adae-ce66d1fcaef6-c000.snappy.parquet | Bin 0 -> 429 bytes ...47c3-88a6-abcfb049d3b4-c000.snappy.parquet | Bin 0 -> 429 bytes ...4df2-9c85-f34969ad3aa9-c000.snappy.parquet | Bin 0 -> 429 bytes ...4e2b-a42b-9731b2e490ae-c000.snappy.parquet | Bin 0 -> 429 bytes ...4c2c-a923-f6f89930a5c1-c000.snappy.parquet | Bin 0 -> 429 bytes ...4f13-8d07-599a21197296-c000.snappy.parquet | Bin 0 -> 429 bytes ...4562-98f6-5e6cfa3ae45d-c000.snappy.parquet | Bin 0 -> 429 bytes ...420c-861f-5a649e3d9674-c000.snappy.parquet | Bin 0 -> 429 bytes ...4581-b3cc-84502b0c314f-c000.snappy.parquet | Bin 0 -> 429 bytes ...4bd9-a080-73e02491c643-c000.snappy.parquet | Bin 0 -> 429 bytes ...4dda-8498-7bfb2940713b-c000.snappy.parquet | Bin 0 -> 429 bytes 
...482d-b9a1-7e717b67f294-c000.snappy.parquet | Bin 0 -> 429 bytes ...41b6-81ef-5223cf40f025-c000.snappy.parquet | Bin 0 -> 429 bytes ...4b00-9e83-e31021a93cf9-c000.snappy.parquet | Bin 0 -> 429 bytes ...40af-98f5-2fccfa1b123f-c000.snappy.parquet | Bin 0 -> 429 bytes ...45b1-8d34-a0018cf73b70-c000.snappy.parquet | Bin 0 -> 429 bytes ...49c1-a888-81565a40161d-c000.snappy.parquet | Bin 0 -> 429 bytes ...4148-8475-e21d2a2935f8-c000.snappy.parquet | Bin 0 -> 429 bytes ...492b-9403-53e33b3778ac-c000.snappy.parquet | Bin 0 -> 429 bytes ...4643-816f-cbd30a3f8c1b-c000.snappy.parquet | Bin 0 -> 429 bytes ...48ee-93f6-0acf11199a0d-c000.snappy.parquet | Bin 0 -> 429 bytes ...4f4c-be15-135e15b398f4-c000.snappy.parquet | Bin 0 -> 429 bytes ...4fde-9acd-623e740be992-c000.snappy.parquet | Bin 0 -> 429 bytes ...4f34-8cd4-6688aad8585d-c000.snappy.parquet | Bin 0 -> 429 bytes ...48ce-9909-78da7294ffbd-c000.snappy.parquet | Bin 0 -> 429 bytes ...4a90-a8b4-578c9e9a218d-c000.snappy.parquet | Bin 0 -> 429 bytes ...43fb-b07f-975d2226b800-c000.snappy.parquet | Bin 0 -> 429 bytes ...4193-aa43-993cdf937fd3-c000.snappy.parquet | Bin 0 -> 429 bytes ...4871-9613-f5ad1940b689-c000.snappy.parquet | Bin 0 -> 429 bytes ...48fb-a03d-e356fcd1564a-c000.snappy.parquet | Bin 0 -> 429 bytes ...4c02-befa-90f056c2d77a-c000.snappy.parquet | Bin 0 -> 429 bytes ...461d-a3d3-8dc112766ff5-c000.snappy.parquet | Bin 0 -> 429 bytes tests/read_simple_table_test.rs | 19 + 86 files changed, 1114 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 TODO create mode 100644 src/delta.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/storage.rs create mode 100644 tests/data/simple_table/.part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet.crc create mode 100644 
tests/data/simple_table/.part-00000-a922ea3b-ffc2-4ca1-9074-a278c24c4449-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00000-f17fcbf5-e0dc-40ba-adae-ce66d1fcaef6-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00001-bb70d2ba-c196-4df2-9c85-f34969ad3aa9-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00001-c506e79a-0bf8-4e2b-a42b-9731b2e490ae-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00003-508ae4aa-801c-4c2c-a923-f6f89930a5c1-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00003-53f42606-6cda-4f13-8d07-599a21197296-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00004-80938522-09c0-420c-861f-5a649e3d9674-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00004-95c9bc2c-ac85-4581-b3cc-84502b0c314f-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00005-94a0861b-6455-4bd9-a080-73e02491c643-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00007-94f725e2-3963-4b00-9e83-e31021a93cf9-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet.crc create mode 100644 
tests/data/simple_table/.part-00045-332fe409-7705-45b1-8d34-a0018cf73b70-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00058-b462c4cb-0c48-4148-8475-e21d2a2935f8-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00068-90650739-6a8e-492b-9403-53e33b3778ac-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00069-c78b4dd8-f955-4643-816f-cbd30a3f8c1b-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00077-2fcb1c7c-5390-48ee-93f6-0acf11199a0d-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00107-3f6c2aa0-fc28-4f4c-be15-135e15b398f4-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00112-07fd790a-11dc-4fde-9acd-623e740be992-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00116-bc66759e-6381-4f34-8cd4-6688aad8585d-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00150-ec6643fc-4963-4871-9613-f5ad1940b689-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00154-4630673a-5227-48fb-a03d-e356fcd1564a-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet.crc create mode 100644 tests/data/simple_table/.part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet.crc create mode 100644 
tests/data/simple_table/_delta_log/00000000000000000000.json create mode 100644 tests/data/simple_table/_delta_log/00000000000000000001.json create mode 100644 tests/data/simple_table/_delta_log/00000000000000000002.json create mode 100644 tests/data/simple_table/_delta_log/00000000000000000003.json create mode 100644 tests/data/simple_table/_delta_log/00000000000000000004.json create mode 100644 tests/data/simple_table/part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00000-a922ea3b-ffc2-4ca1-9074-a278c24c4449-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00000-f17fcbf5-e0dc-40ba-adae-ce66d1fcaef6-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00001-bb70d2ba-c196-4df2-9c85-f34969ad3aa9-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00001-c506e79a-0bf8-4e2b-a42b-9731b2e490ae-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00003-508ae4aa-801c-4c2c-a923-f6f89930a5c1-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00003-53f42606-6cda-4f13-8d07-599a21197296-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00004-80938522-09c0-420c-861f-5a649e3d9674-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00004-95c9bc2c-ac85-4581-b3cc-84502b0c314f-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00005-94a0861b-6455-4bd9-a080-73e02491c643-c000.snappy.parquet create mode 100644 
tests/data/simple_table/part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00007-94f725e2-3963-4b00-9e83-e31021a93cf9-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00045-332fe409-7705-45b1-8d34-a0018cf73b70-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00058-b462c4cb-0c48-4148-8475-e21d2a2935f8-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00068-90650739-6a8e-492b-9403-53e33b3778ac-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00069-c78b4dd8-f955-4643-816f-cbd30a3f8c1b-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00077-2fcb1c7c-5390-48ee-93f6-0acf11199a0d-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00107-3f6c2aa0-fc28-4f4c-be15-135e15b398f4-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00112-07fd790a-11dc-4fde-9acd-623e740be992-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00116-bc66759e-6381-4f34-8cd4-6688aad8585d-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet create mode 100644 
tests/data/simple_table/part-00150-ec6643fc-4963-4871-9613-f5ad1940b689-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00154-4630673a-5227-48fb-a03d-e356fcd1564a-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet create mode 100644 tests/data/simple_table/part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet create mode 100644 tests/read_simple_table_test.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000000..238c97ebed --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "delta" +version = "0.1.0" +authors = ["Qingping Hou "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0" +thiserror = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +# parquet = "1.0.0-SNAPSHOT" +parquet = { git = "https://github.com/apache/arrow", brach = "6d15de4752cf632f8b6b5bfff16c2f5abaa34e76" } +rusoto_core = "0.43" +rusoto_s3 = "0.43" +tokio = "0.2.10" +tokio-io = "0.2.0-alpha.6" +futures = "0.3.1" +bytes = "0.5.3" +log = "*" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..ee36a66a2e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) QP Hou + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission 
notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/TODO b/TODO new file mode 100644 index 0000000000..4076e808c8 --- /dev/null +++ b/TODO @@ -0,0 +1,8 @@ +* track tombstones +* replace all unwrap with Result +* support write, currently blocked on adding write support to parquet crate +* make all IO async +* handle commitInfo action +* handle dataChange field +* raise error if delta log is not valid +* use list api to find transaction logs to apply diff --git a/src/delta.rs b/src/delta.rs new file mode 100644 index 0000000000..a795a8fc56 --- /dev/null +++ b/src/delta.rs @@ -0,0 +1,757 @@ +#![allow(non_snake_case, non_camel_case_types)] + +extern crate parquet; + +use parquet::file::reader::{FileReader, SerializedFileReader}; +use parquet::record::{ListAccessor, MapAccessor, RowAccessor}; + +use std::collections::HashMap; +use std::error::Error; +use std::fmt; +use std::io::{prelude::*, BufReader, Cursor}; + +use thiserror::Error; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use super::storage::{ + parse_uri, FileStorageBackend, S3StorageBackend, StorageBackend, StorageError, Uri, +}; + +type GUID = String; +type DeltaDataTypeLong = i64; +type DeltaVersionType = DeltaDataTypeLong; +type DeltaDataTypeInt = i32; + +#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy)] +pub struct LastCheckPoint { + version: DeltaVersionType, // 20 digits decimals + size: DeltaDataTypeLong, + parts: Option, // 10 digits decimals +} + 
+#[derive(Serialize, Deserialize, Debug, Default, Clone)] +pub struct Format { + // Name of the encoding for files in this table + provider: String, + // A map containing configuration options for the format + options: Option>, +} + +// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Schema-Serialization-Format +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct SchemaTypeStruct { + // type field is always the string "struct", so we are ignoring it here + r#type: String, + fields: Vec, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct SchemaTypeStructField { + // Name of this (possibly nested) column + name: String, + // String containing the name of a primitive type, a struct definition, an array definition or + // a map definition + r#type: SchemaType, + // Boolean denoting whether this field can be null + nullable: bool, + // A JSON map containing information about this column. Keys prefixed with Delta are reserved + // for the implementation. + metadata: HashMap, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct SchemaTypeArray { + // type field is always the string "array", so we are ignoring it here + r#type: String, + // The type of element stored in this array represented as a string containing the name of a + // primitive type, a struct definition, an array definition or a map definition + elementType: Box, + // Boolean denoting whether this array can contain one or more null values + containsNull: bool, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct SchemaTypeMap { + r#type: String, + // The type of element used for the key of this map, represented as a string containing the + // name of a primitive type, a struct definition, an array definition or a map definition + keyType: Box, + // The type of element used for the value of this map, represented as a string containing the + // name of a primitive type, a struct definition, an array definition or a map definition + valueType: Box, +} + +#[derive(Serialize, 
Deserialize, Debug)] +#[serde(untagged)] +pub enum SchemaType { + primitive(String), + r#struct(SchemaTypeStruct), + array(SchemaTypeArray), + map(SchemaTypeMap), +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Schema { + r#type: String, + fields: Vec, +} + +fn populate_hashmap_from_parquet_map( + map: &mut HashMap, + pmap: &parquet::record::Map, +) { + let keys = pmap.get_keys(); + let values = pmap.get_values(); + for j in 0..pmap.len() { + map.entry(keys.get_string(j).unwrap().clone()) + .or_insert(values.get_string(j).unwrap().clone()); + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct ActionAdd { + // A relative path, from the root of the table, to a file that should be added to the table + path: String, + // The size of this file in bytes + size: DeltaDataTypeLong, + // A map from partition column to value for this file + partitionValues: HashMap, + // The time this file was created, as milliseconds since the epoch + modificationTime: DeltaDataTypeLong, + // When false the file must already be present in the table or the records in the added file + // must be contained in one or more remove actions in the same version + dataChange: bool, + // Contains statistics (e.g., count, min/max values for columns) about the data in this file + stats: Option, + // Map containing metadata about this file + tags: Option>, +} + +impl ActionAdd { + fn from_parquet_record(record: &parquet::record::Row) -> Self { + let mut re = Self { + ..Default::default() + }; + + for (i, (name, _)) in record.get_column_iter().enumerate() { + match name.as_str() { + "path" => { + re.path = record.get_string(i).unwrap().clone(); + } + "size" => { + re.size = record.get_long(i).unwrap(); + } + "modificationTime" => { + re.modificationTime = record.get_long(i).unwrap(); + } + "dataChange" => { + re.dataChange = record.get_bool(i).unwrap(); + } + "partitionValues" => { + let parquetMap = record.get_map(i).unwrap(); + let key = 
parquetMap.get_keys().get_string(0).unwrap().clone(); + let value = parquetMap.get_values().get_string(0).unwrap().clone(); + re.partitionValues.entry(key).or_insert(value); + } + "tags" => match record.get_map(i) { + Ok(tags_map) => { + let mut tags = HashMap::new(); + populate_hashmap_from_parquet_map(&mut tags, tags_map); + re.tags = Some(tags); + } + _ => { + re.tags = None; + } + }, + "stats" => { + re.stats = Some(record.get_string(i).unwrap().clone()); + } + _ => { + panic!("invalid add record field: {}", name); + } + } + } + + return re; + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct ActionMetaData { + // Unique identifier for this table + id: GUID, + // User-provided identifier for this table + name: Option, + // User-provided description for this table + description: Option, + // Specification of the encoding for the files stored in the table + format: Format, + // Schema of the table + schemaString: String, + // An array containing the names of columns by which the data should be partitioned + partitionColumns: Vec, + // NOTE: this field is undocumented + configuration: HashMap, + // NOTE: this field is undocumented + createdTime: DeltaDataTypeLong, +} + +impl ActionMetaData { + fn from_parquet_record(record: &parquet::record::Row) -> Self { + let mut re = Self { + ..Default::default() + }; + + for (i, (name, _)) in record.get_column_iter().enumerate() { + match name.as_str() { + "id" => { + re.id = record.get_string(i).unwrap().clone(); + } + "name" => match record.get_string(i) { + Ok(s) => re.name = Some(s.clone()), + _ => re.name = None, + }, + "description" => match record.get_string(i) { + Ok(s) => re.description = Some(s.clone()), + _ => re.description = None, + }, + "partitionColumns" => { + let columns_list = record.get_list(i).unwrap(); + for j in 0..columns_list.len() { + re.partitionColumns + .push(columns_list.get_string(j).unwrap().clone()); + } + } + "schemaString" => { + re.schemaString = 
record.get_string(i).unwrap().clone(); + } + "createdTime" => { + re.createdTime = record.get_long(i).unwrap(); + } + "configuration" => { + let configuration_map = record.get_map(i).unwrap(); + populate_hashmap_from_parquet_map(&mut re.configuration, configuration_map); + } + "format" => { + let format_record = record.get_group(i).unwrap(); + re.format.provider = format_record.get_string(0).unwrap().clone(); + match record.get_map(1) { + Ok(options_map) => { + let mut options = HashMap::new(); + populate_hashmap_from_parquet_map(&mut options, options_map); + re.format.options = Some(options); + } + _ => { + re.format.options = None; + } + } + } + _ => { + panic!("invalid protocol record field: {}", name); + } + } + } + + return re; + } + + fn get_schema(&self) -> Result { + serde_json::from_str(&self.schemaString) + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct ActionRemove { + path: String, + deletionTimestamp: DeltaDataTypeLong, + dataChange: bool, +} + +impl ActionRemove { + fn from_parquet_record(record: &parquet::record::Row) -> Self { + let mut re = Self { + ..Default::default() + }; + + for (i, (name, _)) in record.get_column_iter().enumerate() { + match name.as_str() { + "path" => { + re.path = record.get_string(i).unwrap().clone(); + } + "dataChange" => { + re.dataChange = record.get_bool(i).unwrap(); + } + "deletionTimestamp" => { + re.deletionTimestamp = record.get_long(i).unwrap(); + } + _ => { + panic!("invalid remove record field: {}", name); + } + } + } + + return re; + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct ActionTxn { + appId: String, + version: DeltaVersionType, + // NOTE: undocumented field + lastUpdated: DeltaDataTypeLong, +} + +impl ActionTxn { + fn from_parquet_record(record: &parquet::record::Row) -> Self { + let mut re = Self { + ..Default::default() + }; + + for (i, (name, _)) in record.get_column_iter().enumerate() { + match name.as_str() { + "appId" => { + re.appId = 
record.get_string(i).unwrap().clone(); + } + "version" => { + re.version = record.get_long(i).unwrap(); + } + "lastUpdated" => { + re.lastUpdated = record.get_long(i).unwrap(); + } + _ => { + panic!("invalid txn record field: {}", name); + } + } + } + + return re; + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct ActionProtocol { + minReaderVersion: DeltaDataTypeInt, + minWriterVersion: DeltaDataTypeInt, +} + +impl ActionProtocol { + fn from_parquet_record(record: &parquet::record::Row) -> Self { + let mut re = Self { + ..Default::default() + }; + + for (i, (name, _)) in record.get_column_iter().enumerate() { + match name.as_str() { + "minReaderVersion" => { + re.minReaderVersion = record.get_int(i).unwrap(); + } + "minWriterVersion" => { + re.minWriterVersion = record.get_int(i).unwrap(); + } + _ => { + panic!("invalid protocol record"); + } + } + } + + return re; + } +} + +#[derive(Serialize, Deserialize, Debug)] +enum Action { + metaData(ActionMetaData), + add(ActionAdd), + remove(ActionRemove), + txn(ActionTxn), + protocol(ActionProtocol), + commitInfo(Value), +} + +impl Action { + fn from_parquet_record( + schema: &parquet::schema::types::Type, + record: &parquet::record::Row, + ) -> Self { + let (col_idx, col_data) = { + let mut col_idx = None; + let mut col_data = None; + for i in 0..record.len() { + match record.get_group(i) { + Ok(group) => { + col_idx = Some(i); + col_data = Some(group); + } + _ => { + continue; + } + } + } + match col_data { + Some(group) => (col_idx.unwrap(), group), + None => { + panic!("FIXME: invalid record"); + } + } + }; + + let fields = schema.get_fields(); + let field = &fields[col_idx]; + match field.get_basic_info().name() { + "add" => { + return Action::add(ActionAdd::from_parquet_record(col_data)); + } + "metaData" => { + return Action::metaData(ActionMetaData::from_parquet_record(col_data)); + } + "remove" => { + return Action::remove(ActionRemove::from_parquet_record(col_data)); + } + "txn" => { + return 
Action::txn(ActionTxn::from_parquet_record(col_data)); + } + "protocol" => { + return Action::protocol(ActionProtocol::from_parquet_record(col_data)); + } + "commitInfo" => { + panic!("FIXME: implement commitInfo"); + } + _ => { + panic!("FIXME: invalid action: {:#?}", field); + } + } + } +} + +pub struct DeltaTableMetaData { + // Unique identifier for this table + pub id: GUID, + // User-provided identifier for this table + pub name: Option, + // User-provided description for this table + pub description: Option, + // Specification of the encoding for the files stored in the table + pub format: Format, + // Schema of the table + pub schema: Schema, + // An array containing the names of columns by which the data should be partitioned + pub partitionColumns: Vec, + // NOTE: this field is undocumented + pub configuration: HashMap, +} + +impl fmt::Display for DeltaTableMetaData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "GUID={}, name={:?}, description={:?}, partitionColumns={:?}, configuration={:?}", + self.id, self.name, self.description, self.partitionColumns, self.configuration + ) + } +} + +#[derive(Debug)] +pub enum ApplyLogError { + EndOfLog, + InvalidJSON(String), + Unknown(String), +} + +impl Error for ApplyLogError {} + +impl fmt::Display for ApplyLogError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ApplyLogError::EndOfLog => write!(f, "End of transaction log"), + ApplyLogError::InvalidJSON(s) => write!(f, "{}", s), + ApplyLogError::Unknown(s) => write!(f, "{}", s), + } + } +} + +impl From for ApplyLogError { + fn from(error: StorageError) -> Self { + match error { + StorageError::NotFound => ApplyLogError::EndOfLog, + StorageError::Unknown(s) => ApplyLogError::Unknown(format!("Storage error: {}", s)), + } + } +} + +impl From for ApplyLogError { + fn from(error: serde_json::error::Error) -> Self { + ApplyLogError::InvalidJSON(format!("Invalid json log record: {}", error)) + } +} + +impl From 
for ApplyLogError { + fn from(error: std::io::Error) -> Self { + ApplyLogError::Unknown(format!("failed to read line from log record: {:#?}", error)) + } +} + +#[derive(Debug)] +pub enum LoadCheckpointError { + NotFound, + InvalidJSON(String), + Unknown(String), +} + +impl Error for LoadCheckpointError {} + +impl fmt::Display for LoadCheckpointError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + LoadCheckpointError::NotFound => write!(f, "no checkpoint found"), + LoadCheckpointError::InvalidJSON(s) => write!(f, "{}", s), + LoadCheckpointError::Unknown(s) => write!(f, "{}", s), + } + } +} + +impl From for LoadCheckpointError { + fn from(error: StorageError) -> Self { + match error { + StorageError::NotFound => LoadCheckpointError::NotFound, + StorageError::Unknown(s) => { + LoadCheckpointError::Unknown(format!("unknown storage error: {}", s)) + } + } + } +} + +impl From for LoadCheckpointError { + fn from(error: serde_json::error::Error) -> Self { + LoadCheckpointError::InvalidJSON(format!("Invalid checkpoint: {}", error)) + } +} + +#[derive(Error, Debug)] +pub enum DeltaTableError { + #[error("Failed to apply transaction log: {}", .source)] + ApplyLog { + #[from] + source: ApplyLogError, + }, + + #[error("Failed to load checkpoint: {}", .source)] + LoadCheckpoint { + #[from] + source: LoadCheckpointError, + }, +} + +pub struct DeltaTable { + pub files: Vec, + pub version: DeltaVersionType, + pub tombstones: Vec, // files that were recently deleted + pub min_reader_version: i32, + pub min_writer_version: i32, + + // metadata + // application_transactions + table_path: String, + storage: Box, + + app_transaction_version: HashMap, + commit_infos: Vec, + current_metadata: Option, + last_check_point: Option, +} + +impl DeltaTable { + fn version_to_log_path(&self, version: DeltaVersionType) -> String { + return format!("{}/_delta_log/{:020}.json", self.table_path, version); + } + + fn get_checkpoint_data_paths( + checkpoint_prefix: &str, + 
checkpoint_parts: Option, + ) -> Vec { + let mut checkpoint_data_paths = Vec::new(); + + match checkpoint_parts { + None => { + checkpoint_data_paths.push(format!("{}.checkpoint.parquet", checkpoint_prefix)); + } + Some(parts) => { + for i in 0..parts { + checkpoint_data_paths.push(format!( + "{}.checkpoint.{:010}.{:010}.parquet", + checkpoint_prefix, + i + 1, + parts + )); + } + } + } + + return checkpoint_data_paths; + } + + fn load_last_checkpoint( + &self, + delta_log_dir: &str, + ) -> Result { + let last_checkpoint_path = format!("{}/_last_checkpoint", delta_log_dir); + let data = self.storage.get_obj(&last_checkpoint_path)?; + return Ok(serde_json::from_slice(&data)?); + } + + fn process_action(&mut self, action: &Action) { + // FIXME: support dataChange field + match action { + Action::add(v) => { + self.files.push(v.path.clone()); + } + Action::remove(v) => { + self.files.retain(|e| *e != v.path); + } + Action::protocol(v) => { + self.min_reader_version = v.minReaderVersion; + self.min_writer_version = v.minWriterVersion; + } + Action::metaData(v) => { + self.current_metadata = Some(DeltaTableMetaData { + id: v.id.clone(), + name: v.name.clone(), + description: v.description.clone(), + format: v.format.clone(), + schema: serde_json::from_str(&v.schemaString).unwrap(), + partitionColumns: v.partitionColumns.clone(), + configuration: v.configuration.clone(), + }); + } + Action::txn(v) => { + self.app_transaction_version + .entry(v.appId.clone()) + .or_insert(v.version); + } + Action::commitInfo(v) => { + self.commit_infos.push(v.clone()); + } + } + } + + fn apply_log(&mut self, version: DeltaVersionType) -> Result<(), ApplyLogError> { + let log_path = self.version_to_log_path(version); + let commit_log_bytes = self.storage.get_obj(&log_path)?; + let reader = BufReader::new(Cursor::new(commit_log_bytes)); + for line in reader.lines() { + let action: Action = serde_json::from_str(line?.as_str())?; + self.process_action(&action); + } + return Ok(()); + } + + pub 
fn load(&mut self) -> Result<(), DeltaTableError> { + let delta_log_dir = format!("{}/_delta_log", self.table_path); + match self.load_last_checkpoint(&delta_log_dir) { + Ok(last_check_point) => { + self.last_check_point = Some(last_check_point); + let checkpoint_data_paths = Self::get_checkpoint_data_paths( + &format!("{}/{:020}", delta_log_dir, last_check_point.version), + last_check_point.parts, + ); + + // process acttions from checkpoint + for f in &checkpoint_data_paths { + let obj = self.storage.get_obj(&f).unwrap(); + let preader = SerializedFileReader::new(Cursor::new(obj)).unwrap(); + let schema = preader.metadata().file_metadata().schema(); + if !schema.is_group() { + panic!("invalid checkpoint data file"); + } + let mut iter = preader.get_row_iter(None).unwrap(); + while let Some(record) = iter.next() { + self.process_action(&Action::from_parquet_record(&schema, &record)); + } + } + + self.version = last_check_point.version; + } + Err(LoadCheckpointError::NotFound) => { + // no checkpoint, start with version 0 + self.version = 0; + } + Err(e) => { + return Err(DeltaTableError::LoadCheckpoint { source: e }); + } + } + + // replay logs after checkpoint + loop { + match self.apply_log(self.version) { + Ok(_) => { + self.version += 1; + } + Err(e) => { + match e { + ApplyLogError::EndOfLog => { + self.version -= 1; + } + _ => { + panic!("Apply error: {:#?}", e); + } + } + break; + } + } + } + + return Ok(()); + } + + pub fn new( + table_path: &str, + storage_backend: Box, + ) -> Result { + let mut table = Self { + version: 0, + files: Vec::new(), + storage: storage_backend, + tombstones: Vec::new(), + table_path: table_path.to_string(), + min_reader_version: 0, + min_writer_version: 0, + current_metadata: None, + commit_infos: Vec::new(), + app_transaction_version: HashMap::new(), + last_check_point: None, + }; + table.load()?; + Ok(table) + } +} + +impl fmt::Display for DeltaTable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, 
"DeltaTable({})\n", self.table_path)?; + write!(f, "\tversion: {}\n", self.version)?; + write!( + f, + "\tmetadata: {}\n", + self.current_metadata.as_ref().unwrap() + )?; + write!( + f, + "\tmin_version: read={}, write={}\n", + self.min_reader_version, self.min_writer_version + )?; + write!(f, "\tfiles count: {}\n", self.files.len()) + } +} + +pub fn open_table(table_path: &str) -> Result { + let storage_backend: Box; + let uri = parse_uri(table_path).unwrap(); + match uri { + Uri::LocalPath(_) => storage_backend = Box::new(FileStorageBackend::new()), + Uri::S3Object(_) => storage_backend = Box::new(S3StorageBackend::new()), + } + + DeltaTable::new(table_path, storage_backend) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000000..8a492792a1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,8 @@ +#[macro_use] +extern crate log; + +mod delta; +mod storage; + +pub use self::delta::*; +pub use self::storage::*; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000000..ffe069f1c0 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,19 @@ +use std::env; +use anyhow; +use delta; + +fn main() -> anyhow::Result<()> { + let args: Vec = env::args().collect(); + if args.len() < 2 { + println!("USAGE: {} TABLE_PATH", args[0]); + std::process::exit(1); + } + let table_path = &args[1]; + + let table = delta::open_table(table_path)?; + + println!("{}", table); + println!("{:#?}", table.files); + + Ok(()) +} diff --git a/src/storage.rs b/src/storage.rs new file mode 100644 index 0000000000..038d55d388 --- /dev/null +++ b/src/storage.rs @@ -0,0 +1,185 @@ +use std::{fmt, fs}; +use std::error::Error; + +use rusoto_core::{Region, RusotoError}; +use rusoto_s3::{GetObjectRequest, S3Client, S3}; + +use tokio::io::AsyncReadExt; +use tokio::runtime; + +#[derive(Debug, PartialEq)] +pub struct S3Object<'a> { + bucket: &'a str, + key: &'a str, +} + +#[derive(Debug)] +pub enum Uri<'a> { + LocalPath(&'a str), + S3Object(S3Object<'a>), +} + +impl<'a> Uri<'a> { + pub fn 
as_s3object(self) -> S3Object<'a> { + match self { + Uri::S3Object(x) => x, + _ => panic!("Not a S3 Object"), + } + } + + pub fn as_localpath(self) -> &'a str { + match self { + Uri::LocalPath(x) => x, + _ => panic!("Not a S3 Object"), + } + } +} + +pub fn parse_uri<'a>(path: &'a str) -> Result, &'static str> { + let parts: Vec<&'a str> = path.split("://").collect(); + + if parts.len() == 1 { + return Ok(Uri::LocalPath(parts[0])); + } + + match parts[0] { + "s3" => { + let mut path_parts = parts[1].splitn(2, "/"); + let bucket = path_parts.next().unwrap(); + let key = path_parts.next().unwrap(); + + return Ok(Uri::S3Object(S3Object { bucket, key })); + } + "file" => { + return Ok(Uri::LocalPath(parts[1])); + } + _ => { + panic!("invalid uri scheme: {}", parts[0]); + } + } +} + +#[derive(Debug)] +pub enum StorageError { + NotFound, + Unknown(String), +} + +impl Error for StorageError {} + +impl fmt::Display for StorageError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + StorageError::NotFound => write!(f, "Object not found"), + StorageError::Unknown(s) => write!(f, "Unkown error: {}", s), + } + } +} + +impl From for StorageError { + fn from(error: std::io::Error) -> Self { + match error.kind() { + std::io::ErrorKind::NotFound => { + return StorageError::NotFound; + } + _ => { + return StorageError::Unknown(format!("{:#?}", error)); + } + } + } +} + +impl From> for StorageError { + fn from(error: RusotoError) -> Self { + match error { + RusotoError::Service(rusoto_s3::GetObjectError::NoSuchKey(_)) => StorageError::NotFound, + _ => StorageError::Unknown(format!("{:#?}", error)), + } + } +} + +pub trait StorageBackend { + fn get_obj(&self, path: &str) -> Result, StorageError>; +} + +pub struct FileStorageBackend {} + +impl FileStorageBackend { + pub fn new() -> Self { + Self {} + } +} + +impl StorageBackend for FileStorageBackend { + fn get_obj(&self, path: &str) -> Result, StorageError> { + fs::read(path).map_err(|e| StorageError::from(e)) 
+ } +} + +pub struct S3StorageBackend { + client: rusoto_s3::S3Client, +} + +impl S3StorageBackend { + pub fn new() -> Self { + let client = S3Client::new(Region::UsEast2); + Self { client } + } +} + +impl StorageBackend for S3StorageBackend { + fn get_obj(&self, path: &str) -> Result, StorageError> { + debug!("fetching s3 object: {}...", path); + + let uri = parse_uri(path).unwrap().as_s3object(); + let get_req = GetObjectRequest { + bucket: uri.bucket.to_string(), + key: uri.key.to_string(), + ..Default::default() + }; + + let mut rt = runtime::Builder::new() + .enable_time() + .enable_io() + .basic_scheduler() + .build() + .unwrap(); + + let result = rt.block_on(self.client.get_object(get_req))?; + + debug!("streaming data from {}...", path); + let mut buf = Vec::new(); + let stream = result.body.unwrap(); + rt.block_on(stream.into_async_read().read_to_end(&mut buf)) + .unwrap(); + + debug!("s3 object fetched: {}", path); + Ok(buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_uri_local_file() { + let uri = parse_uri("foo/bar").unwrap(); + assert_eq!(uri.as_localpath(), "foo/bar"); + + let uri2 = parse_uri("file:///foo/bar").unwrap(); + assert_eq!(uri2.as_localpath(), "/foo/bar"); + } + + #[test] + fn test_parse_object_uri() { + let uri = parse_uri("s3://foo/bar").unwrap(); + assert_eq!( + uri.as_s3object(), + S3Object { + bucket: "foo", + key: "bar", + } + ); + } +} diff --git a/tests/data/simple_table/.part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..52512aa8c3c83a142b74d851ec1980a37760a2ce GIT binary patch literal 12 TcmYc;N@ieSU}E^9U literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet.crc 
b/tests/data/simple_table/.part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..0cd5190c2cd66d480bcd0ab44dfc07108ce4da02 GIT binary patch literal 12 TcmYc;N@ieSU}A7p?>h_t5aYh5D%6#oPH literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00005-94a0861b-6455-4bd9-a080-73e02491c643-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00005-94a0861b-6455-4bd9-a080-73e02491c643-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..673f94d1016e3f233cb93600b00bfc5e97044612 GIT binary patch literal 12 TcmYc;N@ieSU}8|XnPCY45tIVe literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..aa9bdb761fdbe535f443626e0f0d5e0b2f9c6958 GIT binary patch literal 12 TcmYc;N@ieSU}CuQ?&>7~7B&Q| literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..3a0fbe2b5ff5a7bcfa593ee3a4ad08849bc6c9f6 GIT binary patch literal 12 TcmYc;N@ieSU}Ct|a#;)j6d(hB literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..551c8f5742f5d4ae1907f4bd4ca5cab1c1b2fca0 GIT binary patch literal 12 TcmYc;N@ieSU}EUIpH6A%N5 literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet.crc 
b/tests/data/simple_table/.part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..0cd5190c2cd66d480bcd0ab44dfc07108ce4da02 GIT binary patch literal 12 TcmYc;N@ieSU}A7p?>h_t5a9U literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..551c8f5742f5d4ae1907f4bd4ca5cab1c1b2fca0 GIT binary patch literal 12 TcmYc;N@ieSU}EUI7~7B&Q| literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..47ce4c1985bb07429638275c0facbbeb52ac24fe GIT binary patch literal 12 TcmYc;N@ieSU}DhPoc9+15*h=8 literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..3a4bed33c582f3176c8280f0b973658d166a8651 GIT binary patch literal 12 TcmYc;N@ieSU}9+5;QAQ=6CDGm literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..4c990a139198109596705fc833bb98d76463bfac GIT binary patch literal 12 TcmYc;N@ieSU}D%@W%>pH6A%N5 literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet.crc 
b/tests/data/simple_table/.part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..bcfd8e1eee114c8030bfc90c61d37436b455baa1 GIT binary patch literal 12 TcmYc;N@ieSU}BK1lzRjK5PbsC literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00150-ec6643fc-4963-4871-9613-f5ad1940b689-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00150-ec6643fc-4963-4871-9613-f5ad1940b689-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..b8335adf96c3073519cbc38bc59a5723a8286160 GIT binary patch literal 12 TcmYc;N@ieSU}9Ll&LRZ>68Zxy literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00154-4630673a-5227-48fb-a03d-e356fcd1564a-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00154-4630673a-5227-48fb-a03d-e356fcd1564a-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..12395f193e81440352d18c9c08b34442ce074eae GIT binary patch literal 12 TcmYc;N@ieSU}AW!vh*wf6R!ij literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..22a136f3ec18ea87827bf8719b3e5568ade79679 GIT binary patch literal 12 TcmYc;N@ieSU}D%QV5|WE5h4O# literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/.part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet.crc b/tests/data/simple_table/.part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..213b253a30f355e0c54a046f4f34e79eae16eb66 GIT binary patch literal 12 TcmYc;N@ieSU}9iCF1ii?5V-=# literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/_delta_log/00000000000000000000.json 
b/tests/data/simple_table/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..47dd9bd9db --- /dev/null +++ b/tests/data/simple_table/_delta_log/00000000000000000000.json @@ -0,0 +1,9 @@ +{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}} +{"add":{"path":"part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet","partitionValues":{},"size":262,"modificationTime":1587968586000,"dataChange":true}} +{"add":{"path":"part-00001-c506e79a-0bf8-4e2b-a42b-9731b2e490ae-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968586000,"dataChange":true}} +{"add":{"path":"part-00003-508ae4aa-801c-4c2c-a923-f6f89930a5c1-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968586000,"dataChange":true}} +{"add":{"path":"part-00004-80938522-09c0-420c-861f-5a649e3d9674-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968586000,"dataChange":true}} +{"add":{"path":"part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968586000,"dataChange":true}} +{"add":{"path":"part-00007-94f725e2-3963-4b00-9e83-e31021a93cf9-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968586000,"dataChange":true}} diff --git a/tests/data/simple_table/_delta_log/00000000000000000001.json b/tests/data/simple_table/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..0af2ff5ac4 --- /dev/null +++ 
b/tests/data/simple_table/_delta_log/00000000000000000001.json @@ -0,0 +1,27 @@ +{"commitInfo":{"timestamp":1587968596254,"operation":"MERGE","operationParameters":{"predicate":"(oldData.`id` = newData.`id`)"},"readVersion":0,"isBlindAppend":false}} +{"remove":{"path":"part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet","deletionTimestamp":1587968596250,"dataChange":true}} +{"remove":{"path":"part-00001-c506e79a-0bf8-4e2b-a42b-9731b2e490ae-c000.snappy.parquet","deletionTimestamp":1587968596253,"dataChange":true}} +{"remove":{"path":"part-00007-94f725e2-3963-4b00-9e83-e31021a93cf9-c000.snappy.parquet","deletionTimestamp":1587968596253,"dataChange":true}} +{"remove":{"path":"part-00003-508ae4aa-801c-4c2c-a923-f6f89930a5c1-c000.snappy.parquet","deletionTimestamp":1587968596253,"dataChange":true}} +{"remove":{"path":"part-00004-80938522-09c0-420c-861f-5a649e3d9674-c000.snappy.parquet","deletionTimestamp":1587968596253,"dataChange":true}} +{"add":{"path":"part-00000-a922ea3b-ffc2-4ca1-9074-a278c24c4449-c000.snappy.parquet","partitionValues":{},"size":262,"modificationTime":1587968595000,"dataChange":true}} +{"add":{"path":"part-00004-95c9bc2c-ac85-4581-b3cc-84502b0c314f-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00005-94a0861b-6455-4bd9-a080-73e02491c643-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00045-332fe409-7705-45b1-8d34-a0018cf73b70-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} 
+{"add":{"path":"part-00058-b462c4cb-0c48-4148-8475-e21d2a2935f8-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00068-90650739-6a8e-492b-9403-53e33b3778ac-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00069-c78b4dd8-f955-4643-816f-cbd30a3f8c1b-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00077-2fcb1c7c-5390-48ee-93f6-0acf11199a0d-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00107-3f6c2aa0-fc28-4f4c-be15-135e15b398f4-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00112-07fd790a-11dc-4fde-9acd-623e740be992-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00116-bc66759e-6381-4f34-8cd4-6688aad8585d-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} 
+{"add":{"path":"part-00150-ec6643fc-4963-4871-9613-f5ad1940b689-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00154-4630673a-5227-48fb-a03d-e356fcd1564a-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} +{"add":{"path":"part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968596000,"dataChange":true}} diff --git a/tests/data/simple_table/_delta_log/00000000000000000002.json b/tests/data/simple_table/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..8eeebb28d1 --- /dev/null +++ b/tests/data/simple_table/_delta_log/00000000000000000002.json @@ -0,0 +1,29 @@ +{"commitInfo":{"timestamp":1587968604143,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} +{"add":{"path":"part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet","partitionValues":{},"size":262,"modificationTime":1587968602000,"dataChange":true}} +{"add":{"path":"part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968602000,"dataChange":true}} +{"add":{"path":"part-00003-53f42606-6cda-4f13-8d07-599a21197296-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968602000,"dataChange":true}} +{"add":{"path":"part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968602000,"dataChange":true}} +{"add":{"path":"part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968602000,"dataChange":true}} 
+{"add":{"path":"part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968602000,"dataChange":true}} +{"remove":{"path":"part-00004-95c9bc2c-ac85-4581-b3cc-84502b0c314f-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00107-3f6c2aa0-fc28-4f4c-be15-135e15b398f4-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00005-94a0861b-6455-4bd9-a080-73e02491c643-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00077-2fcb1c7c-5390-48ee-93f6-0acf11199a0d-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00112-07fd790a-11dc-4fde-9acd-623e740be992-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00000-a922ea3b-ffc2-4ca1-9074-a278c24c4449-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00068-90650739-6a8e-492b-9403-53e33b3778ac-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} 
+{"remove":{"path":"part-00045-332fe409-7705-45b1-8d34-a0018cf73b70-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00058-b462c4cb-0c48-4148-8475-e21d2a2935f8-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00116-bc66759e-6381-4f34-8cd4-6688aad8585d-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00069-c78b4dd8-f955-4643-816f-cbd30a3f8c1b-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00154-4630673a-5227-48fb-a03d-e356fcd1564a-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00150-ec6643fc-4963-4871-9613-f5ad1940b689-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} +{"remove":{"path":"part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet","deletionTimestamp":1587968604143,"dataChange":true}} diff --git a/tests/data/simple_table/_delta_log/00000000000000000003.json b/tests/data/simple_table/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..7c411a8f24 --- /dev/null +++ b/tests/data/simple_table/_delta_log/00000000000000000003.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1587968614187,"operation":"UPDATE","operationParameters":{"predicate":"((id#697L % cast(2 as bigint)) = cast(0 as bigint))"},"readVersion":2,"isBlindAppend":false}} 
+{"remove":{"path":"part-00003-53f42606-6cda-4f13-8d07-599a21197296-c000.snappy.parquet","deletionTimestamp":1587968614096,"dataChange":true}} +{"remove":{"path":"part-00006-46f2ff20-eb5d-4dda-8498-7bfb2940713b-c000.snappy.parquet","deletionTimestamp":1587968614096,"dataChange":true}} +{"add":{"path":"part-00000-f17fcbf5-e0dc-40ba-adae-ce66d1fcaef6-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968614000,"dataChange":true}} +{"add":{"path":"part-00001-bb70d2ba-c196-4df2-9c85-f34969ad3aa9-c000.snappy.parquet","partitionValues":{},"size":429,"modificationTime":1587968614000,"dataChange":true}} diff --git a/tests/data/simple_table/_delta_log/00000000000000000004.json b/tests/data/simple_table/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..8911adfbb2 --- /dev/null +++ b/tests/data/simple_table/_delta_log/00000000000000000004.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1587968626537,"operation":"DELETE","operationParameters":{"predicate":"[\"((`id` % CAST(2 AS BIGINT)) = CAST(0 AS BIGINT))\"]"},"readVersion":3,"isBlindAppend":false}} +{"remove":{"path":"part-00001-bb70d2ba-c196-4df2-9c85-f34969ad3aa9-c000.snappy.parquet","deletionTimestamp":1587968626536,"dataChange":true}} +{"remove":{"path":"part-00000-f17fcbf5-e0dc-40ba-adae-ce66d1fcaef6-c000.snappy.parquet","deletionTimestamp":1587968626536,"dataChange":true}} +{"add":{"path":"part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet","partitionValues":{},"size":262,"modificationTime":1587968626000,"dataChange":true}} diff --git a/tests/data/simple_table/part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet b/tests/data/simple_table/part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..37061709636c6f5050fdadc50e72d8549ec85e03 GIT binary patch literal 262 zcmXw#ze~h06vsnxytUFohY}Dxs3EbKyL59Cr{e|}IgX@xJ&Wm&OHxHj|8XayoWmR5 
z`+R@E+db}C9Hqt6%8k;#A6@+hos1(CU)@AW)M|8%(p8!z>%pFg9A)6-PfFa`kl;B_ z;E5eRNe5nP>1FaVWB)Y*uCViVsy!}n(}K|sSFbbfW%r9(9nKe-8TwP$`pFonOrXIA z1^y4j)x0c+1trgyy1UNiA7EWO^f@Dp5|-bpsWm#6TcMk6BSd+xI5eD>rKX&60S~km VY9lsOfzoW9W*I{0JIpS8;0O1OPTT+h literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet b/tests/data/simple_table/part-00000-a72b1fb3-f2df-41fe-a8f0-e65b746382dd-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..37061709636c6f5050fdadc50e72d8549ec85e03 GIT binary patch literal 262 zcmXw#ze~h06vsnxytUFohY}Dxs3EbKyL59Cr{e|}IgX@xJ&Wm&OHxHj|8XayoWmR5 z`+R@E+db}C9Hqt6%8k;#A6@+hos1(CU)@AW)M|8%(p8!z>%pFg9A)6-PfFa`kl;B_ z;E5eRNe5nP>1FaVWB)Y*uCViVsy!}n(}K|sSFbbfW%r9(9nKe-8TwP$`pFonOrXIA z1^y4j)x0c+1trgyy1UNiA7EWO^f@Dp5|-bpsWm#6TcMk6BSd+xI5eD>rKX&60S~km VY9lsOfzoW9W*I{0JIpS8;0O1OPTT+h literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00000-a922ea3b-ffc2-4ca1-9074-a278c24c4449-c000.snappy.parquet b/tests/data/simple_table/part-00000-a922ea3b-ffc2-4ca1-9074-a278c24c4449-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..37061709636c6f5050fdadc50e72d8549ec85e03 GIT binary patch literal 262 zcmXw#ze~h06vsnxytUFohY}Dxs3EbKyL59Cr{e|}IgX@xJ&Wm&OHxHj|8XayoWmR5 z`+R@E+db}C9Hqt6%8k;#A6@+hos1(CU)@AW)M|8%(p8!z>%pFg9A)6-PfFa`kl;B_ z;E5eRNe5nP>1FaVWB)Y*uCViVsy!}n(}K|sSFbbfW%r9(9nKe-8TwP$`pFonOrXIA z1^y4j)x0c+1trgyy1UNiA7EWO^f@Dp5|-bpsWm#6TcMk6BSd+xI5eD>rKX&60S~km VY9lsOfzoW9W*I{0JIpS8;0O1OPTT+h literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet b/tests/data/simple_table/part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..37061709636c6f5050fdadc50e72d8549ec85e03 GIT binary patch literal 262 zcmXw#ze~h06vsnxytUFohY}Dxs3EbKyL59Cr{e|}IgX@xJ&Wm&OHxHj|8XayoWmR5 
z`+R@E+db}C9Hqt6%8k;#A6@+hos1(CU)@AW)M|8%(p8!z>%pFg9A)6-PfFa`kl;B_ z;E5eRNe5nP>1FaVWB)Y*uCViVsy!}n(}K|sSFbbfW%r9(9nKe-8TwP$`pFonOrXIA z1^y4j)x0c+1trgyy1UNiA7EWO^f@Dp5|-bpsWm#6TcMk6BSd+xI5eD>rKX&60S~km VY9lsOfzoW9W*I{0JIpS8;0O1OPTT+h literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00000-f17fcbf5-e0dc-40ba-adae-ce66d1fcaef6-c000.snappy.parquet b/tests/data/simple_table/part-00000-f17fcbf5-e0dc-40ba-adae-ce66d1fcaef6-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..466ecb955860fdef9846accbb4a7f79bc1debe46 GIT binary patch literal 429 zcmZ`$&r8EF82uW@=s~=tG>}6HI5w!Eb*@{77jNQeh-uGU}?CLhefGPHTj11^vo4WQ3{GJ1Hp#KMJrcN%9xsR;91S8^KAJu{eZ0M3Li;;tzPs{nTdiLeP@__y^C)5N6vmRh8 zM?k1MD60h*OJSV(PY7PGG&gBht!j8xzVlo)LW)MX_iNUCEESuwx~bZE!+e%zO3Ru} zAJ(i8`L>lwL+{3PSu9Ldv|5Wqo34>5pY0d2>3Xwy+<<$tvpdTx?@d*8Ru*2!!+?ig yKWQ^9Ju!)8I!WVrc%DQmjiTXD22l{jY81@lWE_u!L?tpD5K7JK?ks?r`KWKLq*>$u literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet b/tests/data/simple_table/part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2964979ef6db7dca39bfa9bbe4d6dae04308352b GIT binary patch literal 429 zcmZ`$&r8EF7)|3GJ&3oI2689?#|AaD&UNeX;!Qja5icT=F6k_$?baWPjQtxP{A>ND z8@p%@;eGG>-uGU}?D{Swh%n;$1Ow<2o4NKe`aK8ap#C2$Or2h0xR0#8gkb95oYYHM zy*}6T7oAIj5$T*^7iS7Nm@>+GHX|K6vQd|AS%Mwxd|J+z)w8b-mIw5YIb{$S!Fq(Y za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkweOUA6W2yO+*G<*V8}9Qo(?->N z`ncwW%(tyf0lgbDWwC&&XpNDHfiB3j&-V-YbiLU;ZIFArvpdf#?@d>ARu*0;!a#&x yKWQ_gJUNL~I!WVrc#%XpjiTXD1yK;idKAp#WE_u!L?6THq>_zvA2+=DSZY4ybyKzThWk9tv{5ym zK5cj*^Ia=bK<~#)SuCI`T4Q8lpbIkX^TR?u-E6ndTjbvD?Jn}lTkERM%EAjp7>Lm8 yCv9ewCnvE=CutlHFOx{8Q8XN?APS;bkAiuejN@^T=tPAB${4)v!9r*TAN38y^jY=* literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00001-c506e79a-0bf8-4e2b-a42b-9731b2e490ae-c000.snappy.parquet 
b/tests/data/simple_table/part-00001-c506e79a-0bf8-4e2b-a42b-9731b2e490ae-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4cb7aae46305259afd5baa77685636cf4b1261e0 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&3oI2689?#|AaD&UI_?;!Qja5icT=F6k_$?baWPjQtxP{A>Lt z8@p%@;pKhb``$}3zqtxdjQ{DXzIvnveyiFJ?=O#R!_W+m&l zmuB&*3rR2{y>sm1Tp(<>eC%du!Eg1%lWnj_Vv;7i2gCB3<4tTdWf&p}i&0O0;zw3w`)c=EpuCpr)`-yds5KR5s(|ReZ zx0ibUs&h#&BAs*W;!Gh2Q$|_OW~4($HtNzHOR$5TPs{nTdiM3v@`(O1rwjrk7?03S zjuB<Lm8 yCv9ewCnvE=CutlHFOx{8Q8XN?APS;bkAiuejN@^T=tPAB${5`40Umb-AN398aaJ<` literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00003-53f42606-6cda-4f13-8d07-599a21197296-c000.snappy.parquet b/tests/data/simple_table/part-00003-53f42606-6cda-4f13-8d07-599a21197296-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a1caecfbace73080e53a4875572a74b87c91aca7 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&3oI2689?#|AaD&UNeX;!Qja5icT=F6k_$?baU?8T&Up_}BVP zH+In+!u#I$z3;t{+0A`O5Mjjg2?o$5HgoMR`aMVFp#C2$Or2d}xKFHugkb95p4LlQ zy}i`)SDj0O5$T*`7iS7Nm@>+GHX|K6vQd}rSb`nwd|J+z)w8dUmPhoDIb{$S!Fq&t za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkwecbToW2yO+*G<*V8}9Qo(?->N z`n2JN%y+F!0lgnHWwC&&XpNDHfiB3j&kqavbhF()Z;^Yqx4XzIZ>_63D+@0aVIV@U ypR}1#o}9!gouqL*yi6jUM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%t2MeJYeAG7|$W~wg literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet b/tests/data/simple_table/part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1cd670d1938d11793908ad9575052e04c257dcbe GIT binary patch literal 429 zcmZ`$&r8EF7)|4tJ&3oI2689?#|AaD&UNeX;!Qja5icT=F6k_$?baWPjQtxP{A>ND z8@p%@;eGG>-uGU}?D{Swh%n;$1Ow<2o4NKe`aK8ap#C2$Or2h0xR0#8gkb95oYYHM zy*}6T7oAIj5$T*^7iS7Nm@>+GHX|K6vQd|AS%Mwxd|J+z)w8b-mIw5YIb{$S!Fq(Y za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkweOUA6W2yO+*G<*V8}9Qo(?->N 
z`ncwW%(tyf0lgbDWwC&&XpNDHfiB3j&-V-YbiLU;ZIFArvpdf#?@d>ARu*0;!a#&x yKWQ_gJUNL~I!WVrc#%XpjiTXD1yK;idKAp#WE_u!L?}6HI5w!Eb*`?%i#PE!M7)Sdx}>w1wp)KFGWKtH@UJ!1 zja@XSm-oH*y%#dSz6&v6iaj4A1Nzveu8rXL9FPOUKUkwWx#+}4*4}~<@o$dnm8f2y z>&1)81xA2-r^rQ_1P2jH=)k79M$T>l&AL?$ugZ6xt42uC2=`&bnva!YGgdcMyJ(ot(oAVtv)SW@ z6(a9OnKblHrpsb!ZbhrLNVJ(6oATLyA)9Ts+ovtKx4XEryz<^uRcB@4g**&+=na!L w)6x^uSfqRoq|zuFk7W=AQLHAxB2F&j%OFvS3`c}g^SXNrU~WF@8}S)dJOBUy literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00004-95c9bc2c-ac85-4581-b3cc-84502b0c314f-c000.snappy.parquet b/tests/data/simple_table/part-00004-95c9bc2c-ac85-4581-b3cc-84502b0c314f-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5770a94ab6136340e11bdadaaeeba5dcf36b772f GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ%|@cX&{FZaBNUR>s+@EFW$t{5b+`+>5|T3+HU=!$k@N(!N1mT zy0MGq5Z?E`?|tuu%&zZ3f(Rp?PcVQkv6*Y1qTh2s4(k8G!qmwnhWp6cO9-a^&2hby z)$4OTf6=)l7?I8yc5$YVgDIn|XEV~FBO7(;mL=H1&Zp&kSv~veV0l3Qm{SIU5v)gO zE60d3cTiRfA(s-I#ZL%PuZ(~+t5!FnD&Iw}8>wU?-G?=AK9-tKdEHd)yx~4iGi_AO zr;lr1$b8$%6wtddQx*%Tiq;sJ80dmb`+UEUPuH8x(+0V>JG=9|^4@e+XJz4qA`C?6 z^^-O;%9E2=rIR#{hZjkt(Lt z8@p%@;pKhb``$}3zqtxdjQ{DXzIvnveyiFJ?=O#R!_W+m&l zmuB&*3rR2{y>sm1Tp(<>eC%du!Eg1%lWnj_Vv;7i2gCB3<4t}6HI5w!Eb*@{77jNQeh*fmWoYT-Bj(oVLnSUrDe^g zPa9T!>8U^z>8OP%wQHcx(gi`am2Mb_kKI$7c7*=us literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet b/tests/data/simple_table/part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ec63649165cdab46a6642fe3e32f8be13979b618 GIT binary patch literal 429 zcmZ`$&r8EF82y@!(SvwPX&{FZaBNUR>s+@EFW$t{5b+`+>5|T3+HU=!$k@N(!N1l_ zH+In+!u#I$z3;t{`So3h0aNVx7#YyRHg)X|{GKCnVDJZPrcN)BxjWWDf)Vj=PU@AY zUZ3m5i^>H?fV*eNMVSN#5lU#^rnpPSHuT7j#mGU_gD8k%H4YYWGKnWaq7oSn38m(B_ZGn1eAGAm!B#~8 literal 0 HcmV?d00001 diff --git 
a/tests/data/simple_table/part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet b/tests/data/simple_table/part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..533801e17f8f556a8e034d9bd7f7411d33ed8dfe GIT binary patch literal 429 zcmZ`$&r8EF7)|3C6~tRg138p{V}lx6=el)x@g|;zh!+t_mvk1>cIyvC#{LZt{TdWf&p}i&0PB!{hkAIQ2!4WrcN(0+(*`4LNN7jPU@ws zUZ3mvi_Rs%h;+`di!+5BOc`Z8n~@G3*{DmmEWr+TJ}u|V>e*KZ%LDqyoH7WEU_C-x zIYyMZgR)u(xs>26enN!xbw4flDPX`^aB zeO&WG=G#`LfZmOnvRFV>w8qH9Ko?}%=lg|xy54M_Hpso**`4Q=_ok~lD+@0aVIV@U ypR}1#o}9!gouqL*yhtLQM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%tdkdi%eAG8Wz*c(z literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00007-94f725e2-3963-4b00-9e83-e31021a93cf9-c000.snappy.parquet b/tests/data/simple_table/part-00007-94f725e2-3963-4b00-9e83-e31021a93cf9-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ea2985c2bc2c2730d8d4442a6eb4e120b8e51c10 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&3oI2689?#|AaD&UNeX;!Qja5icT=F6k_$?baU?8T&Up_}BVP zH+In+!u#I$z3;t{+0A`O5Mjjg2?o$5HgoL``aMVFp#C2$Or2d}xKFHugkb95p4LlQ zy}i`)SDj0O5$T*`7iS7Nm@>+GHX|K6vQd}rSb`nwd|J+z)w8dUmPhoDIb{$S!Fq&t za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkwecbToW2yO+*G<*V8}9Qo(?->N z`n2JN%y+F!0lgnHWwC&&XpNDHfiB3j&kqavbhF()Z;^Yqx4XzIZ>_63D+@0aVIV@U ypR}1#o}9!gouqL*yi6jUM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%t2MeJYeAG7xcvemT literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet b/tests/data/simple_table/part-00011-42f838f9-a911-40af-98f5-2fccfa1b123f-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1cd670d1938d11793908ad9575052e04c257dcbe GIT binary patch literal 429 zcmZ`$&r8EF7)|4tJ&3oI2689?#|AaD&UNeX;!Qja5icT=F6k_$?baWPjQtxP{A>ND z8@p%@;eGG>-uGU}?D{Swh%n;$1Ow<2o4NKe`aK8ap#C2$Or2h0xR0#8gkb95oYYHM zy*}6T7oAIj5$T*^7iS7Nm@>+GHX|K6vQd|AS%Mwxd|J+z)w8b-mIw5YIb{$S!Fq(Y 
za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkweOUA6W2yO+*G<*V8}9Qo(?->N z`ncwW%(tyf0lgbDWwC&&XpNDHfiB3j&-V-YbiLU;ZIFArvpdf#?@d>ARu*0;!a#&x yKWQ_gJUNL~I!WVrc#%XpjiTXD1yK;idKAp#WE_u!L?+GHX|K6vQd}rSb`nwd|J+z)w8dUmPhoDIb{$S!Fq&t za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkwecbToW2yO+*G<*V8}9Qo(?->N z`n2JN%y+F!0lgnHWwC&&XpNDHfiB3j&kqavbhF()Z;^Yqx4XzIZ>_63D+@0aVIV@U ypR}1#o}9!gouqL*yi6jUM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%t2MeJYeAG7|$W~wg literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet b/tests/data/simple_table/part-00049-d3095817-de74-49c1-a888-81565a40161d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..533801e17f8f556a8e034d9bd7f7411d33ed8dfe GIT binary patch literal 429 zcmZ`$&r8EF7)|3C6~tRg138p{V}lx6=el)x@g|;zh!+t_mvk1>cIyvC#{LZt{TdWf&p}i&0PB!{hkAIQ2!4WrcN(0+(*`4LNN7jPU@ws zUZ3mvi_Rs%h;+`di!+5BOc`Z8n~@G3*{DmmEWr+TJ}u|V>e*KZ%LDqyoH7WEU_C-x zIYyMZgR)u(xs>26enN!xbw4flDPX`^aB zeO&WG=G#`LfZmOnvRFV>w8qH9Ko?}%=lg|xy54M_Hpso**`4Q=_ok~lD+@0aVIV@U ypR}1#o}9!gouqL*yhtLQM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%tdkdi%eAG8Wz*c(z literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00058-b462c4cb-0c48-4148-8475-e21d2a2935f8-c000.snappy.parquet b/tests/data/simple_table/part-00058-b462c4cb-0c48-4148-8475-e21d2a2935f8-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..17a7a308f42d4f4eb92ddbf67b9c813a00c6921c GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&52TrGXquz_CFMt#jQvym%8&L&S@Sq)R%BX}k4@B4huC2me~X z>BcUaLwMi&zW2QsGP}MD2_lSmKEVLG#AdF2ihj=lIjH{!3sWbT815r$FCm!vH^=o- zRND z8@p%@;eGG>-uGU}?D{Swh%n;$1Ow<2o4NKe`aK8ap#C2$Or2h0xR0#8gkb95oYYHM zy*}6T7oAIj5$T*^7iS7Nm@>+GHX|K6vQd|AS%Mwxd|J+z)w8b-mIw5YIb{$S!Fq(Y za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkweOUA6W2yO+*G<*V8}9Qo(?->N z`ncwW%(tyf0lgbDWwC&&XpNDHfiB3j&-V-YbiLU;ZIFArvpdf#?@d>ARu*0;!a#&x yKWQ_gJUNL~I!WVrc#%XpjiTXD1yK;idKAp#WE_u!L?TdWf&p}i&0O0;zw3w`)c=EpuCpr)`-yds5KR5s(|ReZ 
zx0ibUs&h#&BAs*W;!Gh2Q$|_OW~4($HtNzHOR$5TPs{nTdiM3v@`(O1rwjrk7?03S zjuB<Lm8 yCv9ewCnvE=CutlHFOx{8Q8XN?APS;bkAiuejN@^T=tPAB${5`40Umb-AN398aaJ<` literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00077-2fcb1c7c-5390-48ee-93f6-0acf11199a0d-c000.snappy.parquet b/tests/data/simple_table/part-00077-2fcb1c7c-5390-48ee-93f6-0acf11199a0d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..df7f737162902c4b54c83142f6af578074176961 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CMZ{Z5138p{V}lx6=el)x@g|;zh!+t_mvk1>cIyvC#{LZt{TdWf&p}i&0PB!{hkAIQ2!4WrcN(0+(*`4LNN7jPU@ws zUZ3mvi_Rs%h;+`di!+5BOc`Z8n~@G3*{DmmEWr+TJ}u|V>e*KZ%LDqyoH7WEU_C-x zIYyMZgR)u(xs>26enN!xbw4flDPX`^aB zeO&WG=G#`LfZmOnvRFV>w8qH9Ko?}%=lg|xy54M_Hpso**`4Q=_ok~lD+@0aVIV@U ypR}1#o}9!gouqL*yhtLQM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%tdkdi%eAG8iXjX&( literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00107-3f6c2aa0-fc28-4f4c-be15-135e15b398f4-c000.snappy.parquet b/tests/data/simple_table/part-00107-3f6c2aa0-fc28-4f4c-be15-135e15b398f4-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ec63649165cdab46a6642fe3e32f8be13979b618 GIT binary patch literal 429 zcmZ`$&r8EF82y@!(SvwPX&{FZaBNUR>s+@EFW$t{5b+`+>5|T3+HU=!$k@N(!N1l_ zH+In+!u#I$z3;t{`So3h0aNVx7#YyRHg)X|{GKCnVDJZPrcN)BxjWWDf)Vj=PU@AY zUZ3m5i^>H?fV*eNMVSN#5lU#^rnpPSHuT7j#mGU_gD8k%H4YYWGKnWaq7oSn38m(B_ZGn1eAGAm!B#~8 literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00112-07fd790a-11dc-4fde-9acd-623e740be992-c000.snappy.parquet b/tests/data/simple_table/part-00112-07fd790a-11dc-4fde-9acd-623e740be992-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b5e5edad5e55405cda49241ab0bfa6bc71237653 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJq&Lt4dhS)jty#Po$J=&#hZ8f2XG%Om>7oH7WEU_C-R zIYyMZ!?Icmxsu>4enN!xZK4flDPX`^aB zd))9s=DSv=fZmUpvRFb@w8qH9Ko?}%=ZA%Sw%Kl}6HI5w!Eb*@{77jNQeh*fmWoYT-Bj(oVLnSUrDe^g zPa9T!>8U^z>8OP%wQHcx(gi`am2Mb_kKI$7c7*=us 
literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet b/tests/data/simple_table/part-00121-d8bc3e53-d2f2-48ce-9909-78da7294ffbd-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..960d2b6e47cb3b2079d3b15800927e18e342929a GIT binary patch literal 429 zcmZ`$&r8EF7)|3C1;txR138p{V}lx6=el)x@g|;zh!+t_mvk1>cIyvC#{LZt{TdWf&p}i&0PB!{hkAIQ2!4WrcN(0+(*`4LNN7jPU@ws zUZ3mvi_Rs%h;+`di!+5BOc`Z8n~@G3*{DmmEWr+TJ}u|V>e*KZ%LDqyoH7WEU_C-x zIYyMZgR)u(xs>26enN!xbw4flDPX`^aB zeO&WG=G#`LfZmOnvRFV>w8qH9Ko?}%=lg|xy54M_Hpso**`4Q=_ok~lD+@0aVIV@U ypR}1#o}9!gouqL*yhtLQM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%tdkdi%eAG8u5LS%< literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet b/tests/data/simple_table/part-00128-b31c3b81-24da-4a90-a8b4-578c9e9a218d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3b91e5be30c42fa560fdfce67f347a692072b1fb GIT binary patch literal 429 zcmZ`$&r8EF82uW@=s~=tG>}6HI5w!Eb*`?%i#PE!M7)Sdx}>w1wp)KFGWKtH@UJ!1 zja@XSm-oH*y%#dSz6&v6iaj4A1Nzveu8rXL9FPOUKUkwWx#+}4*4}~<@o$dnm8f2y z>&1)81xA2-r^rQ_1P2jH=)k79M$T>l&AL?$ugZ6xt42uC2=`&bnva!YGgdcMyJ(ot(oAVtv)SW@ z6(a9OnKblHrpsb!ZbhrLNVJ(6oATLyA)9Ts+ovtKx4XEryz<^uRcB@4g**&+=na!L w)6x^uSfqRoq|zuFk7W=AQLHAxB2F&j%OFvS3`c}g^SXNrU~WF@8}S)dJOBUy literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet b/tests/data/simple_table/part-00140-e9b1971d-d708-43fb-b07f-975d2226b800-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ea2985c2bc2c2730d8d4442a6eb4e120b8e51c10 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&3oI2689?#|AaD&UNeX;!Qja5icT=F6k_$?baU?8T&Up_}BVP zH+In+!u#I$z3;t{+0A`O5Mjjg2?o$5HgoL``aMVFp#C2$Or2d}xKFHugkb95p4LlQ zy}i`)SDj0O5$T*`7iS7Nm@>+GHX|K6vQd}rSb`nwd|J+z)w8dUmPhoDIb{$S!Fq&t 
za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkwecbToW2yO+*G<*V8}9Qo(?->N z`n2JN%y+F!0lgnHWwC&&XpNDHfiB3j&kqavbhF()Z;^Yqx4XzIZ>_63D+@0aVIV@U ypR}1#o}9!gouqL*yi6jUM$vGnf+&b$JqqS=GLFYVq7xMkC}Z%t2MeJYeAG7xcvemT literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet b/tests/data/simple_table/part-00143-03ceb88e-5283-4193-aa43-993cdf937fd3-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..25fe1a24f16f5e8a38ed3c836e4bb61f42ae8138 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ*c;o2689?#|AaD&UNeX;!Qja5icT=F6k_$?baWPjQtxP{A>ND z8@p%@;eGG>-uGU}?D{Swh%n;$1Ow<2o4NK0`aK8ap#C2$Or2h0xR0#8gkb959M?-( zy*}6T7oAIj5$T*^7iS7Nm@>+GHX|K6vQd|AS%Mwxd|J+z)w8b-mIw5YIb{$S!Fq(Y za*QZ*2W7Poaw)-C{Dcto$_PlaYIP&3@?GS*kxDkweOUA6W2yO+*G<*V8}9Qo(?->N z`ncwW%(tyf0lgbDWwC&&XpNDHfiB3j&-V-YbiLU;ZIFArvpdf#?@d>ARu*0;!a#&x yKWQ_gJUNL~I!WVrc#%XpjiTXD1yK;idKAp#WE_u!L?BcUaLwMi&zW2QsGP}MD2_lSmKEVLG#AdF2ihj=lIjH{!3sWbT815r$FCm!vH^=o- zRs+@EFW$t{5b+`+>5|T3+HU=!$k@N(!N1l_ zH+In+!u#I$z3;t{`So3h0aNVx7#YyTHg)YD{GKCnVDJZPrh1pi+$YvSf)Vj=PV1GZ zUZ3m5i^>H?fIDZ%MVSN#5lU#^rnp1KHgw63#mGU_gD8k%H4YYWGKnWaq7oSn38m(B_ZGn1eAG962v(>7 literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet b/tests/data/simple_table/part-00164-bf40481c-4afd-4c02-befa-90f056c2d77a-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c3cf2f57ca0058ef3d77ac1fa8c1e569a05480d2 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&2xC8pxpp92?ZoI@hhki#PE!M7)Sdx}>w1wp)KFGWKtH@UQio zZtS8tg!jGgd*6E@v+KK%Ai{{}6AYkBZ06dh==U6ugZh84Fm-Z?;XbnV5`w9Jb6hWF z_4-`TUvw@BMx=9wU7RW8V9F@#*^G4P$VOedWeIk$^JzI>R?ogVSRT+n=9EET1nUvn z$}ytM9hB8V$fX2l@e@MSDEoIgGT*i`1@vyrl*Iz7qBTY)2D%{AKHo3o)AeTav_bCe&h9*~yfV2p6+}T4>rpU|lW{x_5}l}UKpBJA-CGFF;G@0)h_F_% literal 0 HcmV?d00001 diff --git a/tests/data/simple_table/part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet 
b/tests/data/simple_table/part-00190-8ac0ae67-fb1d-461d-a3d3-8dc112766ff5-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..be5bf31a8b174d20f341bff6689da4770272c5c5 GIT binary patch literal 429 zcmZ`$&r8EF7)|3CJ&1ToX&{FZaBNUR>s+@EFW$t{5b+`+>5|T3+HU=!$k@N(!N1mT zy0MGq5Z?E`?|tuu%&zZ3f(Rp?PcVQkv6*Y1qTh2s4(k8G!qmwnhWp6cO9-a^&2hby z)$4OTf6=)l7?I8yc5$YVgDIn|XEV~FBO7(;mL=H1&Zp&kSv~veV0l3Qm{SIU5v)gO zE60d3cTiRfA(s-I#ZL%PuZ(~+t5!FnD&Iw}8>wU?-G?=AK9-tKdEHd)yx~4iGi_AO zr;lr1$b8$%6wtddQx*%Tiq;sJ80dmb`+UEUPuH8x(+0V>JG=9|^4@e+XJz4qA`C?6 z^^-O;%9E2=rIR#{hZjkt(