Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disk merge #1678

Merged
merged 26 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
61c2e42
less string clones in node lookups
ljeub-pometry Jun 11, 2024
27fd5d4
add public interface for disk merge and start testing
ljeub-pometry Jul 1, 2024
1a78f5b
more tests
ljeub-pometry Jul 1, 2024
dc32422
actually write all the benchmark results again
ljeub-pometry Jul 2, 2024
a5c0f60
clean up debug output and fix merging bugs
ljeub-pometry Jul 3, 2024
ecea53a
move DiskGraph struct up one level
ljeub-pometry Jul 3, 2024
30cca6b
Expose the merge method to python
ljeub-pometry Jul 4, 2024
a9d8a7c
update storage
ljeub-pometry Jul 4, 2024
7f0630a
fix the features
ljeub-pometry Jul 5, 2024
f097d23
add graph_dir method to DiskGraph
ljeub-pometry Jul 5, 2024
2ba2be3
update submodule
ljeub-pometry Jul 5, 2024
1e9407e
minor fixes
ljeub-pometry Jul 5, 2024
83f233b
try to stop the 143 failures
ljeub-pometry Jul 5, 2024
60cc519
fix the workflow
ljeub-pometry Jul 5, 2024
1e6fdbf
more workflow tweaks
ljeub-pometry Jul 5, 2024
2826f9f
verbose output
ljeub-pometry Jul 5, 2024
33c5d9f
does older ubuntu have the same problem?
ljeub-pometry Jul 5, 2024
fbeab7e
update submodule
ljeub-pometry Jul 8, 2024
3eaa2a4
update submodule
ljeub-pometry Jul 10, 2024
9df678e
update submodule
ljeub-pometry Jul 12, 2024
b8fc2f3
update submodule
ljeub-pometry Jul 12, 2024
3a423b1
Merge master into disk_merge
ljeub-pometry Jul 12, 2024
f979721
update submodule
ljeub-pometry Jul 12, 2024
8217af4
try to fix linker running out of memory
ljeub-pometry Jul 15, 2024
e7504c6
install lld
ljeub-pometry Jul 15, 2024
025bb40
try to get protoc on readthedocs
ljeub-pometry Jul 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ jobs:
- name: Run benchmark (Unix)
run: |
set -o pipefail
cargo bench --bench base -p raphtory-benchmark -- --output-format=bencher | tee benchmark-result.txt
cargo bench --bench algobench -p raphtory-benchmark -- --output-format=bencher | tee benchmark-result.txt
cargo bench --bench base --bench algobench -p raphtory-benchmark -- --output-format=bencher | tee benchmark-result.txt
- name: Delete cargo.lock if it exists
run: |
rm -f Cargo.lock
Expand Down
14 changes: 9 additions & 5 deletions .github/workflows/test_rust_disk_storage_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
strategy:
matrix:
include:
- os: macos-latest
- os: ubuntu-20.04
- os: windows-latest
- { os: macos-latest, flags: "" }
- { os: ubuntu-20.04, flags: "-C link-arg=-fuse-ld=lld" }
- { os: windows-latest, flags: "" }
steps:
- uses: maxim-lobanov/setup-xcode@v1
name: Xcode version
Expand All @@ -43,6 +43,10 @@ jobs:
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Install LLD
if: "contains(matrix.os, 'ubuntu')"
run: |
sudo apt-get install lld
- uses: webfactory/[email protected]
name: Load pometry-storage key
with:
Expand All @@ -56,14 +60,14 @@ jobs:
- name: Install Protoc
uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install nextest
uses: taiki-e/install-action@nextest
- name: Activate pometry-storage in Cargo.toml
run: make pull-storage
- name: Run all Tests (disk_graph)
env:
RUSTFLAGS: -Awarnings
RUSTFLAGS: -Awarnings ${{ matrix.flags }}
TEMPDIR: ${{ runner.temp }}
run: |
cargo nextest run --all --no-default-features --features "storage"
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ display-error-chain = "0.2.0"
polars-arrow = "0.39.2"
polars-parquet = "0.39.2"
polars-utils = "0.39.2"
kdam = { version = "0.5.1", features = ["notebook"] }
kdam = { version = "0.5.1" }
pretty_assertions = "1.4.0"
quickcheck = "1.0.3"
quickcheck_macros = "1.0.0"
Expand Down
7 changes: 0 additions & 7 deletions examples/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,3 @@ name = "crypto"

[[bin]]
name = "pokec"

[target.x86_64-unknown-linux-gnu]
linker = "/usr/bin/clang"
rustflags = ["-Clink-arg=-fuse-ld=lld", "-Clink-arg=-Wl,--no-rosegment"]

[profile.release]
debug = true
2 changes: 1 addition & 1 deletion pometry-storage-private
2 changes: 0 additions & 2 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#![allow(non_local_definitions)]

extern crate core;
use pyo3::prelude::*;
use raphtory_core::python::packages::base_modules::{
Expand Down
2 changes: 1 addition & 1 deletion raphtory-cypher/examples/raphtory_cypher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ mod cypher {
use arrow::util::pretty::print_batches;
use clap::Parser;
use futures::{stream, StreamExt};
use raphtory::disk_graph::graph_impl::{DiskGraph, ParquetLayerCols};
use raphtory::disk_graph::{graph_impl::ParquetLayerCols, DiskGraph};
use raphtory_cypher::{run_cypher, run_cypher_to_streams, run_sql};
use serde::{de::DeserializeOwned, Deserialize};

Expand Down
4 changes: 2 additions & 2 deletions raphtory-cypher/src/executor/table_provider/edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use datafusion::{
};
use futures::Stream;
use pometry_storage::prelude::*;
use raphtory::disk_graph::graph_impl::DiskGraph;
use raphtory::disk_graph::DiskGraph;

use crate::executor::{arrow2_to_arrow_buf, ExecError};

Expand Down Expand Up @@ -99,7 +99,7 @@ impl EdgeListTableProvider {

fn lift_nested_arrow_schema(graph: &DiskGraph, layer_id: usize) -> Result<Arc<Schema>, ExecError> {
let arrow2_fields = graph.as_ref().layer(layer_id).edges_data_type();
let a2_dt = crate::arrow2::datatypes::ArrowDataType::Struct(arrow2_fields.clone());
let a2_dt = crate::arrow2::datatypes::ArrowDataType::Struct(arrow2_fields.to_vec());
let a_dt: DataType = a2_dt.into();
let schema = match a_dt {
DataType::Struct(fields) => {
Expand Down
2 changes: 1 addition & 1 deletion raphtory-cypher/src/executor/table_provider/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use futures::Stream;
use pometry_storage::properties::ConstProps;
use raphtory::{
core::entities::VID,
disk_graph::{graph_impl::DiskGraph, prelude::*},
disk_graph::{prelude::*, DiskGraph},
};
use std::{any::Any, fmt::Formatter, sync::Arc};

Expand Down
5 changes: 2 additions & 3 deletions raphtory-cypher/src/hop/execution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,16 @@ use datafusion::{
use datafusion::physical_expr::Partitioning;
use futures::{Stream, StreamExt};

use crate::take_record_batch;
use pometry_storage::graph_fragment::TempColGraphFragment;
use raphtory::{
core::{entities::VID, Direction},
disk_graph::{
graph_impl::DiskGraph,
prelude::{ArrayOps, BaseArrayOps, PrimitiveCol},
DiskGraph,
},
};

use crate::take_record_batch;

use super::operator::HopPlan;

#[derive(Debug)]
Expand Down
2 changes: 1 addition & 1 deletion raphtory-cypher/src/hop/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use datafusion::{
logical_expr::{Expr, LogicalPlan, TableScan, UserDefinedLogicalNodeCore},
};

use raphtory::{core::Direction, disk_graph::graph_impl::DiskGraph};
use raphtory::{core::Direction, disk_graph::DiskGraph};

#[derive(Debug, PartialEq, Hash, Eq)]
pub struct HopPlan {
Expand Down
7 changes: 3 additions & 4 deletions raphtory-cypher/src/hop/rule.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::sync::Arc;

use crate::hop::operator::HopPlan;
use async_trait::async_trait;
use datafusion::{
common::Column,
Expand All @@ -10,9 +11,7 @@ use datafusion::{
physical_plan::ExecutionPlan,
physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner},
};
use raphtory::{core::Direction, disk_graph::graph_impl::DiskGraph};

use crate::hop::operator::HopPlan;
use raphtory::{core::Direction, disk_graph::DiskGraph};

use super::execution::HopExec;

Expand Down Expand Up @@ -141,7 +140,7 @@ impl ExtensionPlanner for HopPlanner {
#[cfg(test)]
mod test {
use arrow::util::pretty::print_batches;
use raphtory::disk_graph::graph_impl::DiskGraph;
use raphtory::disk_graph::DiskGraph;
use tempfile::tempdir;

use crate::prepare_plan;
Expand Down
8 changes: 3 additions & 5 deletions raphtory-cypher/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ mod cypher {
parser::ast::*,
*,
};
use raphtory::disk_graph::graph_impl::DiskGraph;
use raphtory::disk_graph::DiskGraph;

use crate::{
executor::table_provider::node::NodeTableProvider,
Expand Down Expand Up @@ -185,9 +185,8 @@ mod cypher {
use arrow_array::RecordBatch;
use tempfile::tempdir;

use raphtory::{disk_graph::graph_impl::DiskGraph, prelude::*};

use crate::run_cypher;
use raphtory::{disk_graph::DiskGraph, prelude::*};

lazy_static::lazy_static! {
static ref EDGES: Vec<(u64, u64, i64, f64)> = vec![
Expand Down Expand Up @@ -278,10 +277,9 @@ mod cypher {
datatypes::*,
};
use arrow::util::pretty::print_batches;
use raphtory::disk_graph::{graph_impl::ParquetLayerCols, DiskGraph};
use tempfile::tempdir;

use raphtory::disk_graph::graph_impl::{DiskGraph, ParquetLayerCols};

use crate::run_cypher;

fn schema() -> ArrowSchema {
Expand Down
9 changes: 3 additions & 6 deletions raphtory-cypher/src/transpiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use raphtory::{
Direction,
},
db::{api::properties::internal::ConstPropertiesOps, graph::node::NodeView},
disk_graph::graph_impl::DiskGraph,
disk_graph::DiskGraph,
prelude::*,
};
use sqlparser::ast::{
Expand Down Expand Up @@ -1143,18 +1143,15 @@ fn sql_like(

#[cfg(test)]
mod test {

use crate::{parser, transpiler};

use super::*;
use pretty_assertions::assert_eq;
use raphtory::{
db::{api::mutation::AdditionOps, graph::graph::Graph},
disk_graph::DiskGraph,
prelude::NO_PROPS,
};
use tempfile::tempdir;

use pretty_assertions::assert_eq;

#[test]
fn count_all_nodes() {
check_cypher_to_sql(
Expand Down
2 changes: 1 addition & 1 deletion raphtory-graphql/src/data.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use parking_lot::RwLock;
#[cfg(feature = "storage")]
use raphtory::disk_graph::graph_impl::DiskGraph;
use raphtory::disk_graph::DiskGraph;
use raphtory::{
core::Prop,
db::api::view::MaterializedGraph,
Expand Down
2 changes: 1 addition & 1 deletion raphtory-graphql/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ mod graphql_test {
use async_graphql::UploadValue;
use dynamic_graphql::{Request, Variables};
#[cfg(feature = "storage")]
use raphtory::disk_graph::graph_impl::DiskGraph;
use raphtory::disk_graph::DiskGraph;
use raphtory::{
db::{api::view::IntoDynamic, graph::views::deletion_graph::PersistentGraph},
prelude::*,
Expand Down
5 changes: 3 additions & 2 deletions raphtory/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ python = [
"dep:display-error-chain",
"polars-arrow?/compute",
"raphtory-api/python",
"dep:kdam",
"dep:rpds",
"kdam?/notebook"
]

# storage
Expand All @@ -145,7 +145,8 @@ storage = [
arrow = [
"dep:polars-arrow",
"dep:polars-parquet",
"polars-parquet?/compression"
"polars-parquet?/compression",
"dep:kdam",
]

proto = [
Expand Down
4 changes: 2 additions & 2 deletions raphtory/src/core/utils/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ pub enum GraphError {
source: std::io::Error,
},

#[cfg(feature = "python")]
#[cfg(feature = "arrow")]
#[error("Failed to load graph: {0}")]
LoadFailure(String),

#[cfg(feature = "python")]
#[cfg(feature = "arrow")]
#[error(
"Failed to load graph as the following columns are not present within the dataframe: {0}"
)]
Expand Down
2 changes: 1 addition & 1 deletion raphtory/src/db/api/properties/props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use raphtory_api::core::storage::arc_str::ArcStr;
use std::collections::HashMap;

/// View of the properties of an entity (graph|node|edge)
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct Properties<P: PropertiesOps + Clone> {
pub(crate) props: P,
}
Expand Down
2 changes: 1 addition & 1 deletion raphtory/src/db/api/view/internal/materialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use serde::{de::Error, Deserialize, Deserializer, Serialize};
use std::path::Path;

#[cfg(feature = "storage")]
use crate::disk_graph::graph_impl::DiskGraph;
use crate::disk_graph::DiskGraph;

#[enum_dispatch(CoreGraphOps)]
#[enum_dispatch(InternalLayerOps)]
Expand Down
14 changes: 6 additions & 8 deletions raphtory/src/db/graph/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use chrono::{DateTime, Utc};
use raphtory_api::core::storage::arc_str::ArcStr;
use std::{
fmt,
fmt::Debug,
hash::{Hash, Hasher},
sync::Arc,
};
Expand Down Expand Up @@ -72,15 +73,12 @@ impl<G, GH> AsNodeRef for NodeView<G, GH> {
}
}

impl<'graph, G, GH: GraphViewOps<'graph>> fmt::Debug for NodeView<G, GH> {
impl<'graph, G, GH: GraphViewOps<'graph> + Debug> fmt::Debug for NodeView<G, GH> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"NodeView {{ graph: {}{}, node: {} }}",
self.graph.count_nodes(),
self.graph.count_edges(),
self.node.0
)
f.debug_struct("NodeView")
.field("node", &self.node)
.field("graph", &self.graph)
.finish()
}
}

Expand Down
2 changes: 1 addition & 1 deletion raphtory/src/disk_graph/graph_impl/const_properties_ops.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{db::api::properties::internal::ConstPropertiesOps, prelude::Prop};
use raphtory_api::core::storage::arc_str::ArcStr;

use super::DiskGraph;
use crate::disk_graph::DiskGraph;

impl ConstPropertiesOps for DiskGraph {
#[doc = " Find id for property name (note this only checks the meta-data, not if the property actually exists for the entity)"]
Expand Down
8 changes: 4 additions & 4 deletions raphtory/src/disk_graph/graph_impl/core_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ use crate::{
view::{internal::CoreGraphOps, BoxedIter},
},
disk_graph::{
graph_impl::DiskGraph,
storage_interface::{
edge::DiskOwnedEdge,
edges::DiskEdges,
node::{DiskNode, DiskOwnedNode},
nodes::DiskNodesOwned,
},
DiskGraph,
},
};
use itertools::Itertools;
use polars_arrow::datatypes::ArrowDataType;
use pometry_storage::{properties::ConstProps, GidRef, GID};
use pometry_storage::{properties::ConstProps, GidRef};
use raphtory_api::core::{input::input_node::InputNode, storage::arc_str::ArcStr};
use rayon::prelude::*;

Expand Down Expand Up @@ -121,8 +121,8 @@ impl CoreGraphOps for DiskGraph {
fn internalise_node(&self, v: NodeRef) -> Option<VID> {
match v {
NodeRef::Internal(vid) => Some(vid),
NodeRef::External(vid) => self.inner.find_node(&GID::U64(vid)),
NodeRef::ExternalStr(string) => self.inner.find_node(&GID::Str(string.into())),
NodeRef::External(vid) => self.inner.find_node(GidRef::U64(vid)),
NodeRef::ExternalStr(string) => self.inner.find_node(GidRef::Str(string)),
}
}

Expand Down
2 changes: 1 addition & 1 deletion raphtory/src/disk_graph/graph_impl/edge_filter_ops.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::DiskGraph;
use crate::{
core::entities::LayerIds,
db::api::{storage::edges::edge_ref::EdgeStorageRef, view::internal::EdgeFilterOps},
disk_graph::DiskGraph,
};

impl EdgeFilterOps for DiskGraph {
Expand Down
2 changes: 1 addition & 1 deletion raphtory/src/disk_graph/graph_impl/layer_ops.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::DiskGraph;
use crate::{
core::{entities::LayerIds, utils::errors::GraphError},
db::api::view::internal::InternalLayerOps,
disk_graph::DiskGraph,
prelude::Layer,
};
use itertools::Itertools;
Expand Down
2 changes: 1 addition & 1 deletion raphtory/src/disk_graph/graph_impl/list_ops.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
db::api::view::internal::{CoreGraphOps, EdgeList, ListOps, NodeList},
disk_graph::graph_impl::DiskGraph,
disk_graph::DiskGraph,
};
use rayon::prelude::*;

Expand Down
Loading
Loading