From 092195f963526200e8dafb60d5167e36589ec500 Mon Sep 17 00:00:00 2001 From: Pedro Rico Pinazo Date: Mon, 2 Dec 2024 17:01:37 +0000 Subject: [PATCH] add lotr_graph_with_props function (#1881) --- raphtory/src/graph_loader/lotr_graph.rs | 83 +++++++++++++++----- raphtory/src/python/packages/base_modules.rs | 1 + raphtory/src/python/packages/graph_loader.rs | 6 ++ 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/raphtory/src/graph_loader/lotr_graph.rs b/raphtory/src/graph_loader/lotr_graph.rs index 5dcc3ee672..372b60fbcb 100644 --- a/raphtory/src/graph_loader/lotr_graph.rs +++ b/raphtory/src/graph_loader/lotr_graph.rs @@ -23,7 +23,7 @@ //! ``` use crate::{graph_loader::fetch_file, io::csv_loader::CsvLoader, prelude::*}; use serde::Deserialize; -use std::path::PathBuf; +use std::{collections::HashMap, path::PathBuf}; use tracing::error; #[derive(Deserialize, std::fmt::Debug)] @@ -47,6 +47,27 @@ pub fn lotr_file() -> Result<PathBuf, Box<dyn std::error::Error>> { ) } +#[derive(Deserialize, std::fmt::Debug)] +struct Character { + pub name: String, + pub race: String, + pub gender: String, +} + +/// Downloads the lotr_properties.csv file from GitHub +/// and returns the path to the file +/// +/// Returns: +/// - A PathBuf to the lotr_properties.csv file +fn lotr_properties_file() -> Result<PathBuf, Box<dyn std::error::Error>> { + fetch_file( + "lotr_properties.csv", + true, + "https://raw.githubusercontent.com/Raphtory/Data/main/lotr_properties.csv", + 600, + ) +} + /// Constructs a graph from the LOTR dataset /// Including all edges, nodes and timestamps /// @@ -57,26 +78,46 @@ pub fn lotr_file() -> Result<PathBuf, Box<dyn std::error::Error>> { /// Returns: /// - A Graph containing the LOTR dataset pub fn lotr_graph() -> Graph { - let graph = { - let g = Graph::new(); + let g = Graph::new(); + CsvLoader::new(lotr_file().unwrap()) + .load_into_graph(&g, |lotr: Lotr, g: &Graph| { + let src_id = lotr.src_id; + let dst_id = lotr.dst_id; + let time = lotr.time; - CsvLoader::new(lotr_file().unwrap()) - .load_into_graph(&g, |lotr: Lotr, g: &Graph| { - let src_id = lotr.src_id; - let dst_id
= lotr.dst_id; - let time = lotr.time; + g.add_node(time, src_id.clone(), NO_PROPS, None) + .map_err(|err| error!("{:?}", err)) + .ok(); + g.add_node(time, dst_id.clone(), NO_PROPS, None) + .map_err(|err| error!("{:?}", err)) + .ok(); + g.add_edge(time, src_id.clone(), dst_id.clone(), NO_PROPS, None) + .expect("Error: Unable to add edge"); + }) + .expect("Failed to load graph from CSV data files"); + g +} - g.add_node(time, src_id.clone(), NO_PROPS, None) - .map_err(|err| error!("{:?}", err)) - .ok(); - g.add_node(time, dst_id.clone(), NO_PROPS, None) - .map_err(|err| error!("{:?}", err)) - .ok(); - g.add_edge(time, src_id.clone(), dst_id.clone(), NO_PROPS, None) - .expect("Error: Unable to add edge"); - }) - .expect("Failed to load graph from CSV data files"); - g - }; - graph +/// Constructs a graph from the LOTR dataset +/// Including all edges, nodes, and timestamps, plus `race` and `gender` constant properties on nodes that also appear in lotr_properties.csv +/// +/// # Arguments +/// +/// This function takes no arguments. +/// +/// Returns: +/// - A Graph containing the LOTR dataset with the extra node properties +pub fn lotr_graph_with_props() -> Graph { + let g = lotr_graph(); + CsvLoader::new(lotr_properties_file().unwrap()) + .load_into_graph(&g, |char: Character, g: &Graph| { + if let Some(node) = g.node(char.name) { + let _ = node.add_constant_properties(HashMap::from([ + ("race", char.race), + ("gender", char.gender), + ])); + } + }) + .expect("Failed to load graph from CSV data files"); + g } diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 9422667dae..70cf7d14bc 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -118,6 +118,7 @@ pub fn base_graph_loader_module(py: Python<'_>) -> Result<Bound<PyModule>, PyErr add_functions!( &graph_loader_module, lotr_graph, + lotr_graph_with_props, neo4j_movie_graph, stable_coin_graph, reddit_hyperlink_graph, diff --git a/raphtory/src/python/packages/graph_loader.rs
b/raphtory/src/python/packages/graph_loader.rs index 3ed52dae8f..e2b14c27df 100644 --- a/raphtory/src/python/packages/graph_loader.rs +++ b/raphtory/src/python/packages/graph_loader.rs @@ -29,6 +29,12 @@ pub fn lotr_graph() -> PyResult<Py<PyGraph>> { PyGraph::py_from_db_graph(crate::graph_loader::lotr_graph::lotr_graph()) } +/// Same as `lotr_graph()` but with additional properties race and gender for some of the nodes +#[pyfunction] +pub fn lotr_graph_with_props() -> PyResult<Py<PyGraph>> { + PyGraph::py_from_db_graph(crate::graph_loader::lotr_graph::lotr_graph_with_props()) +} + /// Load (a subset of) Reddit hyperlinks dataset into a graph. /// The dataset is available at http://snap.stanford.edu/data/soc-redditHyperlinks-title.tsv /// The hyperlink network represents the directed connections between two subreddits (a subreddit