Skip to content

Commit

Permalink
Merge pull request #5 from gtfierro/gtf-python-fixes
Browse files Browse the repository at this point in the history
Gtf python fixes
  • Loading branch information
gtfierro authored Jan 23, 2025
2 parents 4cde1e0 + 4aa0a55 commit 5561be4
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 72 deletions.
7 changes: 6 additions & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,12 @@ fn main() -> Result<()> {
.get_ontology_by_name(iri.as_ref())
.ok_or(anyhow::anyhow!(format!("Ontology {} not found", iri)))?;
let closure = env.get_dependency_closure(ont.id())?;
let graph = env.get_union_graph(&closure, rewrite_sh_prefixes, remove_owl_imports)?;
let (graph, _successful, failed_imports) = env.get_union_graph(&closure, rewrite_sh_prefixes, remove_owl_imports)?;
if let Some(failed_imports) = failed_imports {
for imp in failed_imports {
eprintln!("{}", imp);
}
}
// write the graph to a file
if let Some(destination) = destination {
write_dataset_to_file(&graph, &destination)?;
Expand Down
55 changes: 41 additions & 14 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use std::fs;
use std::io::{BufReader, Write};
use std::path::Path;
use walkdir::WalkDir;
use std::fmt;
use std::fmt::{self, Display};

// custom derive for ontologies field as vec of Ontology
fn ontologies_ser<S>(
Expand All @@ -56,6 +56,23 @@ where
Ok(map)
}

/// Records an ontology graph that could not be included while building a
/// union graph, together with a human-readable reason for the failure.
///
/// The `Display` implementation renders both pieces of information in a
/// single line suitable for printing to the user.
#[derive(Debug, Clone)]
pub struct FailedImport {
    /// Identifier of the graph whose import failed.
    ontology: GraphIdentifier,
    /// Description of why the import failed (e.g. "Graph not found").
    error: String,
}

impl FailedImport {
    /// Creates a record stating that importing `ontology` failed with the
    /// message `error`.
    pub fn new(ontology: GraphIdentifier, error: String) -> Self {
        Self { ontology, error }
    }

    /// Returns the identifier of the ontology that could not be imported.
    ///
    /// The fields are private, so this accessor is the only way for callers
    /// to inspect the failing graph without parsing the `Display` output.
    pub fn ontology(&self) -> &GraphIdentifier {
        &self.ontology
    }

    /// Returns the reason the import failed.
    pub fn error(&self) -> &str {
        &self.error
    }
}

impl Display for FailedImport {
    /// Renders the failure as `Failed to import ontology <ontology>: <error>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!(
            "Failed to import ontology {}: {}",
            self.ontology, self.error
        ))
    }
}

pub struct EnvironmentStatus {
// true if there is an environment that ontoenv can find
exists: bool,
Expand Down Expand Up @@ -831,25 +848,31 @@ impl OntoEnv {
Ok(closure)
}

/// Returns a graph containing the union of all graphs_ids
/// Returns a graph containing the union of all graphs_ids, along with a list of
/// graphs that could and could not be imported.
pub fn get_union_graph(
&self,
graph_ids: &[GraphIdentifier],
rewrite_sh_prefixes: Option<bool>,
remove_owl_imports: Option<bool>,
// TODO: remove_ontology_declarations
) -> Result<Dataset> {
) -> Result<(Dataset, Vec<GraphIdentifier>, Option<Vec<FailedImport>>)> {
// compute union of all graphs
let mut union: Dataset = Dataset::new();
let store = self.store();
let mut failed_imports: Vec<FailedImport> = vec![];
let mut successful_imports: Vec<GraphIdentifier> = vec![];
for id in graph_ids {
let graphname: NamedOrBlankNode = match id.graphname()? {
GraphName::NamedNode(n) => NamedOrBlankNode::NamedNode(n),
_ => continue,
};

if !store.contains_named_graph(graphname.as_ref())? {
return Err(anyhow::anyhow!("Graph not found: {:?}", id));
failed_imports.push(FailedImport {
ontology: id.clone(),
error: "Graph not found".to_string(),
});
continue;
}

let mut count = 0;
Expand Down Expand Up @@ -881,12 +904,9 @@ impl OntoEnv {
ONTOLOGY,
graphname.as_ref(),
);
if !union.remove(to_remove) {
error!("Failed to remove ontology declaration: {:?}", to_remove);
}
union.remove(to_remove);
}


successful_imports.push(id.clone());
info!("Added {} triples from graph: {:?}", count, id);
}
let first_id = graph_ids
Expand All @@ -896,15 +916,22 @@ impl OntoEnv {

// Rewrite sh:prefixes
// defaults to true if not specified
if let Some(true) = rewrite_sh_prefixes.or(Some(true)) {
if rewrite_sh_prefixes.unwrap_or(true) {
transform::rewrite_sh_prefixes(&mut union, root_ontology);
}
// remove owl:imports
if let Some(true) = remove_owl_imports.or(Some(true)) {
transform::remove_owl_imports(&mut union)
if remove_owl_imports.unwrap_or(true) {
let to_remove: Vec<NamedNodeRef> = graph_ids.iter().map(|id| id.into()).collect();
println!("Removing owl:imports: {:?}", to_remove);
transform::remove_owl_imports(&mut union, Some(&to_remove));
}
transform::remove_ontology_declarations(&mut union, root_ontology);
Ok(union)
let failed_imports = if failed_imports.is_empty() {
None
} else {
Some(failed_imports)
};
Ok((union, successful_imports, failed_imports))
}

/// Returns a list of issues with the environment
Expand Down
12 changes: 12 additions & 0 deletions lib/src/ontology.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ impl std::fmt::Display for GraphIdentifier {
}
}

/// Consumes a `GraphIdentifier` and yields its name as an owned `NamedNode`.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then provides `Into<NamedNode> for GraphIdentifier` automatically, so
/// existing `.into()` call sites keep working while `NamedNode::from(id)`
/// becomes available as well.
impl From<GraphIdentifier> for NamedNode {
    fn from(id: GraphIdentifier) -> Self {
        id.name
    }
}

/// Borrows the name of a `GraphIdentifier` as a `NamedNodeRef` tied to the
/// identifier's lifetime.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then provides `Into<NamedNodeRef<'a>> for &'a GraphIdentifier`
/// automatically, so existing `.into()` call sites keep working.
impl<'a> From<&'a GraphIdentifier> for NamedNodeRef<'a> {
    fn from(id: &'a GraphIdentifier) -> Self {
        (&id.name).into()
    }
}

impl GraphIdentifier {
pub fn new(name: NamedNodeRef) -> Self {
// location is same as name
Expand Down
54 changes: 38 additions & 16 deletions lib/src/transform.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::consts::{DECLARE, IMPORTS, ONTOLOGY, PREFIXES, TYPE};
use oxigraph::model::{Dataset, Graph, Quad, QuadRef, SubjectRef, Triple, TripleRef};
use oxigraph::model::{Dataset, Graph, Quad, QuadRef, SubjectRef, Triple, TripleRef, NamedNodeRef, TermRef};

/// Rewrites all sh:prefixes in the graph to point to the provided root
pub fn rewrite_sh_prefixes(graph: &mut Dataset, root: SubjectRef) {
Expand Down Expand Up @@ -68,13 +68,24 @@ pub fn rewrite_sh_prefixes_graph(graph: &mut Graph, root: SubjectRef) {

/// Remove owl:imports statements from a graph. Can be helpful to do after computing the union of
/// all imports so that downstream tools do not attempt to fetch these graph dependencies
/// themselves
pub fn remove_owl_imports(graph: &mut Dataset) {
// remove owl:imports
let mut to_remove: Vec<Quad> = vec![];
for quad in graph.quads_for_predicate(IMPORTS) {
to_remove.push(quad.into());
}
/// themselves. If ontologies_to_remove is provided, only remove owl:imports to those ontologies
pub fn remove_owl_imports(graph: &mut Dataset, ontologies_to_remove: Option<&[NamedNodeRef]>) {
let to_remove: Vec<Quad> = graph.quads_for_predicate(IMPORTS)
.filter_map(|quad| {
match quad.object {
TermRef::NamedNode(obj) => {
if ontologies_to_remove.map_or(true, |ontologies| ontologies.contains(&obj)) {
Some(quad.into())
} else {
None
}
}
_ => None
}
})
.collect();

// Remove the collected quads
for quad in to_remove {
graph.remove(quad.as_ref());
}
Expand All @@ -83,14 +94,25 @@ pub fn remove_owl_imports(graph: &mut Dataset) {
/// Remove owl:imports statements from a graph. Can be helpful to do after computing the union of
/// all imports so that downstream tools do not attempt to fetch these graph dependencies
/// themselves
pub fn remove_owl_imports_graph(graph: &mut Graph) {
// remove owl:imports
let mut to_remove: Vec<Triple> = vec![];
for triple in graph.triples_for_predicate(IMPORTS) {
to_remove.push(triple.into());
}
for triple in to_remove {
graph.remove(triple.as_ref());
pub fn remove_owl_imports_graph(graph: &mut Graph, ontologies_to_remove: Option<&[NamedNodeRef]>) {
let to_remove: Vec<Triple> = graph.triples_for_predicate(IMPORTS)
.filter_map(|triple| {
match triple.object {
TermRef::NamedNode(obj) => {
if ontologies_to_remove.map_or(true, |ontologies| ontologies.contains(&obj)) {
Some(triple.into())
} else {
None
}
}
_ => None
}
})
.collect();

// Remove the collected triples
for quad in to_remove {
graph.remove(quad.as_ref());
}
}

Expand Down
72 changes: 38 additions & 34 deletions lib/src/util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Result;

use std::io::Read;
use std::io::{Read, Seek};
use std::path::Path;

use reqwest::header::CONTENT_TYPE;
Expand Down Expand Up @@ -58,6 +58,42 @@ pub fn read_file(file: &Path) -> Result<OxigraphGraph> {
Ok(graph)
}

/// Parses RDF from `original_content` into a graph, trying several
/// serializations in turn.
///
/// The formats are attempted in this order: `format` (or Turtle when `format`
/// is `None`), then Turtle, RDF/XML and N-Triples. A format that has already
/// been attempted is not tried a second time. Before each attempt the
/// underlying reader is rewound to the start.
///
/// An attempt succeeds only if the parser reported no error AND at least one
/// triple was read; a parse error discards whatever was read so far so that a
/// truncated graph is never returned as a success.
///
/// # Errors
///
/// Returns an error if rewinding the reader fails, or if no format produced a
/// non-empty graph without parse errors. In the latter case the message
/// includes the first parser error encountered, when there was one.
fn read_format<T: Read + Seek>(mut original_content: BufReader<T>, format: Option<RdfFormat>) -> Result<OxigraphGraph> {
    let preferred = format.unwrap_or(RdfFormat::Turtle);
    let mut attempted: Vec<RdfFormat> = Vec::with_capacity(4);
    // The first error comes from the most likely format, so it is the most
    // useful one to surface if every attempt fails.
    let mut first_error: Option<String> = None;

    for candidate in [
        preferred,
        RdfFormat::Turtle,
        RdfFormat::RdfXml,
        RdfFormat::NTriples,
    ] {
        // Re-parsing the same bytes with the same format cannot give a
        // different outcome, so skip duplicates.
        if attempted.contains(&candidate) {
            continue;
        }
        attempted.push(candidate);

        // The parser reads from the underlying reader directly, so rewind that
        // reader (not the BufReader wrapper) before every attempt.
        let content = original_content.get_mut();
        content.rewind()?;

        let mut graph = OxigraphGraph::new();
        let mut parse_failed = false;
        for quad in RdfParser::from_format(candidate).for_reader(content) {
            match quad {
                Ok(q) => {
                    let triple = Triple::new(q.subject, q.predicate, q.object);
                    graph.insert(&triple);
                }
                Err(e) => {
                    if first_error.is_none() {
                        first_error = Some(e.to_string());
                    }
                    parse_failed = true;
                    break;
                }
            }
        }

        // Accept the result only when parsing completed cleanly; a non-empty
        // graph left behind by a failed parse is incomplete.
        if !parse_failed && !graph.is_empty() {
            return Ok(graph);
        }
    }

    match first_error {
        Some(e) => Err(anyhow::anyhow!("Failed to parse graph: {}", e)),
        None => Err(anyhow::anyhow!("Failed to parse graph")),
    }
}

pub fn read_url(file: &str) -> Result<OxigraphGraph> {
debug!("Reading url: {}", file);

Expand All @@ -83,39 +119,7 @@ pub fn read_url(file: &str) -> Result<OxigraphGraph> {
});

let content: BufReader<_> = BufReader::new(std::io::Cursor::new(resp.bytes()?));

// if content type is known, use it to parse the graph
if let Some(format) = content_type {
let parser = RdfParser::from_format(format);
let mut graph = OxigraphGraph::new();
let parser = parser.for_reader(content);
for quad in parser {
let quad = quad?;
let triple = Triple::new(quad.subject, quad.predicate, quad.object);
graph.insert(&triple);
}
return Ok(graph);
}

// if content type is unknown, try all formats. Requires us to make a copy of the content
// since we can't rewind the reader
let content_vec: Vec<u8> = content.bytes().map(|b| b.unwrap()).collect();

for format in [RdfFormat::Turtle, RdfFormat::RdfXml, RdfFormat::NTriples] {
let vcontent = BufReader::new(std::io::Cursor::new(&content_vec));
let parser = RdfParser::from_format(format);
let mut graph = OxigraphGraph::new();

// TODO: if there's an error on parser.read_triples, try the next format
let parser = parser.for_reader(vcontent);
for quad in parser {
let quad = quad?;
let triple = Triple::new(quad.subject, quad.predicate, quad.object);
graph.insert(&triple);
}
return Ok(graph);
}
Err(anyhow::anyhow!("Failed to parse graph from {}", file))
read_format(content, content_type)
}

// return a "impl IntoIterator<Item = impl Into<Quad>>" for a graph. Iter through
Expand Down
Loading

0 comments on commit 5561be4

Please sign in to comment.