Skip to content

Commit

Permalink
Cleaning up Dataset class, adding graph tracking to store API, as
Browse files Browse the repository at this point in the history
discussed in #307

Summary of changes:

 * added methods ```add_graph``` and ```remove_graph``` to the Store
   API, implemented these for Sleepycat and IOMemory. A flag,
   ```graph_aware```, is set on the store if these methods are
   supported; the default implementations will raise an exception.

 * made the dataset require a store with the ```graph_aware```
   flag set.

 * removed the graph-state kept in the ```Dataset``` class directly.

 * removed ```dataset.add_quads```, ```remove_quads``` methods. The
   ```add/remove``` methods of ```ConjunctiveGraph``` are smart enough
   to work with triples or quads.

 * removed the ```dataset.graphs``` method - it would now do exactly
   the same as ```contexts```

 * cleaned up a bit more of the confusion over whether Graph instances
   or Graph identifiers are passed to store methods. (#225)
  • Loading branch information
gromgull committed Jun 26, 2013
1 parent abf07ed commit faae555
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 134 deletions.
139 changes: 39 additions & 100 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1246,6 +1246,8 @@ def _spoc(self, triple_or_quad, default=False):
helper method for having methods that support
either triples or quads
"""
if triple_or_quad is None:
return (None, None, None, self.default_context if default else None)
if len(triple_or_quad) == 3:
c = self.default_context if default else None
(s, p, o) = triple_or_quad
Expand Down Expand Up @@ -1329,13 +1331,12 @@ def triples(self, triple_or_quad, context=None):
for (s, p, o), cg in self.store.triples((s, p, o), context=context):
yield s, p, o

def quads(self, pattern=None):
def quads(self, triple_or_quad=None):
"""Iterate over all the quads in the entire conjunctive graph"""
if pattern is None:
s, p, o = (None, None, None)
else:
s, p, o = pattern
for (s, p, o), cg in self.store.triples((s, p, o), context=None):

s,p,o,c = self._spoc(triple_or_quad)

for (s, p, o), cg in self.store.triples((s, p, o), context=c):
for ctx in cg:
yield s, p, o, ctx

Expand All @@ -1356,11 +1357,11 @@ def contexts(self, triple=None):
"""
for context in self.store.contexts(triple):
if isinstance(context, Graph):
# TODO: One of these should never happen and probably
# should raise an exception rather than smoothing over
# the weirdness - see #225
yield context
else:
# TODO: This should never happen and probably should
# raise an exception rather than smoothing over the
# weirdness - see #225
yield self.get_context(context)

def get_context(self, identifier, quoted=False):
Expand Down Expand Up @@ -1415,7 +1416,7 @@ def __reduce__(self):




DATASET_DEFAULT_GRAPH_ID = URIRef('urn:x-rdflib:default')

class Dataset(ConjunctiveGraph):
__doc__ = format_doctest_out("""
Expand All @@ -1438,8 +1439,6 @@ class Dataset(ConjunctiveGraph):
>>> # Create a graph in the dataset, if the graph name has already been
>>> # used, the corresponding graph will be returned
>>> # (ie, the Dataset keeps track of the constituent graphs)
>>> # The special argument Dataset.DEFAULT can be used to return the
>>> # default graph
>>> g = ds.graph(URIRef('http://www.example.com/gr'))
>>>
>>> # add triples to the new graph as usual
Expand All @@ -1448,7 +1447,7 @@ class Dataset(ConjunctiveGraph):
... URIRef('http://example.org/y'),
... Literal('bar')) )
>>> # alternatively: add a quad to the dataset -> goes to the graph
>>> ds.add_quad(
>>> ds.add(
... (URIRef('http://example.org/x'),
... URIRef('http://example.org/z'),
... Literal('foo-bar'),g) )
Expand Down Expand Up @@ -1522,11 +1521,12 @@ class Dataset(ConjunctiveGraph):
.. versionadded:: 4.0
""")

DEFAULT = "DEFAULT"

def __init__(self, store='default'):
super(Dataset, self).__init__(store=store, identifier=None)
self.graph_names = {Dataset.DEFAULT: self}

if not self.store.graph_aware:
raise Exception("DataSet must be backed by a graph-aware store!")
self.default_context = Graph(store=self.store, identifier=DATASET_DEFAULT_GRAPH_ID)

def __str__(self):
pattern = ("[a rdflib:Dataset;rdflib:storage "
Expand All @@ -1540,102 +1540,41 @@ def graph(self, identifier=None):
"genid", "http://rdflib.net" + rdflib_skolem_genid,
override=False)
identifier = BNode().skolemize()
elif identifier == Dataset.DEFAULT:
return self
else:
if isinstance(identifier, BNode):
raise Exception(
"Blank nodes cannot be Graph identifiers in RDF Datasets")
if not isinstance(identifier, URIRef):
identifier = URIRef(identifier)

if identifier in self.graph_names.keys():
return self.graph_names[identifier]
else:
retval = Graph(store=self.store, identifier=identifier)
self.graph_names[identifier] = retval
return retval

g = self.get_context(identifier)
self.store.add_graph(g)
return g

def remove_graph(self, g):
if g is None or g == Dataset.DEFAULT:
# default graph cannot be removed
return
else:
if isinstance(g, Graph):
try:
del self.graph_names[g.identifier]
self.remove_context(g.identifier)
except KeyError:
pass
else:
try:
del self.graph_names[URIRef(g)]
self.remove_context(g)
except KeyError:
pass
if not isinstance(g, Graph):
g = self.get_context(g)

def graphs(self, empty=True):
if empty:
# All graphs should be returned, including the empty ones:
for n in self.graph_names.keys():
yield n
else:
# Only non-empty graphs should be returned; the contexts() call of
# the conjunctive graph does the job
for c in self.contexts():
if isinstance(c.identifier, BNode):
yield Dataset.DEFAULT
else:
yield c.identifier

def add_quad(self, quad):
(s, p, o, g) = quad
if g is None:
self.add((s, p, o))
else:
if isinstance(g, Graph):
try:
self.graph_names[g.identifier].add((s, p, o))
except KeyError:
pass
else:
try:
self.graph_names[URIRef(g)].add((s, p, o))
except KeyError:
pass

def remove_quad(self, (s, p, o, g)):
if g is None:
self.remove((s, p, o))
else:
if isinstance(g, Graph):
try:
self.graph_names[g.identifier].remove((s, p, o))
except KeyError:
pass
else:
try:
self.graph_names[URIRef(g)].remove((s, p, o))
except KeyError:
pass
self.store.remove_graph(g)
if g is None or g == self.default_context:
# default graph cannot be removed
# only triples deleted, so add it back in
self.store.add_graph(self.default_context)

def contexts(self, triple=None):
default = False
for c in super(Dataset, self).contexts(triple):
default|=c.identifier == DATASET_DEFAULT_GRAPH_ID
yield c
if not default: yield self.graph(DATASET_DEFAULT_GRAPH_ID)


def quads(self, quad):
(s, p, o, g) = quad
for s, p, o, c in super(Dataset, self).quads((s, p, o)):
if g is None:
# all quads have to be returned. However, the blank node name
# for the default graph should be removed
if isinstance(c.identifier, BNode):
yield (s, p, o, None)
else:
yield (s, p, o, c.identifier)
elif isinstance(g, Graph):
# only quads of a specific graph should be returned:
if g.identifier == c.identifier:
yield (s, p, o, c.identifier)
for s, p, o, c in super(Dataset, self).quads(quad):
if c.identifier==self.default_context:
yield (s, p, o, None)
else:
if ("%s" % g) == ("%s" % c.identifier):
yield (s, p, o, c.identifier)
yield (s, p, o, c.identifier)


class QuotedGraph(Graph):
Expand Down
38 changes: 28 additions & 10 deletions rdflib/plugins/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ class IOMemory(Store):
"""
context_aware = True
formula_aware = True
graph_aware = True

# The following variable name conventions are used in this class:
#
Expand Down Expand Up @@ -242,7 +243,7 @@ def namespaces(self):
def add(self, triple, context, quoted=False):
Store.add(self, triple, context, quoted)

if context is not None and context not in self.__all_contexts:
if context is not None:
self.__all_contexts.add(context)

enctriple = self.__encodeTriple(triple)
Expand Down Expand Up @@ -285,7 +286,9 @@ def remove(self, triplepat, context=None):

del self.__tripleContexts[enctriple]

if triplepat == (None, None, None) and context in self.__all_contexts:
if triplepat == (None, None, None) and \
context in self.__all_contexts and \
not self.graph_aware:
# remove the whole context
self.__all_contexts.remove(context)

Expand Down Expand Up @@ -340,7 +343,7 @@ def triples(self, triplein, context=None):
if self.__tripleHasContext(enctriple, cid))

def contexts(self, triple=None):
if triple is None:
if triple is None or triple is (None,None,None):
return (context for context in self.__all_contexts)

enctriple = self.__encodeTriple(triple)
Expand All @@ -354,6 +357,20 @@ def __len__(self, context=None):
cid = self.__obj2id(context)
return sum(1 for enctriple, contexts in self.__all_triples(cid))

def add_graph(self, graph):
if not self.graph_aware:
Store.add_graph(self, graph)
else:
self.__all_contexts.add(graph)

def remove_graph(self, graph):
if not self.graph_aware:
Store.remove_graph(self, graph)
else:
self.remove((None,None,None), graph)
self.__all_contexts.remove(graph)


# internal utility methods below

def __addTripleContext(self, enctriple, context, quoted):
Expand Down Expand Up @@ -414,8 +431,8 @@ def __removeTripleContext(self, enctriple, cid):
self.__tripleContexts[enctriple] = ctxs

def __obj2id(self, obj):
"""encode object, storing it in the encoding map if necessary, and
return the integer key"""
"""encode object, storing it in the encoding map if necessary,
and return the integer key"""
if obj not in self.__obj2int:
id = randid()
while id in self.__int2obj:
Expand All @@ -430,20 +447,21 @@ def __encodeTriple(self, triple):
return tuple(map(self.__obj2id, triple))

def __decodeTriple(self, enctriple):
"""decode a whole encoded triple, returning the original triple"""
"""decode a whole encoded triple, returning the original
triple"""
return tuple(map(self.__int2obj.get, enctriple))

def __all_triples(self, cid):
"""return a generator which yields all the triples (unencoded) of
the given context"""
"""return a generator which yields all the triples (unencoded)
of the given context"""
for tset in self.__subjectIndex.values():
for enctriple in tset.copy():
if self.__tripleHasContext(enctriple, cid):
yield self.__decodeTriple(enctriple), self.__contexts(enctriple)

def __contexts(self, enctriple):
"""return a generator for all the non-quoted contexts (unencoded)
the encoded triple appears in"""
"""return a generator for all the non-quoted contexts
(unencoded) the encoded triple appears in"""
return (self.__int2obj.get(cid) for cid in self.__getTripleContexts(enctriple, skipQuoted=True) if cid is not None)

def __emptygen(self):
Expand Down
7 changes: 7 additions & 0 deletions rdflib/plugins/sleepycat.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class Sleepycat(Store):
context_aware = True
formula_aware = True
transaction_aware = False
graph_aware = True
db_env = None

def __init__(self, configuration=None, identifier=None):
Expand Down Expand Up @@ -495,6 +496,12 @@ def contexts(self, triple=None):
current = None
cursor.close()

def add_graph(self, graph):
self.__contexts.put(bb(self._to_string(graph)), "")

def remove_graph(self, graph):
self.remove((None, None, None), graph)

def _from_string(self, i):
k = self.__i2k.get(int(i))
return self._loads(k)
Expand Down
1 change: 0 additions & 1 deletion rdflib/plugins/stores/sparqlstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ class SPARQLStore(NSSPARQLWrapper, Store):
formula_aware = False
transaction_aware = False
regex_matching = NATIVE_REGEX
batch_unification = False

def __init__(self,
endpoint=None, bNodeAsURI=False,
Expand Down
Loading

0 comments on commit faae555

Please sign in to comment.