Cleaning up Dataset class, adding graph tracking to store API, as discussed in #307

Summary of changes:
* added methods `add_graph` and `remove_graph` to the Store API and implemented them for Sleepycat and IOMemory. A flag, `graph_aware`, is set on the store if these methods are supported; the default implementations raise an exception.
* made the dataset require a store with the `graph_aware` flag set.
* removed the graph state kept directly in the `Dataset` class.
* removed the `dataset.add_quad` and `remove_quad` methods. The `add`/`remove` methods of `ConjunctiveGraph` are smart enough to work with triples or quads.
* removed the `dataset.graphs` method - it would now do exactly the same as `contexts`.
* cleaned up a bit more of the confusion over whether Graph instances or Graph identifiers are passed to store methods. (#225)
RDFLib · Jun 26, 2013 · faae555 · faae555
1 parent abf07ed
commit faae555
Show file tree

Hide file tree

Showing 9 changed files with 256 additions and 134 deletions.
diff --git a/rdflib/graph.py b/rdflib/graph.py
@@ -1246,6 +1246,8 @@ def _spoc(self, triple_or_quad, default=False):
         helper method for having methods that support 
         either triples or quads
         """
+        if triple_or_quad is None: 
+            return (None, None, None, self.default_context if default else None)
         if len(triple_or_quad) == 3: 
             c = self.default_context if default else None
             (s, p, o) = triple_or_quad
@@ -1329,13 +1331,12 @@ def triples(self, triple_or_quad, context=None):
             for (s, p, o), cg in self.store.triples((s, p, o), context=context):
                 yield s, p, o
 
-    def quads(self, pattern=None):
+    def quads(self, triple_or_quad=None):
         """Iterate over all the quads in the entire conjunctive graph"""
-        if pattern is None:
-            s, p, o = (None, None, None)
-        else:
-            s, p, o = pattern
-        for (s, p, o), cg in self.store.triples((s, p, o), context=None):
+
+        s,p,o,c = self._spoc(triple_or_quad)
+
+        for (s, p, o), cg in self.store.triples((s, p, o), context=c):
             for ctx in cg:
                 yield s, p, o, ctx
 
@@ -1356,11 +1357,11 @@ def contexts(self, triple=None):
         """
         for context in self.store.contexts(triple):
             if isinstance(context, Graph):
+                # TODO: One of these should never happen and probably
+                # should raise an exception rather than smoothing over
+                # the weirdness - see #225
                 yield context
             else:
-                # TODO: This should never happen and probably should
-                # raise an exception rather than smoothing over the 
-                # weirdness - see #225
                 yield self.get_context(context)
 
     def get_context(self, identifier, quoted=False):
@@ -1415,7 +1416,7 @@ def __reduce__(self):
 
 
 
-
+DATASET_DEFAULT_GRAPH_ID = URIRef('urn:x-rdflib:default')
 
 class Dataset(ConjunctiveGraph):
     __doc__ = format_doctest_out("""
@@ -1438,8 +1439,6 @@ class Dataset(ConjunctiveGraph):
     >>> # Create a graph in the dataset, if the graph name has already been
     >>> # used, the corresponding graph will be returned
     >>> # (ie, the Dataset keeps track of the constituent graphs)
-    >>> # The special argument Dataset.DEFAULT can be used to return the
-    >>> # default graph
     >>> g = ds.graph(URIRef('http://www.example.com/gr'))
     >>>
     >>> # add triples to the new graph as usual
@@ -1448,7 +1447,7 @@ class Dataset(ConjunctiveGraph):
     ...     URIRef('http://example.org/y'),
     ...     Literal('bar')) )
     >>> # alternatively: add a quad to the dataset -> goes to the graph
-    >>> ds.add_quad(
+    >>> ds.add(
     ...     (URIRef('http://example.org/x'),
     ...     URIRef('http://example.org/z'),
     ...     Literal('foo-bar'),g) )
@@ -1522,11 +1521,12 @@ class Dataset(ConjunctiveGraph):
     .. versionadded:: 4.0
     """)
 
-    DEFAULT = "DEFAULT"
-
     def __init__(self, store='default'):
         super(Dataset, self).__init__(store=store, identifier=None)
-        self.graph_names = {Dataset.DEFAULT: self}
+
+        if not self.store.graph_aware: 
+            raise Exception("DataSet must be backed by a graph-aware store!")
+        self.default_context = Graph(store=self.store, identifier=DATASET_DEFAULT_GRAPH_ID)
 
     def __str__(self):
         pattern = ("[a rdflib:Dataset;rdflib:storage "
@@ -1540,102 +1540,41 @@ def graph(self, identifier=None):
                 "genid", "http://rdflib.net" + rdflib_skolem_genid,
                 override=False)
             identifier = BNode().skolemize()
-        elif identifier == Dataset.DEFAULT:
-            return self
         else:
             if isinstance(identifier, BNode):
                 raise Exception(
                     "Blank nodes cannot be Graph identifiers in RDF Datasets")
             if not isinstance(identifier, URIRef):
                 identifier = URIRef(identifier)
-
-        if identifier in self.graph_names.keys():
-            return self.graph_names[identifier]
-        else:
-            retval = Graph(store=self.store, identifier=identifier)
-            self.graph_names[identifier] = retval
-            return retval
+
+        g = self.get_context(identifier)
+        self.store.add_graph(g)
+        return g
 
     def remove_graph(self, g):
-        if g is None or g == Dataset.DEFAULT:
-            # default graph cannot be removed
-            return
-        else:
-            if isinstance(g, Graph):
-                try:
-                    del self.graph_names[g.identifier]
-                    self.remove_context(g.identifier)
-                except KeyError:
-                    pass
-            else:
-                try:
-                    del self.graph_names[URIRef(g)]
-                    self.remove_context(g)
-                except KeyError:
-                    pass
+        if not isinstance(g, Graph):
+            g = self.get_context(g)
 
-    def graphs(self, empty=True):
-        if empty:
-            # All graphs should be returned, including the empty ones:
-            for n in self.graph_names.keys():
-                yield n
-        else:
-            # Only non-empty graphs should be returned; the contexts() call of
-            # the conjunctive graph does the job
-            for c in self.contexts():
-                if isinstance(c.identifier, BNode):
-                    yield Dataset.DEFAULT
-                else:
-                    yield c.identifier
-
-    def add_quad(self, quad):
-        (s, p, o, g) = quad
-        if g is None:
-            self.add((s, p, o))
-        else:
-            if isinstance(g, Graph):
-                try:
-                    self.graph_names[g.identifier].add((s, p, o))
-                except KeyError:
-                    pass
-            else:
-                try:
-                    self.graph_names[URIRef(g)].add((s, p, o))
-                except KeyError:
-                    pass
-
-    def remove_quad(self, (s, p, o, g)):
-        if g is None:
-            self.remove((s, p, o))
-        else:
-            if isinstance(g, Graph):
-                try:
-                    self.graph_names[g.identifier].remove((s, p, o))
-                except KeyError:
-                    pass
-            else:
-                try:
-                    self.graph_names[URIRef(g)].remove((s, p, o))
-                except KeyError:
-                    pass
+        self.store.remove_graph(g)
+        if g is None or g == self.default_context:
+            # default graph cannot be removed
+            # only triples deleted, so add it back in
+            self.store.add_graph(self.default_context)
+
+    def contexts(self, triple=None): 
+        default = False
+        for c in super(Dataset, self).contexts(triple): 
+            default|=c.identifier == DATASET_DEFAULT_GRAPH_ID
+            yield c
+        if not default: yield self.graph(DATASET_DEFAULT_GRAPH_ID)
+
 
     def quads(self, quad):
-        (s, p, o, g) = quad
-        for s, p, o, c in super(Dataset, self).quads((s, p, o)):
-            if g is None:
-                # all quads have to be returned. However, the blank node name
-                # for the default graph should be removed
-                if isinstance(c.identifier, BNode):
-                    yield (s, p, o, None)
-                else:
-                    yield (s, p, o, c.identifier)
-            elif isinstance(g, Graph):
-                # only quads of a specific graph should be returned:
-                if g.identifier == c.identifier:
-                    yield (s, p, o, c.identifier)
+        for s, p, o, c in super(Dataset, self).quads(quad):
+            if c.identifier==self.default_context:
+                yield (s, p, o, None)
             else:
-                if ("%s" % g) == ("%s" % c.identifier):
-                    yield (s, p, o, c.identifier)
+                yield (s, p, o, c.identifier)
 
 
 class QuotedGraph(Graph):

diff --git a/rdflib/plugins/memory.py b/rdflib/plugins/memory.py
@@ -191,6 +191,7 @@ class IOMemory(Store):
     """
     context_aware = True
     formula_aware = True
+    graph_aware = True
 
     # The following variable name conventions are used in this class:
     #
@@ -242,7 +243,7 @@ def namespaces(self):
     def add(self, triple, context, quoted=False):
         Store.add(self, triple, context, quoted)
 
-        if context is not None and context not in self.__all_contexts:
+        if context is not None:
             self.__all_contexts.add(context)
 
         enctriple = self.__encodeTriple(triple)
@@ -285,7 +286,9 @@ def remove(self, triplepat, context=None):
 
                 del self.__tripleContexts[enctriple]
 
-        if triplepat == (None, None, None) and context in self.__all_contexts:
+        if triplepat == (None, None, None) and \
+                context in self.__all_contexts and \
+                not self.graph_aware: 
             # remove the whole context
             self.__all_contexts.remove(context)
 
@@ -340,7 +343,7 @@ def triples(self, triplein, context=None):
                 if self.__tripleHasContext(enctriple, cid))
 
     def contexts(self, triple=None):
-        if triple is None:
+        if triple is None or triple is (None,None,None):
             return (context for context in self.__all_contexts)
 
         enctriple = self.__encodeTriple(triple)
@@ -354,6 +357,20 @@ def __len__(self, context=None):
         cid = self.__obj2id(context)
         return sum(1 for enctriple, contexts in self.__all_triples(cid))
 
+    def add_graph(self, graph): 
+        if not self.graph_aware:
+            Store.add_graph(self, graph)
+        else:
+            self.__all_contexts.add(graph)
+
+    def remove_graph(self, graph): 
+        if not self.graph_aware:
+            Store.remove_graph(self, graph)
+        else:
+            self.remove((None,None,None), graph)
+            self.__all_contexts.remove(graph)
+
+
     # internal utility methods below
 
     def __addTripleContext(self, enctriple, context, quoted):
@@ -414,8 +431,8 @@ def __removeTripleContext(self, enctriple, cid):
             self.__tripleContexts[enctriple] = ctxs
 
     def __obj2id(self, obj):
-        """encode object, storing it in the encoding map if necessary, and
-           return the integer key"""
+        """encode object, storing it in the encoding map if necessary,
+           and return the integer key"""
         if obj not in self.__obj2int:
             id = randid()
             while id in self.__int2obj:
@@ -430,20 +447,21 @@ def __encodeTriple(self, triple):
         return tuple(map(self.__obj2id, triple))
 
     def __decodeTriple(self, enctriple):
-        """decode a whole encoded triple, returning the original triple"""
+        """decode a whole encoded triple, returning the original
+        triple"""
         return tuple(map(self.__int2obj.get, enctriple))
 
     def __all_triples(self, cid):
-        """return a generator which yields all the triples (unencoded) of
-           the given context"""
+        """return a generator which yields all the triples (unencoded)
+           of the given context"""
         for tset in self.__subjectIndex.values():
             for enctriple in tset.copy():
                 if self.__tripleHasContext(enctriple, cid):
                     yield self.__decodeTriple(enctriple), self.__contexts(enctriple)
 
     def __contexts(self, enctriple):
-        """return a generator for all the non-quoted contexts (unencoded)
-           the encoded triple appears in"""
+        """return a generator for all the non-quoted contexts
+           (unencoded) the encoded triple appears in"""
         return (self.__int2obj.get(cid) for cid in self.__getTripleContexts(enctriple, skipQuoted=True) if cid is not None)
 
     def __emptygen(self):

diff --git a/rdflib/plugins/sleepycat.py b/rdflib/plugins/sleepycat.py
@@ -43,6 +43,7 @@ class Sleepycat(Store):
     context_aware = True
     formula_aware = True
     transaction_aware = False
+    graph_aware = True
     db_env = None
 
     def __init__(self, configuration=None, identifier=None):
@@ -495,6 +496,12 @@ def contexts(self, triple=None):
                     current = None
                 cursor.close()
 
+    def add_graph(self, graph): 
+        self.__contexts.put(bb(self._to_string(graph)), "")
+
+    def remove_graph(self, graph): 
+        self.remove((None, None, None), graph)
+
     def _from_string(self, i):
         k = self.__i2k.get(int(i))
         return self._loads(k)

diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py
@@ -178,7 +178,6 @@ class SPARQLStore(NSSPARQLWrapper, Store):
     formula_aware = False
     transaction_aware = False
     regex_matching = NATIVE_REGEX
-    batch_unification = False
 
     def __init__(self,
                  endpoint=None, bNodeAsURI=False,