diff --git a/rdflib/graph.py b/rdflib/graph.py index 699625609..089f6885b 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -1246,6 +1246,8 @@ def _spoc(self, triple_or_quad, default=False): helper method for having methods that support either triples or quads """ + if triple_or_quad is None: + return (None, None, None, self.default_context if default else None) if len(triple_or_quad) == 3: c = self.default_context if default else None (s, p, o) = triple_or_quad @@ -1329,13 +1331,12 @@ def triples(self, triple_or_quad, context=None): for (s, p, o), cg in self.store.triples((s, p, o), context=context): yield s, p, o - def quads(self, pattern=None): + def quads(self, triple_or_quad=None): """Iterate over all the quads in the entire conjunctive graph""" - if pattern is None: - s, p, o = (None, None, None) - else: - s, p, o = pattern - for (s, p, o), cg in self.store.triples((s, p, o), context=None): + + s,p,o,c = self._spoc(triple_or_quad) + + for (s, p, o), cg in self.store.triples((s, p, o), context=c): for ctx in cg: yield s, p, o, ctx @@ -1356,11 +1357,11 @@ def contexts(self, triple=None): """ for context in self.store.contexts(triple): if isinstance(context, Graph): + # TODO: One of these should never happen and probably + # should raise an exception rather than smoothing over + # the weirdness - see #225 yield context else: - # TODO: This should never happen and probably should - # raise an exception rather than smoothing over the - # weirdness - see #225 yield self.get_context(context) def get_context(self, identifier, quoted=False): @@ -1415,7 +1416,7 @@ def __reduce__(self): - +DATASET_DEFAULT_GRAPH_ID = URIRef('urn:x-rdflib:default') class Dataset(ConjunctiveGraph): __doc__ = format_doctest_out(""" @@ -1438,8 +1439,6 @@ class Dataset(ConjunctiveGraph): >>> # Create a graph in the dataset, if the graph name has already been >>> # used, the corresponding graph will be returned >>> # (ie, the Dataset keeps track of the constituent graphs) - >>> # The 
special argument Dataset.DEFAULT can be used to return the - >>> # default graph >>> g = ds.graph(URIRef('http://www.example.com/gr')) >>> >>> # add triples to the new graph as usual @@ -1448,7 +1447,7 @@ class Dataset(ConjunctiveGraph): ... URIRef('http://example.org/y'), ... Literal('bar')) ) >>> # alternatively: add a quad to the dataset -> goes to the graph - >>> ds.add_quad( + >>> ds.add( ... (URIRef('http://example.org/x'), ... URIRef('http://example.org/z'), ... Literal('foo-bar'),g) ) @@ -1522,11 +1521,12 @@ class Dataset(ConjunctiveGraph): .. versionadded:: 4.0 """) - DEFAULT = "DEFAULT" - def __init__(self, store='default'): super(Dataset, self).__init__(store=store, identifier=None) - self.graph_names = {Dataset.DEFAULT: self} + + if not self.store.graph_aware: + raise Exception("DataSet must be backed by a graph-aware store!") + self.default_context = Graph(store=self.store, identifier=DATASET_DEFAULT_GRAPH_ID) def __str__(self): pattern = ("[a rdflib:Dataset;rdflib:storage " @@ -1540,102 +1540,41 @@ def graph(self, identifier=None): "genid", "http://rdflib.net" + rdflib_skolem_genid, override=False) identifier = BNode().skolemize() - elif identifier == Dataset.DEFAULT: - return self else: if isinstance(identifier, BNode): raise Exception( "Blank nodes cannot be Graph identifiers in RDF Datasets") if not isinstance(identifier, URIRef): identifier = URIRef(identifier) - - if identifier in self.graph_names.keys(): - return self.graph_names[identifier] - else: - retval = Graph(store=self.store, identifier=identifier) - self.graph_names[identifier] = retval - return retval + + g = self.get_context(identifier) + self.store.add_graph(g) + return g def remove_graph(self, g): - if g is None or g == Dataset.DEFAULT: - # default graph cannot be removed - return - else: - if isinstance(g, Graph): - try: - del self.graph_names[g.identifier] - self.remove_context(g.identifier) - except KeyError: - pass - else: - try: - del self.graph_names[URIRef(g)] - 
self.remove_context(g) - except KeyError: - pass + if not isinstance(g, Graph): + g = self.get_context(g) - def graphs(self, empty=True): - if empty: - # All graphs should be returned, including the empty ones: - for n in self.graph_names.keys(): - yield n - else: - # Only non-empty graphs should be returned; the contexts() call of - # the conjunctive graph does the job - for c in self.contexts(): - if isinstance(c.identifier, BNode): - yield Dataset.DEFAULT - else: - yield c.identifier - - def add_quad(self, quad): - (s, p, o, g) = quad - if g is None: - self.add((s, p, o)) - else: - if isinstance(g, Graph): - try: - self.graph_names[g.identifier].add((s, p, o)) - except KeyError: - pass - else: - try: - self.graph_names[URIRef(g)].add((s, p, o)) - except KeyError: - pass - - def remove_quad(self, (s, p, o, g)): - if g is None: - self.remove((s, p, o)) - else: - if isinstance(g, Graph): - try: - self.graph_names[g.identifier].remove((s, p, o)) - except KeyError: - pass - else: - try: - self.graph_names[URIRef(g)].remove((s, p, o)) - except KeyError: - pass + self.store.remove_graph(g) + if g is None or g == self.default_context: + # default graph cannot be removed + # only triples deleted, so add it back in + self.store.add_graph(self.default_context) + + def contexts(self, triple=None): + default = False + for c in super(Dataset, self).contexts(triple): + default|=c.identifier == DATASET_DEFAULT_GRAPH_ID + yield c + if not default: yield self.graph(DATASET_DEFAULT_GRAPH_ID) + def quads(self, quad): - (s, p, o, g) = quad - for s, p, o, c in super(Dataset, self).quads((s, p, o)): - if g is None: - # all quads have to be returned. 
However, the blank node name - # for the default graph should be removed - if isinstance(c.identifier, BNode): - yield (s, p, o, None) - else: - yield (s, p, o, c.identifier) - elif isinstance(g, Graph): - # only quads of a specific graph should be returned: - if g.identifier == c.identifier: - yield (s, p, o, c.identifier) + for s, p, o, c in super(Dataset, self).quads(quad): + if c.identifier == self.default_context.identifier: + yield (s, p, o, None) else: - if ("%s" % g) == ("%s" % c.identifier): - yield (s, p, o, c.identifier) + yield (s, p, o, c.identifier) class QuotedGraph(Graph): diff --git a/rdflib/plugins/memory.py b/rdflib/plugins/memory.py index 307b554ed..18a520b24 100644 --- a/rdflib/plugins/memory.py +++ b/rdflib/plugins/memory.py @@ -191,6 +191,7 @@ class IOMemory(Store): """ context_aware = True formula_aware = True + graph_aware = True # The following variable name conventions are used in this class: # @@ -242,7 +243,7 @@ def namespaces(self): def add(self, triple, context, quoted=False): Store.add(self, triple, context, quoted) - if context is not None and context not in self.__all_contexts: + if context is not None: self.__all_contexts.add(context) enctriple = self.__encodeTriple(triple) @@ -285,7 +286,9 @@ def remove(self, triplepat, context=None): del self.__tripleContexts[enctriple] - if triplepat == (None, None, None) and context in self.__all_contexts: + if triplepat == (None, None, None) and \ + context in self.__all_contexts and \ + not self.graph_aware: # remove the whole context self.__all_contexts.remove(context) @@ -340,7 +343,7 @@ def triples(self, triplein, context=None): if self.__tripleHasContext(enctriple, cid)) def contexts(self, triple=None): - if triple is None: + if triple is None or triple == (None, None, None): return (context for context in self.__all_contexts) enctriple = self.__encodeTriple(triple) @@ -354,6 +357,20 @@ def __len__(self, context=None): cid = self.__obj2id(context) return sum(1 for enctriple, contexts in 
self.__all_triples(cid)) + def add_graph(self, graph): + if not self.graph_aware: + Store.add_graph(self, graph) + else: + self.__all_contexts.add(graph) + + def remove_graph(self, graph): + if not self.graph_aware: + Store.remove_graph(self, graph) + else: + self.remove((None,None,None), graph) + self.__all_contexts.discard(graph) + + # internal utility methods below def __addTripleContext(self, enctriple, context, quoted): @@ -414,8 +431,8 @@ def __removeTripleContext(self, enctriple, cid): self.__tripleContexts[enctriple] = ctxs def __obj2id(self, obj): - """encode object, storing it in the encoding map if necessary, and - return the integer key""" + """encode object, storing it in the encoding map if necessary, + and return the integer key""" if obj not in self.__obj2int: id = randid() while id in self.__int2obj: @@ -430,20 +447,21 @@ def __encodeTriple(self, triple): return tuple(map(self.__obj2id, triple)) def __decodeTriple(self, enctriple): - """decode a whole encoded triple, returning the original triple""" + """decode a whole encoded triple, returning the original + triple""" return tuple(map(self.__int2obj.get, enctriple)) def __all_triples(self, cid): - """return a generator which yields all the triples (unencoded) of - the given context""" + """return a generator which yields all the triples (unencoded) + of the given context""" for tset in self.__subjectIndex.values(): for enctriple in tset.copy(): if self.__tripleHasContext(enctriple, cid): yield self.__decodeTriple(enctriple), self.__contexts(enctriple) def __contexts(self, enctriple): - """return a generator for all the non-quoted contexts (unencoded) - the encoded triple appears in""" + """return a generator for all the non-quoted contexts + (unencoded) the encoded triple appears in""" return (self.__int2obj.get(cid) for cid in self.__getTripleContexts(enctriple, skipQuoted=True) if cid is not None) def __emptygen(self): diff --git a/rdflib/plugins/sleepycat.py b/rdflib/plugins/sleepycat.py index 
9c5d9e617..1591bf25a 100644 --- a/rdflib/plugins/sleepycat.py +++ b/rdflib/plugins/sleepycat.py @@ -43,6 +43,7 @@ class Sleepycat(Store): context_aware = True formula_aware = True transaction_aware = False + graph_aware = True db_env = None def __init__(self, configuration=None, identifier=None): @@ -495,6 +496,12 @@ def contexts(self, triple=None): current = None cursor.close() + def add_graph(self, graph): + self.__contexts.put(bb(self._to_string(graph)), "") + + def remove_graph(self, graph): + self.remove((None, None, None), graph) + def _from_string(self, i): k = self.__i2k.get(int(i)) return self._loads(k) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 23225e1de..97837f5b2 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -178,7 +178,6 @@ class SPARQLStore(NSSPARQLWrapper, Store): formula_aware = False transaction_aware = False regex_matching = NATIVE_REGEX - batch_unification = False def __init__(self, endpoint=None, bNodeAsURI=False, diff --git a/rdflib/store.py b/rdflib/store.py index 82ef74050..f4f4aa21e 100644 --- a/rdflib/store.py +++ b/rdflib/store.py @@ -20,6 +20,8 @@ ``Transaction-capable``: capable of providing transactional integrity to the RDF operations performed on it. +``Graph-aware``: capable of keeping track of empty graphs. + ------ """ @@ -111,7 +113,7 @@ class Store(object): context_aware = False formula_aware = False transaction_aware = False - batch_unification = False + graph_aware = False def __init__(self, configuration=None, identifier=None): """ @@ -268,9 +270,9 @@ def triples(self, triple_pattern, context=None): for example, REGEXTerm, URIRef, Literal, BNode, Variable, Graph, QuotedGraph, Date? DateRange? - A conjunctive query can be indicated by either providing a value of - None for the context or the identifier associated with the Conjunctive - Graph (if it is context aware). 
+ :param context: A conjunctive query can be indicated by either + providing a value of None, or a specific context can be + queried by passing a Graph instance (if store is context aware). """ subject, predicate, object = triple_pattern @@ -282,12 +284,18 @@ def __len__(self, context=None): quoted (asserted) statements if the context is not specified, otherwise it should return the number of statements in the formula or context given. + + :param context: a graph instance to query or None """ def contexts(self, triple=None): """ Generator over all contexts in the graph. If triple is specified, a generator over all contexts the triple is in. + + if store is graph_aware, may also return empty contexts + + :returns: a generator over Nodes """ def query(self, query, initNs, initBindings, queryGraph, **kwargs): @@ -347,3 +355,22 @@ def commit(self): def rollback(self): """ """ + + # Optional graph methods + + def add_graph(self, graph): + """ + Add a graph to the store, no effect if the graph already + exists. 
+ :param graph: a Graph instance + """ + raise Exception("Graph method called on non-graph_aware store") + + def remove_graph(self, graph): + """ + Remove a graph from the store, this should also remove all + triples in the graph + + :param graph: a Graph instance + """ + raise Exception("Graph method called on non-graph_aware store") diff --git a/test/test_conjunctive_graph.py b/test/test_conjunctive_graph.py index 6014383d6..8bd7d2f8f 100644 --- a/test/test_conjunctive_graph.py +++ b/test/test_conjunctive_graph.py @@ -1,3 +1,7 @@ +""" +Tests for ConjunctiveGraph that do not depend on the underlying store +""" + from rdflib import ConjunctiveGraph, Graph from rdflib.term import Identifier, URIRef, BNode from rdflib.parser import StringInputSource diff --git a/test/test_dataset.py b/test/test_dataset.py new file mode 100644 index 000000000..f769d49e5 --- /dev/null +++ b/test/test_dataset.py @@ -0,0 +1,131 @@ +import sys +import os +import unittest + +from tempfile import mkdtemp, mkstemp +import shutil +from rdflib import Graph, Dataset, URIRef, BNode, plugin +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID + +from nose.exc import SkipTest + + +class DatasetTestCase(unittest.TestCase): + store = 'default' + slow = True + tmppath = None + + def setUp(self): + try: + self.graph = Dataset(store=self.store) + except ImportError: + raise SkipTest( + "Dependencies for store '%s' not available!" % self.store) + if self.store == "SQLite": + _, self.tmppath = mkstemp( + prefix='test', dir='/tmp', suffix='.sqlite') + else: + self.tmppath = mkdtemp() + self.graph.open(self.tmppath, create=True) + self.michel = URIRef(u'michel') + self.tarek = URIRef(u'tarek') + self.bob = URIRef(u'bob') + self.likes = URIRef(u'likes') + self.hates = URIRef(u'hates') + self.pizza = URIRef(u'pizza') + self.cheese = URIRef(u'cheese') + + self.c1 = URIRef(u'context-1') + self.c2 = URIRef(u'context-2') + + # delete the graph for each test! 
+ self.graph.remove((None, None, None)) + + def tearDown(self): + self.graph.close() + if os.path.isdir(self.tmppath): + shutil.rmtree(self.tmppath) + else: + os.remove(self.tmppath) + + + def testGraphAware(self): + if not self.graph.store.graph_aware: return + + g = self.graph + g1 = g.graph(self.c1) + + + # added graph exists + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([self.c1, DATASET_DEFAULT_GRAPH_ID])) + + # added graph is empty + self.assertEquals(len(g1), 0) + + g1.add( (self.tarek, self.likes, self.pizza) ) + + # added graph still exists + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([self.c1, DATASET_DEFAULT_GRAPH_ID])) + + # added graph contains one triple + self.assertEquals(len(g1), 1) + + g1.remove( (self.tarek, self.likes, self.pizza) ) + + # added graph is empty + self.assertEquals(len(g1), 0) + + # graph still exists, although empty + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([self.c1, DATASET_DEFAULT_GRAPH_ID])) + + g.remove_graph(self.c1) + + # graph is gone + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([DATASET_DEFAULT_GRAPH_ID])) + + def testDefaultGraph(self): + + self.graph.add(( self.tarek, self.likes, self.pizza)) + self.assertEquals(len(self.graph), 1) + # only default exists + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([DATASET_DEFAULT_GRAPH_ID])) + + # removing default graph removes triples but not actual graph + self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID) + + self.assertEquals(len(self.graph), 0) + # default still exists + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([DATASET_DEFAULT_GRAPH_ID])) + + + + + +# dynamically create classes for each registered Store + +pluginname = None +if __name__ == '__main__': + if len(sys.argv) > 1: + pluginname = sys.argv[1] + +tests = 0 +for s in plugin.plugins(pluginname, plugin.Store): + if s.name in ('default', 
'IOMemory', 'Auditable', + 'Concurrent', 'SPARQLStore', 'SPARQLUpdateStore'): + continue # these are tested by default + if not s.getClass().graph_aware: + continue + + locals()["t%d" % tests] = type("%sContextTestCase" % s.name, ( + DatasetTestCase,), {"store": s.name}) + tests += 1 + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_graph.py b/test/test_graph.py index d35c0a090..46aaf9fd3 100644 --- a/test/test_graph.py +++ b/test/test_graph.py @@ -177,8 +177,8 @@ def testConnected(self): self.assertEquals(False, graph.connected()) def testSub(self): - g1 = Graph() - g2 = Graph() + g1 = self.graph + g2 = Graph(store=g1.store) tarek = self.tarek # michel = self.michel @@ -210,8 +210,8 @@ def testSub(self): self.assertEquals((bob, likes, cheese) in g1, False) def testGraphAdd(self): - g1 = Graph() - g2 = Graph() + g1 = self.graph + g2 = Graph(store=g1.store) tarek = self.tarek # michel = self.michel @@ -242,8 +242,8 @@ def testGraphAdd(self): self.assertEquals((bob, likes, cheese) in g1, True) def testGraphIntersection(self): - g1 = Graph() - g2 = Graph() + g1 = self.graph + g2 = Graph(store=g1.store) tarek = self.tarek michel = self.michel @@ -279,7 +279,7 @@ def testGraphIntersection(self): self.assertEquals((bob, likes, cheese) in g1, False) self.assertEquals((michel, likes, cheese) in g1, True) - + # dynamically create classes for each registered Store diff --git a/test/test_graph_context.py b/test/test_graph_context.py index 5f3673c4f..fce1626d6 100644 --- a/test/test_graph_context.py +++ b/test/test_graph_context.py @@ -47,12 +47,6 @@ def tearDown(self): else: os.remove(self.tmppath) - def get_context(self, identifier): - assert isinstance(identifier, URIRef) or \ - isinstance(identifier, BNode), type(identifier) - return Graph(store=self.graph.store, identifier=identifier, - namespace_manager=self) - def addStuff(self): tarek = self.tarek michel = self.michel @@ -126,15 +120,15 @@ def testLenInOneContext(self): c1 = self.c1 # make sure 
context is empty - self.graph.remove_context(self.get_context(c1)) + self.graph.remove_context(self.graph.get_context(c1)) graph = Graph(self.graph.store, c1) oldLen = len(self.graph) for i in range(0, 10): graph.add((BNode(), self.hates, self.hates)) self.assertEquals(len(graph), oldLen + 10) - self.assertEquals(len(self.get_context(c1)), oldLen + 10) - self.graph.remove_context(self.get_context(c1)) + self.assertEquals(len(self.graph.get_context(c1)), oldLen + 10) + self.graph.remove_context(self.graph.get_context(c1)) self.assertEquals(len(self.graph), oldLen) self.assertEquals(len(graph), 0) @@ -194,9 +188,9 @@ def testRemoveContext(self): self.addStuffInMultipleContexts() self.assertEquals(len(Graph(self.graph.store, c1)), 1) - self.assertEquals(len(self.get_context(c1)), 1) + self.assertEquals(len(self.graph.get_context(c1)), 1) - self.graph.remove_context(self.get_context(c1)) + self.graph.remove_context(self.graph.get_context(c1)) self.assert_(self.c1 not in self.graph.contexts()) def testRemoveAny(self): @@ -292,7 +286,7 @@ def testTriples(self): # all unbound without context, same result! asserte(len(list(triples((Any, Any, Any)))), 7) - for c in [graph, self.get_context(c1)]: + for c in [graph, self.graph.get_context(c1)]: # unbound subjects asserte(set(c.subjects(likes, pizza)), set((michel, tarek))) asserte(set(c.subjects(hates, pizza)), set((bob,))) @@ -343,6 +337,9 @@ def testTriples(self): asserte(len(list(c1triples((Any, Any, Any)))), 0) asserte(len(list(triples((Any, Any, Any)))), 0) + + + # dynamically create classes for each registered Store pluginname = None