Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset cleanup - Store API for graph method #309

Merged
merged 5 commits into from
Aug 11, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
339 changes: 163 additions & 176 deletions rdflib/graph.py

Large diffs are not rendered by default.

42 changes: 32 additions & 10 deletions rdflib/plugins/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ class IOMemory(Store):
"""
context_aware = True
formula_aware = True
graph_aware = True

# The following variable name conventions are used in this class:
#
Expand Down Expand Up @@ -242,7 +243,7 @@ def namespaces(self):
def add(self, triple, context, quoted=False):
Store.add(self, triple, context, quoted)

if context is not None and context not in self.__all_contexts:
if context is not None:
self.__all_contexts.add(context)

enctriple = self.__encodeTriple(triple)
Expand Down Expand Up @@ -285,7 +286,9 @@ def remove(self, triplepat, context=None):

del self.__tripleContexts[enctriple]

if triplepat == (None, None, None) and context in self.__all_contexts:
if triplepat == (None, None, None) and \
context in self.__all_contexts and \
not self.graph_aware:
# remove the whole context
self.__all_contexts.remove(context)

Expand Down Expand Up @@ -340,7 +343,7 @@ def triples(self, triplein, context=None):
if self.__tripleHasContext(enctriple, cid))

def contexts(self, triple=None):
if triple is None:
if triple is None or triple is (None,None,None):
return (context for context in self.__all_contexts)

enctriple = self.__encodeTriple(triple)
Expand All @@ -354,6 +357,24 @@ def __len__(self, context=None):
cid = self.__obj2id(context)
return sum(1 for enctriple, contexts in self.__all_triples(cid))

def add_graph(self, graph):
    """Register ``graph`` as a known context of this store.

    A graph-aware store simply records the graph in its set of contexts
    (adding one that is already present changes nothing).  Otherwise the
    call is handed to ``Store.add_graph``.

    :param graph: the graph (context) to register
    """
    if self.graph_aware:
        self.__all_contexts.add(graph)
        return
    Store.add_graph(self, graph)

def remove_graph(self, graph):
    """Remove ``graph`` and all of its triples from this store.

    A store that is not graph-aware hands the call to
    ``Store.remove_graph``.  Removing a graph that was never registered
    is not an error.

    :param graph: the graph (context) to remove
    """
    if not self.graph_aware:
        Store.remove_graph(self, graph)
        return

    # First drop every triple held in the graph ...
    match_all = (None, None, None)
    self.remove(match_all, graph)

    # ... then forget the graph itself.
    try:
        self.__all_contexts.remove(graph)
    except KeyError:
        # the graph was unknown to us, which is fine
        pass



# internal utility methods below

def __addTripleContext(self, enctriple, context, quoted):
Expand Down Expand Up @@ -414,8 +435,8 @@ def __removeTripleContext(self, enctriple, cid):
self.__tripleContexts[enctriple] = ctxs

def __obj2id(self, obj):
"""encode object, storing it in the encoding map if necessary, and
return the integer key"""
"""encode object, storing it in the encoding map if necessary,
and return the integer key"""
if obj not in self.__obj2int:
id = randid()
while id in self.__int2obj:
Expand All @@ -430,20 +451,21 @@ def __encodeTriple(self, triple):
return tuple(map(self.__obj2id, triple))

def __decodeTriple(self, enctriple):
    """Translate each id of an encoded triple back through the
    ``__int2obj`` map and return the results as a tuple (ids missing
    from the map come back as None)"""
    return tuple(self.__int2obj.get(term_id) for term_id in enctriple)

def __all_triples(self, cid):
    """generator over the triples of the context with id ``cid``;
    each item is a pair of the decoded triple and a generator of the
    contexts that triple appears in"""
    for bucket in self.__subjectIndex.values():
        # Walk a copy of each bucket of the subject index.
        # NOTE(review): presumably so the index may change while this
        # generator is being consumed -- confirm.
        for candidate in bucket.copy():
            if not self.__tripleHasContext(candidate, cid):
                continue
            yield self.__decodeTriple(candidate), self.__contexts(candidate)

def __contexts(self, enctriple):
    """generator over the decoded, non-quoted contexts that the
    encoded triple appears in; a context id of None is skipped"""
    context_ids = self.__getTripleContexts(enctriple, skipQuoted=True)
    return (
        self.__int2obj.get(cid)
        for cid in context_ids
        if cid is not None
    )

def __emptygen(self):
Expand Down
7 changes: 7 additions & 0 deletions rdflib/plugins/sleepycat.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class Sleepycat(Store):
context_aware = True
formula_aware = True
transaction_aware = False
graph_aware = True
db_env = None

def __init__(self, configuration=None, identifier=None):
Expand Down Expand Up @@ -495,6 +496,12 @@ def contexts(self, triple=None):
current = None
cursor.close()

def add_graph(self, graph):
    """Record ``graph`` in the contexts table.

    The key is the graph's string form (``_to_string``) passed through
    ``bb``; the stored value is the empty string.

    :param graph: the graph (context) to register
    """
    key = bb(self._to_string(graph))
    self.__contexts.put(key, "")

def remove_graph(self, graph):
    """Remove every triple of ``graph`` by calling ``remove`` with the
    match-all pattern.

    NOTE(review): nothing here deletes the graph's entry from the
    contexts table directly; presumably ``remove`` takes care of that
    for a match-all pattern -- confirm.

    :param graph: the graph (context) to remove
    """
    match_all = (None, None, None)
    self.remove(match_all, graph)

def _from_string(self, i):
k = self.__i2k.get(int(i))
return self._loads(k)
Expand Down
18 changes: 15 additions & 3 deletions rdflib/plugins/sparql/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ def evalClear(ctx, u):
for g in _graphAll(ctx, u.graphiri):
g.remove((None, None, None))

def evalDrop(ctx, u):
    """
    http://www.w3.org/TR/sparql11-update/#drop

    On a graph-aware store every graph selected by ``u.graphiri`` is
    removed from the store.  On any other store the operation is
    carried out as a Clear.
    """
    if not ctx.dataset.store.graph_aware:
        evalClear(ctx, u)
        return

    for g in _graphAll(ctx, u.graphiri):
        ctx.dataset.store.remove_graph(g)


def evalInsertData(ctx, u):
"""
Expand Down Expand Up @@ -214,7 +224,10 @@ def evalMove(ctx, u):

dstg += srcg

srcg.remove((None, None, None))
if ctx.dataset.store.graph_aware:
ctx.dataset.store.remove_graph(srcg)
else:
srcg.remove((None, None, None))


def evalCopy(ctx, u):
Expand Down Expand Up @@ -277,8 +290,7 @@ def evalUpdate(graph, update, initBindings=None):
elif u.name == 'Clear':
evalClear(ctx, u)
elif u.name == 'Drop':
# rdflib does not record empty graphs, so clear == drop
evalClear(ctx, u)
evalDrop(ctx, u)
elif u.name == 'Create':
evalCreate(ctx, u)
elif u.name == 'Add':
Expand Down
1 change: 0 additions & 1 deletion rdflib/plugins/stores/sparqlstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ class SPARQLStore(NSSPARQLWrapper, Store):
formula_aware = False
transaction_aware = False
regex_matching = NATIVE_REGEX
batch_unification = False

def __init__(self,
endpoint=None, bNodeAsURI=False,
Expand Down
35 changes: 31 additions & 4 deletions rdflib/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
``Transaction-capable``: capable of providing transactional integrity to the
RDF operations performed on it.

``Graph-aware``: capable of keeping track of empty graphs.

------
"""

Expand Down Expand Up @@ -111,7 +113,7 @@ class Store(object):
context_aware = False
formula_aware = False
transaction_aware = False
batch_unification = False
graph_aware = False

def __init__(self, configuration=None, identifier=None):
"""
Expand Down Expand Up @@ -268,9 +270,9 @@ def triples(self, triple_pattern, context=None):
for example, REGEXTerm, URIRef, Literal, BNode, Variable, Graph,
QuotedGraph, Date? DateRange?

A conjunctive query can be indicated by either providing a value of
None for the context or the identifier associated with the Conjunctive
Graph (if it is context aware).
:param context: A conjunctive query can be indicated by either
providing a value of None, or a specific context can be
queried by passing a Graph instance (if store is context aware).
"""
subject, predicate, object = triple_pattern

Expand All @@ -282,12 +284,18 @@ def __len__(self, context=None):
quoted (asserted) statements if the context is not specified,
otherwise it should return the number of statements in the formula or
context given.

:param context: a graph instance to query or None
"""

def contexts(self, triple=None):
"""
Generator over all contexts in the graph. If triple is specified,
a generator over all contexts the triple is in.

if store is graph_aware, may also return empty contexts

:returns: a generator over Nodes
"""

def query(self, query, initNs, initBindings, queryGraph, **kwargs):
Expand Down Expand Up @@ -347,3 +355,22 @@ def commit(self):

def rollback(self):
""" """

# Optional graph methods

def add_graph(self, graph):
    """
    Add a graph to the store; this has no effect if the graph
    already exists.

    This base implementation always raises: only graph-aware stores
    are expected to provide the method.

    :param graph: a Graph instance
    """
    raise Exception("Graph method called on non-graph_aware store")

def remove_graph(self, graph):
    """
    Remove a graph from the store; this should also remove all
    triples in the graph.

    This base implementation always raises: only graph-aware stores
    are expected to provide the method.

    :param graph: a Graph instance
    """
    raise Exception("Graph method called on non-graph_aware store")
4 changes: 4 additions & 0 deletions test/test_conjunctive_graph.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Tests for ConjunctiveGraph that do not depend on the underlying store
"""

from rdflib import ConjunctiveGraph, Graph
from rdflib.term import Identifier, URIRef, BNode
from rdflib.parser import StringInputSource
Expand Down
136 changes: 136 additions & 0 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import sys
import os
import unittest

from tempfile import mkdtemp, mkstemp
import shutil
from rdflib import Graph, Dataset, URIRef, BNode, plugin
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID

from nose.exc import SkipTest


class DatasetTestCase(unittest.TestCase):
    """Tests for ``Dataset`` run against the store named by ``store``.

    Subclasses select another store plugin by overriding ``store``.
    Every test works on a dataset opened on a fresh temporary path,
    which is deleted again in ``tearDown``.
    """
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        """Open an empty dataset on a temporary path and build the
        URIs shared by the tests.

        :raises SkipTest: if creating the dataset raises ImportError
        """
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            fd, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
            # mkstemp hands back an open descriptor; only the path is
            # used here, so close it rather than leak it.
            os.close(fd)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        """Close the dataset and delete its temporary file or directory."""
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def _context_ids(self):
        """Return the set of identifiers of all contexts in the dataset."""
        return set(x.identifier for x in self.graph.contexts())

    def testGraphAware(self):
        """An added graph stays listed while empty, until it is removed."""
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)
        with_c1 = set([self.c1, DATASET_DEFAULT_GRAPH_ID])

        # added graph exists
        self.assertEqual(self._context_ids(), with_c1)

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(self._context_ids(), with_c1)

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # graph still exists, although empty
        self.assertEqual(self._context_ids(), with_c1)

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(self._context_ids(),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        """Removing the default graph empties it but keeps it listed."""
        only_default = set([DATASET_DEFAULT_GRAPH_ID])

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(self._context_ids(), only_default)

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(self._context_ids(), only_default)

    def testNotUnion(self):
        """Triples added to a named graph are not visible through the
        dataset itself."""
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)),
                         [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])


# Dynamically create one DatasetTestCase subclass per registered Store.

# When run as a script, an optional first command-line argument is passed to
# plugin.plugins() as the plugin name; otherwise None is passed.
pluginname = (
    sys.argv[1] if __name__ == '__main__' and len(sys.argv) > 1 else None)

tests = 0
for s in plugin.plugins(pluginname, plugin.Store):
    # Skip the stores that are tested by default, and any store that is not
    # graph-aware.
    if (s.name in ('default', 'IOMemory', 'Auditable',
                   'Concurrent', 'SPARQLStore', 'SPARQLUpdateStore')
            or not s.getClass().graph_aware):
        continue

    # At module scope locals() is the module namespace, so the generated
    # class becomes a module attribute named t0, t1, ...
    locals()["t%d" % tests] = type(
        "%sContextTestCase" % s.name, (DatasetTestCase,), {"store": s.name})
    tests += 1


if __name__ == '__main__':
    unittest.main()
Loading