From f8bc1ca6b67505fc71cb27a7f6bc20eb1b6b024e Mon Sep 17 00:00:00 2001 From: Ray Plante Date: Wed, 28 Sep 2022 16:57:48 -0400 Subject: [PATCH 001/123] start on dbio.wsgi; create/test broker class --- python/nistoar/midas/dbio/__init__.py | 10 +- python/nistoar/midas/dbio/base.py | 58 +- python/nistoar/midas/dbio/wsgi/base.py | 740 ++++++++++++++++++ python/nistoar/midas/dbio/wsgi/broker.py | 347 ++++++++ .../tests/nistoar/midas/dbio/test_client.py | 3 + .../tests/nistoar/midas/dbio/wsgi/__init__.py | 0 .../nistoar/midas/dbio/wsgi/test_broker.py | 198 +++++ 7 files changed, 1347 insertions(+), 9 deletions(-) create mode 100644 python/nistoar/midas/dbio/wsgi/base.py create mode 100644 python/nistoar/midas/dbio/wsgi/broker.py create mode 100644 python/tests/nistoar/midas/dbio/wsgi/__init__.py create mode 100644 python/tests/nistoar/midas/dbio/wsgi/test_broker.py diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index 75fc0b2..e2c26f2 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -1,11 +1,11 @@ """ dbio: a module for accessing information from a common database. -In the MIDAS framework, a common database model can be used for storing different types of records -that can be created by users. There are two key types supported currently: *DMPs* (Data Management -Plans) and *draft EDIs* (Enterprise Data Inventory records). This module provides an interface to -access and update those records through the life cycle of the records. This includes managing -authorization to access or update the records. +In the MIDAS framework, a common database model can be used for storing different types of _project +records_ that can be created by users. There are two key types supported currently: *DMPs* (Data +Management Plans) and *draft EDIs* (Enterprise Data Inventory records). This module provides an +interface to access and update those records through the life cycle of the records. 
This includes +managing authorization to access or update the records. ---------------------- Typical Use diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index c1154f3..a620c74 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -31,8 +31,8 @@ PUBLIC_GROUP = DEF_GROUPS_SHOULDER + ":public" # all users are implicitly part of this group ANONYMOUS = PUBLIC_GROUP -__all__ = ["DBClient", "DBClientFactory", "DBGroups", "Group", "ACLs", "PUBLIC_GROUP", "ANONYMOUS", - "DRAFT_PROJECTS", "DMP_PROJECTS"] +__all__ = ["DBClient", "DBClientFactory", "ProjectRecord", "DBGroups", "Group", "ACLs", "PUBLIC_GROUP", + "ANONYMOUS", "DRAFT_PROJECTS", "DMP_PROJECTS"] Permissions = Union[str, Sequence[str], AbstractSet[str]] @@ -91,6 +91,20 @@ def grant_perm_to(self, perm_name, *ids): if id not in self._perms[perm_name]: self._perms[perm_name].append(id) + def revoke_perm_from_all(self, perm_name): + """ + remove the given identities from the list having the given permission. For each given identity + that does not currently have the permission, nothing is done. + :param str perm_name: the permission to be revoked + :param str ids: the identities of the users the permission should be revoked from + :raise NotAuthorized: if the user attached to the underlying :py:class:`DBClient` is not + authorized to grant this permission + """ + if not self._rec.authorized(self.ADMIN): + raise NotAuthorized(self._rec._cli.user_id, "revoke permission") + if perm_name in self._perms: + self._perms[perm_name] = [] + def revoke_perm_from(self, perm_name, *ids): """ remove the given identities from the list having the given permission. 
For each given identity @@ -253,6 +267,7 @@ def validate(self, errs=None, data=None) -> List[str]: return errs def to_dict(self): + self._data['acls'] = self.acls._data return deepcopy(self._data) class Group(ProtectedRecord): @@ -564,6 +579,13 @@ def name(self) -> str: """ return self._data.get('name', "") + @name.setter + def name(self, val): + """ + assign the given name as the record's mnumonic name + """ + self._data['name'] = val + @property def created(self) -> float: """ @@ -587,6 +609,10 @@ def data(self) -> MutableMapping: """ return self._data['data'] + @data.setter + def data(self, data: Mapping): + self._data['data'] = deepcopy(data) + @property def meta(self) -> MutableMapping: """ @@ -596,6 +622,10 @@ def meta(self) -> MutableMapping: """ return self._data['meta'] + @meta.setter + def meta(self, data: Mapping): + self._data['meta'] = deepcopy(data) + def __str__(self): return "<{} ProjectRecord: {} ({}) owner={}>".format(self._coll.rstrip("s"), self.id, self.name, self.owner) @@ -768,7 +798,7 @@ def get_record_for(self, id: str, perm: str=ACLs.READ) -> ProjectRecord: """ out = self._get_from_coll(self._projcoll, id) if not out: - return None + raise ObjectNotFound(id) out = ProjectRecord(self._projcoll, out, self) if not out.authorized(perm): raise NotAuthorized(self.user_id, perm) @@ -843,7 +873,7 @@ def _get_from_coll(self, collname, id) -> MutableMapping: @abstractmethod def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping]: """ - return an iterator to the records from a specified collectino that match the set of + return an iterator to the records from a specified collection that match the set of given constraints. :param str collname: the logical name of the database collection (e.g. table, etc.) to pull @@ -944,4 +974,24 @@ class AlreadyExists(DBIOException): """ pass +class ObjectNotFound(DBIOException): + """ + an exception indicating that the requested record, or a requested part of a record, does not exist. 
+ """ + def __init__(self, recid, part=None, message=None): + """ + initialize this exception + :param str recid: the id of the record that was existed + :param str part: the part of the record that was requested. Do not provide this parameter if + the entire record does not exist. + """ + self.record_id = recid + self.record_part = part + + if not message: + if part: + message = "Requested portion of record (id=%s) does not exist: %s" % (recid, part) + else: + message = "Requested record with id=%s does not exist" % recid + super(ObjectNotFound, self).__init__(message) diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py new file mode 100644 index 0000000..348b692 --- /dev/null +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -0,0 +1,740 @@ +""" +A web service interface to various MIDAS project records. + +A _project record_ is a persistable record that is compliant with the MIDAS Common Database project +data model, where examples of "project record" types include DMP records and data publication drafts. +The :py:class:`MIDASProjectApp` encapsulates the handling of requests to create and manipulate project +records. If desired, this class can be specialized for a particular project type, and the easiest way +to do that is by sub-classing the :py:class:`~nistoar.midas.dbio.wsgi.project.ProjectRecordBroker` and +passing that class to the :py:class:`MIDASProjectApp` constructor. This is because the +:py:class:`~nistoar.midas.dbio.wsgi.project.ProjectRecordBroker` class isolates the business logic for +retrieving and manipulating project records. 
+""" +from logging import Logger +from collections import OrderedDict +from collections.abc import Mapping, Sequence + +from nistoar.pdr.publish SubApp, Handler # use same sevice infrastructure as the publishing service +from nistoar.pdr.utils.webrecord import WebRecorder +from nistoar.midas import dbio +from nistoar.midas.dbio import ProjectRecord +from .project import ProjectRequestHandler + + +class MIDASProjectApp(SubApp): + """ + a base web app for an interface handling project record + """ + def_project_broker_class = ProjectRecordBroker + + def __init__(self, servicetype, log: Logger, dbcli_factory: DBClientFactory, + foruser: str, config: dict={}, project_broker_cls=None): + super(MIDASApp, self).__init__(servicetype, log, config) + + ## create dbio client from config + self._prjbrkr_cls = self.cfg.get('project_handler_class', self.def_project_handler_class) + self._dbfact = dbcli_factory + + def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: + """ + return a handler instance to handle a particular request to a path + :param Mapping env: the WSGI environment containing the request + :param Callable start_resp: the start_resp function to use initiate the response + :param str path: the path to the resource being requested. This is usually + relative to a parent path that this SubApp is configured to + handle. 
+ :param PubAgent who the authenticated user agent making the request + """ + + # set up dbio client and the request handler that will mediate with it + dbcli = self._dbfact.create_client(self._name, who.actor) + pbroker = self._prjbrkr_cls(dbcli, self.cfg, env, self.log) + + # now parse the requested path; we have different handlers for different types of paths + idattrpart = path.split('/', 2) + if len(idattrpart) < 2: + if not idattrpart: + # path is empty: this is used to list all available projects or create a new one + return ProjectSelectionHandler(pbroker, self, env, start_resp, who) + else: + # path is just an ID: + return ProjectHandler(pbroker, self, env, start_resp, who, idattrpart[0]) + + elif idattrpart[1] == "name": + # path=ID/name: get/change the mnumonic name of record ID + return ProjectNameHandler(pbroker, self, env, start_resp, who, idattrpart[0]) + elif idattrpart[1] == "data": + # path=ID/data[/...]: get/change the content of record ID + if len(idattrpart) == 2: + idattrpart.append("") + return ProjectDataHandler(pbroker, self, env, start_resp, who, idattrpart[0], idattrpart[2]) + elif idattrpart[1] == "acls": + # path=ID/acls: get/update the access control on record ID + if len(idattrpart) < 3: + idattrpart.append(None) + return ProjectACLsHandler(self, env, start_resp, who, idattrpart[0], idattrpart[2]) + + # the fallback handler will return some arbitrary part of the record + if len(idattrpart) > 2: + idattrpart[1] = "/".join(idattrpart[1:]) + return ProjectInfoHandler(self, env, start_resp, who, idattrpart[0], idattrpart[1]) + +class DBIOHandler(Handler): + """ + a base class for handling requests for DBIO data. It provides some common utililty functions + for sending responses and dealing with errors. + """ + def __init__(self, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, path: str="", + config: dict=None, log: Logger=None): + """ + Initialize this handler with the request particulars. 
+ + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str path: the relative path to be handled by this handler; typically, some starting + portion of the original request path has been stripped away to handle + produce this value. + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + self._app = app + if config is None: + config = self._app._cfg + if not who: + who = self._app._dbcli.user_id + if not log: + log = self._app.log + Handler.__init__(self, path, wsgienv, start_resp, who, config, log) + self._dbcli = self._app._dbcli + self._reqrec = None + if hasattr(self._app, "_recorder") and self._app._recorder: + self._reqrec = self._app._recorder.from_wsgi(self._env) + + class FatalError(Exception): + def __init__(self, code, reason, explain=None, id=None): + if not explain: + explain = reason or '' + super(FatalError, self).__init__(explain) + self.code = code + self.reason = reason + self.explain = explain + self.id = id + + def send_fatal_error(self, fatalex: FatalError, ashead=False): + send_error_resp(fatalex.code, fatalex.reason, fatalex.explain, fatalex.id, ashead) + + def send_error_resp(self, code, reason, explain, id=None, ashead=False): + """ + respond to client with a JSON-formated error response. 
+ :param int code: the HTTP code to respond with + :param str reason: the reason to return as the HTTP status message + :param str explain: the more extensive explanation as to the reason for the error; + this is returned only in the body of the message + :param str id: the record ID for the requested record; if None, it is not applicable or known + :param bool ashead: if true, do not send the body as this is a HEAD request + """ + resp = { + 'http:code': code, + 'http:reason': reason, + 'midas:message': explain, + } + if id: + resp['midas:id'] = sipid + + return self.send_json(resp, reason, code, ashead) + + def get_json_body(self): + """ + read in the request body assuming that it is in JSON format + """ + try: + bodyin = self._env.get('wsgi.input') + if bodyin is None: + if self._reqrec: + self._reqrec.record() + raise FatalError(400, "Missing input", "Missing expected input JSON data") + + if self.log.isEnabledFor(logging.DEBUG) or self._reqrec: + body = bodyin.read() + out = json.loads(body, object_pairs_hook=OrderedDict) + else: + out = json.load(bodyin, object_pairs_hook=OrderedDict) + if self._reqrec: + self._reqrec.add_body_text(json.dumps(name, indent=2)).record() + return out + + except (ValueError, TypeError) as ex: + if self.log.isEnabledFor(logging.DEBUG): + self.log.error("Failed to parse input: %s", str(ex)) + self.log.debug("\n%s", body) + if self._reqrec: + self._reqrec.add_body_text(body).record() + raise self.FatalError(400, "Input not parseable as JSON", + "Input document is not parse-able as JSON: "+str(ex), sipid) + + except Exception as ex: + if self._reqrec: + self._reqrec.add_body_text(body).record() + raise + +class ProjectRecordHandler(DBIOHandler): + """ + base handler class for all requests on project records. This base allows requests to be funneled + through a :py:class:`~nistoar.midas.dbio.wsgi.project.ProjectRecordBroker` instance. 
+ """ + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, path: str="", config: dict=None, log: Logger=None): + """ + Initialize this handler with the request particulars. + + :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update + the project data through. + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str path: the relative path to be handled by this handler; typically, some starting + portion of the original request path has been stripped away to handle + produce this value. + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + + super(ProjectHandler, self).__init__(subapp, wsgienv, start_resp, who, path, config, log) + self._pbrkr = broker + +class ProjectInfoHandler(ProjectRecordHandler): + """ + handle retrieval of simple parts of a project record + """ + + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, attribute: str, config: dict={}, log: Logger=None): + """ + Initialize this handler with the request particulars. This constructor is called + by the webs service SubApp. + + :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update + the project data through. 
+ :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str id: the ID of the project record being requested + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + + super(ProjectInfoHandler, self).__init__(broker, subapp, attribute, wsgienv, start_resp, + who, attribute, config, self._app.log) + self._id = id + if not id: + # programming error + raise ValueError("Missing ProjectRecord id") + + def do_GET(self, path, ashead=False): + if not path: + # programming error + raise ValueError("Missing ProjectRecord attribute") + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + parts = path.split('/') + data = prec.to_dict() + while len(parts) > 0: + attr = parts.pop(0) + if not isinstance(data, Mapping) or attr not in data: + return send_error(404, "Record attribute not available", + "Requested record attribute not found", self._id, ashead=ashead) + data = data[attr] + + return send_json(data, ashead=ashead) + +class ProjectNameHandler(ProjectRecordHandler): + """ + handle retrieval/update of a project records mnumonic name + """ + + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, config: dict={}, log: Logger=None): + """ + 
Initialize this handler with the request particulars. This constructor is called + by the webs service SubApp. + + :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update + the project data through. + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str id: the ID of the project record being requested + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + + super(ProjectNameHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, "", config, log) + + self._id = id + if not id: + # programming error + raise ValueError("Missing ProjectRecord id") + + def do_GET(self, path, ashead=False): + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + return self.send_json(prec.name) + + def do_PUT(self, path): + try: + name = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + try: + prec = self._dbcli.get_record_for(self._id) + prec.name = name + if not prec.authorized(dbio.ACLs.ADMIN): + raise dbio.NotAuthorized(self._dbcli.user_id, "change record name") + prec.save() + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with 
requested identifier not found", self._id, ashead=ashead) + +class ProjectDataHandler(ProjectRecordHandler): + """ + handle retrieval/update of a project record's data content + """ + + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, datapath: str, config: dict=None, log: Logger=None): + """ + Initialize this data request handler with the request particulars. This constructor is called + by the webs service SubApp in charge of the project record interface. + + :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update + the project data through. + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str id: the ID of the project record being requested + :param str datapath: the subpath pointing to a particular piece of the project record's data; + this will be a '/'-delimited identifier pointing to an object property + within the data object. This will be an empty string if the full data + object is requested. + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + super(ProjectDataHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, datapath, + config, log) + self._id = id + if not id: + # programming error + raise ValueError("Missing ProjectRecord id") + + def do_GET(self, path, ashead=False): + """ + respond to a GET request + :param str path: a path to the portion of the data to get. 
This is the same as the `datapath` + given to the handler constructor. This will be an empty string if the full + data object is requested. + :param bool ashead: if True, the request is actually a HEAD request for the data + """ + try: + out = self.get_data(self._id, part) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + if ex.record_part: + return send_error_resp(404, "Data property not found", + "No data found at requested property", self._id, ashead=ashead) + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_json(out) + + def do_PUT(self, path): + try: + newdata = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + try: + return self.replace_data(self._id, newdata, path) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + except InvalidUpdate as ex: + return send_error_resp(400, "Invalid Input Data", str(ex)) + except PartNotAccessible as ex: + return send_error_resp(405, "Data part not updatable", + "Requested part of data cannot be updated") + + def do_PATCH(self, path): + try: + newdata = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + try: + return self.update_data(self._id, newdata, path) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + except InvalidUpdate as ex: + return send_error_resp(400, "Invalid Input Data", str(ex)) + except PartNotAccessible as ex: + return send_error_resp(405, "Data part not updatable", + "Requested part of data cannot be updated") + + +class 
ProjectSelectionHandler(ProjectRecordHandler): + """ + handle collection-level access searching for project records and creating new ones + """ + + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, config: dict=None, log: Logger=None): + """ + Initialize this record request handler with the request particulars. This constructor is called + by the webs service SubApp in charge of the project record interface. + + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + super(ProjectSelectionHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, "", + config, log) + + def do_GET(self, path, ashead=False): + """ + respond to a GET request, interpreted as a search for records accessible by the user + :param str path: a path to the portion of the data to get. This is the same as the `datapath` + given to the handler constructor. This will always be an empty string. 
+ :param bool ashead: if True, the request is actually a HEAD request for the data + """ + perms = [] + qstr = self._env.get('QUERY_STRING') + if qstr: + params = parse_qs(qstr) + perms = params.get('perm') + if not perms: + perms = [ dbio.ACLs.READWRITE ] + + # sort the results by the best permission type permitted + selected = OrderedDict() + for rec in self._dbcli.select_records(perms): + if rec.owner == _dbcli.user_id: + rec['maxperm'] = "owner" + elif rec.authorized(dbio.ACLs.ADMIN): + rec['maxperm'] = dbio.ACLs.ADMIN + elif rec.authorized(dbio.ACLs.WRITE): + rec['maxperm'] = dbio.ACLs.WRITE + else: + rec['maxperm'] = dbio.ACLs.READ + + if rec['perm'] not in selected: + selected[rec['perm']] = [] + selected[rec['perm']].append(rec) + + # order the matched records based on best permissions + out = [] + for perm in ["owner", dbio.ACLs.ADMIN, dbio.ACLs.WRITE, dbio.ACLs.READ]: + for rec in selected.get(perm, []): + out.append(rec.to_dict()) + + return send_json(out, ashead=ashead) + + def do_POST(self, path): + """ + create a new project record given some initial data + """ + try: + newdata = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + if not newdata['name']: + return send_error_resp(400, "Bad POST input", "No mneumonic name provided") + + try: + prec = self.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.AlreadyExists as ex: + return send_error_resp(400, "Name already in use", str(ex)) + + return send_json(prec.to_dict()) + + +class ProjectACLsHandler(ProjectRecordHandler): + """ + handle retrieval/update of a project record's data content + """ + + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, datapath: str, config: dict=None, log: Logger=None): + """ + Initialize this data request handler with the request particulars. 
This constructor is called + by the webs service SubApp in charge of the project record interface. + + :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update + the project data through. + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str id: the ID of the project record being requested + :param str datapath: the subpath pointing to a particular piece of the project record's data; + this will be a '/'-delimited identifier pointing to an object property + within the data object. This will be an empty string if the full data + object is requested. + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. 
+ """ + super(ProjectDataHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, datapath, + config, log) + self._id = id + if not id: + # programming error + raise ValueError("Missing ProjectRecord id") + + + def do_GET(self, path, ashead=False): + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + recd = prec.to_dict() + if not path: + return self.send_json(recd.get('acls', {})) + + path = path.strip('/') + parts = path.split('/', 1) + acl = recd.get('acls', {}).get(parts[0]) + if acl is None: + if parts[0] not in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + return self.send_error_resp(404, "Unsupported ACL type", "Request for unsupported ACL type") + acl = [] + + if len(parts) < 2: + return self.send_json(acl) + + return self.send_json(parts[1] in acl) + + def do_POST(self, path): + """ + add an identity to the acl for a specified permission. This handles POST ID/acls/PERM; + `path` should be set to PERM. 
+ """ + try: + # the input should be a single string giving a user or group identity to add to PERM ACL + identity = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + # make sure a permission type, and only a permission type, is specified + path = path.strip('/') + if not path or '/' in path: + return self.send_error_resp(405, "POST not allowed", + "ACL POST request should not specify a user/group identifier") + + if not isinstance(identity, str): + return self.send_error_resp(400, "Wrong input data type" + "Input data is not a string providing a user or group identifier") + + # TODO: ensure input value is a bona fide user or group name + + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + if path in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + pres.acls.grant_perm_to(path, identity) + pres.save() + return send_json(prec.to_dict().get('acls', {})) + + return self.send_error_resp(405, "POST not allowed on this permission type", + "Updating specified permission is not allowed") + + def do_PUT(self, path): + """ + replace the list of identities in a particular ACL. This handles PUT ID/acls/PERM; + `path` should be set to PERM. Note that previously set identities are removed. 
+ """ + try: + identities = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + # make sure a permission type, and only a permission type, is specified + path = path.strip('/') + if not path or '/' in path: + return self.send_error_resp(405, "PUT not allowed", "Unable set ACL membership") + + if isinstance(identities, str): + identities = [identities] + if not isinstance(identity, list): + return self.send_error_resp(400, "Wrong input data type" + "Input data is not a string providing a user/group list") + + # TODO: ensure input value is a bona fide user or group name + + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + if path in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + try: + pres.acls.revoke_perm_for_alll(path) + pres.acls.grant_perm_to(path, *identities) + pres.save() + return send_json(prec.to_dict().get('acls', {})) + except dbio.NotAuthorized as ex: + return send_unauthorized() + + return self.send_error_resp(405, "PUT not allowed on this permission type", + "Updating specified permission is not allowed") + + + def do_PATCH(self, path): + """ + fold given list of identities into a particular ACL. This handles PATCH ID/acls/PERM; + `path` should be set to PERM. 
+ """ + try: + # input is a list of user and/or group identities to add the PERM ACL + identities = self.get_json_body() + except FatalError as ex: + return self.send_fatal_error(ex) + + # make sure path is a permission type (PERM), and only a permission type + path = path.strip('/') + if not path or '/' in path: + return self.send_error_resp(405, "PATCH not allowed", + "ACL PATCH request should not a member name") + + if isinstance(identities, str): + identities = [identities] + if not isinstance(identity, list): + return self.send_error_resp(400, "Wrong input data type" + "Input data is not a list of user/group identities") + + # TODO: ensure input value is a bona fide user or group name + + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + if path in in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + try: + pres.acls.grant_perm_to(path, *identities) + pres.save() + return send_json(prec.to_dict().get('acls', {})) + except dbio.NotAuthorized as ex: + return send_unauthorized() + + return self.send_error_resp(405, "PATCH not allowed on this permission type", + "Updating specified permission is not allowed") + + def do_DELETE(self, path): + """ + remove an identity from an ACL. This handles DELETE ID/acls/PERM/USER; `path` should + be set to PERM/USER. 
+ """ + if path is None: + path = "" + + path = path.strip('/') + if not path or '/' not in path: + return self.send_error_resp(405, "DELETE not allowed on permission type", + "DELETE requires a group or user id after the permission type") + parts = path.split('/', 1) + + # TODO: ensure user value is a bona fide user or group name + + # retrieve the record + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return send_unauthorized() + except dbio.ObjectNotFound as ex: + return send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + + if path in in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + # remove the identity from the ACL + try: + pres.acls.revoke_perm_from(parts[0], parts[1]) + pres.save() + return send_ok() + except dbio.NotAuthorized as ex: + return send_unauthorized() + + return self.send_error_resp(405, "DELETE not allowed on this permission type", + "Updating specified permission is not allowed") + + + + + + + diff --git a/python/nistoar/midas/dbio/wsgi/broker.py b/python/nistoar/midas/dbio/wsgi/broker.py new file mode 100644 index 0000000..095ca53 --- /dev/null +++ b/python/nistoar/midas/dbio/wsgi/broker.py @@ -0,0 +1,347 @@ +""" +a module providing the :py:class:`ProjectRecordBroker` class, a base for classes that hold the business +logic for creating and updating MIDAS DBIO project records. `ProjectRecordBroker` classes mediate +between a RESTful web interface and the :py:module:`~nistoar.midas.dbio` layer. Broker classes +can be subclassed to provide specialized logic for a particular project record type (e.g. DMP, +EDI draft). +""" +from logging import Logger +from collections import OrderedDict +from collections.abc import Mapping, MutableMapping, Sequence + +from .. import DBClient, ProjectRecord +from ..base import AlreadyExists, NotAuthorized, ObjectNotFound +from ... 
import MIDASException
+from nistoar.pdr.publish.prov import PubAgent
+
+
+class ProjectRecordBroker:
+ """
+ A base class for handling requests to create, access, or update a project record. This generic
+ base can be used as is or extended and overridden to specialize the business logic for updating
+ a particular type of project.
+ """
+
+ def __init__(self, dbclient: DBClient, config: Mapping={}, who: PubAgent=None,
+ wsgienv: dict=None, log: Logger=None):
+ """
+ create a request handler
+ :param DBClient dbclient: the DBIO client instance to use to access and save project records
+ :param dict config: the handler configuration tuned for the current type of project
+ :param dict wsgienv: the WSGI request context
+ :param Logger log: the logger to use for log messages
+ """
+ self.dbcli = dbclient
+ self.cfg = config
+ if not who:
+ who = PubAgent("unkwn", PubAgent.USER, self.dbcli.user_id or "anonymous")
+ self.who = who
+ if wsgienv is None:
+ wsgienv = {}
+ self.env = wsgienv
+ self.log = log
+
+ def create_record(self, name, data=None, meta=None):
+ """
+ create a new project record with the given name. An ID will be assigned to the new record.
+ :param str name: the mnemonic name to assign to the record. This name cannot match that
+ of any other record owned by the user.
+ :param dict data: the initial data content to assign to the new record.
+ :param dict meta: the initial metadata to assign to the new record.
+ :raises NotAuthorized: if the authenticated user is not authorized to create a record + :raises AlreadyExists: if a record owned by the user already exists with the given name + """ + shoulder = self._get_id_shoulder(self.who) + prec = self.dbcli.create_record(name, shoulder) + + prec.data = self._new_data_for(prec.id) + if meta: + self._merge_into(self._moderate_metadata(meta), prec.meta) + if data: + self.update_data(prec.id, data, prec=prec) # this will call prec.save() + elif meta: + prec.save() + + return prec + + def _get_id_shoulder(self, user: PubAgent): + """ + return an ID shoulder that is appropriate for the given user agent + :param PubAgent user: the user agent that is creating a record, requiring a shoulder + :raises NotAuthorized: if an uathorized shoulder appropriate for the user cannot be determined. + """ + out = None + client_ctl = self.cfg.get('clients', {}).get(user.group) + if client_ctl is None: + client_ctl = self.cfg.get('clients', {}).get("default") + if client_ctl is None: + self.log.info("No default ID shoulder configured for client group, %s", user.group) + raise NotAuthorized(user.actor, "create record", + "Client group, %s, not recognized" % user.group) + + out = client_ctl.get('default_shoulder') + if not out: + raise NotAuthorized(user.actor, "create record", + "No default shoulder defined for client group, "+user.group) + return out + + def get_record(self, id): + """ + fetch the project record having the given identifier + :raises ObjectNotFound: if a record with that ID does not exist + :raises NotAuthorized: if the record exists but the current user is not authorized to read it. + """ + return self.dbcli.get_record_for(id) + + def get_data(self, id, part=None): + """ + return a data content from the record with the given ID + :param str id: the record's identifier + :param str path: a path to the portion of the data to get. This is the same as the `datapath` + given to the handler constructor. 
This will be an empty string if the full + data object is requested. + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + a non-existent part of the data content. + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if access to the part of the data specified by `part` is not allowed. + """ + prec = self.dbcli.get_record_for(id) # may raise ObjectNotFound + if not part: + return prec.data + return self._extract_data_part(prec.data, part) + + def _extract_data_part(self, data, part): + if not part: + return data + steps = part.split('/') + out = data + while steps: + prop = steps.pop(0) + if prop not in out: + raise ObjectNotFound(id, part) + out = out[prop] + + return out + + def update_data(self, id, newdata, part=None, prec=None): + """ + merge the given data into the currently save data content for the record with the given identifier. + :param str id: the identifier for the record whose data should be updated. + :param str newdata: the data to save as the new content. + :param stt part: the slash-delimited pointer to an internal data property. If provided, + the given `newdata` is a value that should be set to the property pointed + to by `part`. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. + If this is not provided, the record will by fetched anew based on the `id`. + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. + :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + would otherwise result in invalid data content. 
+ """ + if not prec: + prec = self.dbcli.get_record_for(id) # may raise ObjectNotFound/NotAuthorized + + if not part: + # this is a complete replacement; merge it with a starter record + self._merge_into(newdata, prec.data) + + else: + # replacing just a part of the data + steps = part.split('/') + data = prec.data + while steps: + prop = steps.pop(0) + if prop not in data or data[prop] is None: + if not steps: + data[prop] = newdata + else: + data[prop] = {} + elif not steps: + if isinstance(data[prop], Mapping) and isinstance(newdata, Mapping): + self._merge_into(newdata, data[prop]) + else: + data[prop] = newdata + elif not isinstance(data[prop], Mapping): + raise PartNotAccessible(id, part, + "%s: data property, %s, is not in an updatable state") + data = data[prop] + + data = prec.data + + # ensure the replacing data is sufficiently complete and valid and then save it + # If it is invalid, InvalidUpdate is raised. + data = self._save_data(data, prec) + + return self._extract_data_part(data, part) + + + def _merge_into(self, update: Mapping, base: Mapping, depth: int=-1): + if depth == 0: + return + + for prop in update: + if prop in base and isinstance(base[prop], Mapping): + if depth > 1 and isinstance(update[prop], Mapping): + # the properties from the base and update must both be dictionaries; otherwise, + # update is ignored. + self._merge_into(base[prop], update[prop], depth-1) + else: + base[prop] = update[prop] + + def _new_data_for(self, recid): + """ + return an "empty" data object set for a record with the given identifier. The returned + dictionary can contain some minimal or default properties (which may or may not include + the identifier or information based on the identifier). + """ + return OrderedDict() + + def _new_metadata_for(self, recid): + """ + return an "empty" metadata object set for a record with the given identifier. 
The returned + dictionary can contain some minimal or default properties (which may or may not include + the identifier or information based on the identifier). + + Recall that a project record's "metadata" stores information that helps manage the evolution of + the record, and does not normally contain information set directly from data provided by the + user client. An exception is when a record is created: the client can provide some initial + metadata that gets filtered by :py:method:`_moderate_metadata` + """ + return OrderedDict() + + def _moderate_metadata(self, mdata: MutableMapping): + """ + massage and validate the given record metadata provided by the user client, returning a + valid version of the metadata. The implementation may modify the given dictionary in place. + The default implementation does accepts none of the client-provided properties + + The purpose of this function is to filter out data properties that are not supported or + otherwise should not be settable by the client. + :raises ValueError: if the mdata is disallowed in a way that should abort the entire request. + """ + return OrderedDict() + + def replace_data(self, id, newdata, part=None, prec=None): + """ + Replace the currently stored data content of a record with the given data. It is expected that + the new data will be filtered/cleansed via an internal call to :py:method:`dress_data`. + :param str id: the identifier for the record whose data should be updated. + :param str newdata: the data to save as the new content. + :param stt part: the slash-delimited pointer to an internal data property. If provided, + the given `newdata` is a value that should be set to the property pointed + to by `part`. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. + If this is not provided, the record will by fetched anew based on the `id`. 
+ :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. + :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + would otherwise result in invalid data content. + """ + if not prec: + prec = self.dbcli.get_record_for(id) # may raise ObjectNotFound/NotAuthorized + + if not part: + # this is a complete replacement; merge it with a starter record + data = self._new_data_for(id) + self._merge_into(newdata, data) + + else: + # replacing just a part of the data + data = prec.data + steps = part.split('/') + while steps: + prop = steps.pop(0) + if prop not in data or data[prop] is None: + if not steps: + data[prop] = newdata + else: + data[prop] = {} + elif not steps: + data[prop] = newdata + elif not isinstance(data[prop], Mapping): + raise PartNotAccessible(id, part) + data = data[prop] + + data = prec.data + + # ensure the replacing data is sufficiently complete and valid. + # If it is invalid, InvalidUpdate is raised. + data = self._save_data(data, prec) + + return self._extract_data_part(data, part) + + def _save_data(self, indata: Mapping, prec: ProjectRecord = None) -> Mapping: + """ + expand, validate, and save the data modified by the user as the record's data content. + + The given data represents a merging of input from the user with the latest saved data. + This function provides the final transformations and validation checks before being saved. + It may have two side effects: first, as part of the final transformations, the indata + mapping may get updated in place. Second, the function may update the record's metadata + (stored in its `meta` property). 
+ + :param dict indata: the user-provided input merged into the previously saved data. After + final transformations and validation, this will be saved the the + record's `data` property. + :param ProjectRecord prec: the project record object to save the data to. + :return: the (transformed) data that was actually saved + :rtype: dict + :raises InvalidUpdate: if the provided `indata` represents an illegal or forbidden update or + would otherwise result in invalid data content + """ + # this implementation does not transform the data + self._validate_data(indata) # may raise InvalidUpdate + + prec.data = indata + prec.save(); + + return indata + + def _validate_data(self, data): + pass + + +class InvalidUpdate(MIDASException): + """ + an exception indicating that the user-provided data is invalid or otherwise would result in + invalid data content for a record. + """ + def __init__(self, message, recid=None, part=None): + """ + initialize the exception + :param str recid: the id of the record that was existed + :param str part: the part of the record that was requested. Do not provide this parameter if + the entire record does not exist. + """ + super(InvalidUpdate, self).__init__(message) + self.record_id = recid + self.record_part = part + +class PartNotAccessible(MIDASException): + """ + an exception indicating that the user-provided data is invalid or otherwise would result in + invalid data content for a record. + """ + def __init__(self, recid, part, message=None): + """ + initialize the exception + :param str recid: the id of the record that was existed + :param str part: the part of the record that was requested. Do not provide this parameter if + the entire record does not exist. 
+ """ + self.record_id = recid + self.record_part = part + + if not message: + message = "%s: data property, %s, is not in an updateable state" % (recid, part) + super(PartNotAccessible, self).__init__(message) + + + diff --git a/python/tests/nistoar/midas/dbio/test_client.py b/python/tests/nistoar/midas/dbio/test_client.py index fdbc838..71b7a10 100644 --- a/python/tests/nistoar/midas/dbio/test_client.py +++ b/python/tests/nistoar/midas/dbio/test_client.py @@ -75,6 +75,9 @@ def test_create_record(self): self.assertTrue(self.cli.name_exists("test", self.user)) def test_get_record(self): + with self.assertRaises(base.ObjectNotFound): + self.cli.get_record_for("pdr0:0001") + self.cli.create_record("test1") self.cli.create_record("test2") rec = self.fact.create_client(base.DRAFT_PROJECTS, "alice").create_record("goob") diff --git a/python/tests/nistoar/midas/dbio/wsgi/__init__.py b/python/tests/nistoar/midas/dbio/wsgi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py new file mode 100644 index 0000000..6816031 --- /dev/null +++ b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py @@ -0,0 +1,198 @@ +import os, json, pdb, logging, tempfile +import unittest as test + +from nistoar.midas.dbio import inmem, base +from nistoar.midas.dbio.wsgi import broker +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_broker.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +class 
TestProjectRecordBroker(test.TestCase): + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + }, + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } + self.fact = inmem.InMemoryDBClientFactory(self.cfg, { "nextnum": { "mdm1": 2 }}) + self.dbcli = self.fact.create_client(base.DMP_PROJECTS, nistr.actor) + self.resp = [] + + def create_broker(self, request=None): + self.resp = [] + if not request: + request = {'REQUEST_METHOD': 'GRUB'} + self.broker = broker.ProjectRecordBroker(self.dbcli, self.cfg, nistr, request, + rootlog.getChild("broker")) + return self.broker + + def test_ctor(self): + self.create_broker() + self.assertTrue(self.broker.dbcli) + self.assertEqual(self.broker.cfg, self.cfg) + self.assertEqual(self.broker.who.actor, "nstr1") + self.assertEqual(self.broker.who.group, "midas") + self.assertEqual(self.broker.env, {'REQUEST_METHOD': 'GRUB'}) + self.assertTrue(self.broker.log) + + def test_get_id_shoulder(self): + self.create_broker() + self.assertEqual(self.broker._get_id_shoulder(nistr), "mdm1") + + usr = prov.PubAgent("malware", prov.PubAgent.USER, "nstr1") + self.assertEqual(self.broker._get_id_shoulder(usr), "mdm0") + + del self.cfg['clients']['default']['default_shoulder'] + self.create_broker() + with self.assertRaises(broker.NotAuthorized): + self.broker._get_id_shoulder(usr) + del self.cfg['clients']['default'] + self.create_broker() + with self.assertRaises(broker.NotAuthorized): + self.broker._get_id_shoulder(usr) + + self.assertEqual(self.broker._get_id_shoulder(nistr), "mdm1") + + def test_extract_data_part(self): + data = {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A", "vec": [22, 11, 0], "desc": {"a": 1}}} + self.create_broker() + self.assertEqual(self.broker._extract_data_part(data, "color"), "red") + self.assertEqual(self.broker._extract_data_part(data, "pos"), + {"x": 23, "y": 12, "grid": "A", "vec": [22, 
11, 0], "desc": {"a": 1}}) + self.assertEqual(self.broker._extract_data_part(data, "pos/vec"), [22, 11, 0]) + self.assertEqual(self.broker._extract_data_part(data, "pos/y"), 12) + self.assertEqual(self.broker._extract_data_part(data, "pos/desc/a"), 1) + with self.assertRaises(broker.ObjectNotFound): + self.broker._extract_data_part(data, "pos/desc/b") + + + def test_create_record(self): + self.create_broker() + self.assertTrue(not self.broker.dbcli.name_exists("goob")) + + prec = self.broker.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdm1:0003") + self.assertEqual(prec.data, {}) + self.assertEqual(prec.meta, {}) + self.assertEqual(prec.owner, "nstr1") + + self.assertTrue(self.broker.dbcli.name_exists("goob")) + prec2 = self.broker.get_record(prec.id) + self.assertEqual(prec2.name, "goob") + self.assertEqual(prec2.id, "mdm1:0003") + self.assertEqual(prec2.data, {}) + self.assertEqual(prec2.meta, {}) + self.assertEqual(prec2.owner, "nstr1") + + with self.assertRaises(broker.AlreadyExists): + self.broker.create_record("goob") + + def test_create_record_withdata(self): + self.create_broker() + self.assertTrue(not self.broker.dbcli.name_exists("gurn")) + + prec = self.broker.create_record("gurn", {"color": "red"}, {"temper": "dark"}) + self.assertEqual(prec.name, "gurn") + self.assertEqual(prec.id, "mdm1:0003") + self.assertEqual(prec.data, {"color": "red"}) + self.assertEqual(prec.meta, {}) + + def test_get_data(self): + self.create_broker() + self.assertTrue(not self.broker.dbcli.name_exists("gurn")) + prec = self.broker.create_record("gurn", {"color": "red", "pos": {"x": 23, "y": 12, "desc": {"a": 1}}}) + self.assertTrue(self.broker.dbcli.name_exists("gurn")) + + self.assertEqual(self.broker.get_data(prec.id), + {"color": "red", "pos": {"x": 23, "y": 12, "desc": {"a": 1}}}) + self.assertEqual(self.broker.get_data(prec.id, "color"), "red") + self.assertEqual(self.broker.get_data(prec.id, "pos"), {"x": 23, "y": 12, "desc": 
{"a": 1}}) + self.assertEqual(self.broker.get_data(prec.id, "pos/desc"), {"a": 1}) + self.assertEqual(self.broker.get_data(prec.id, "pos/desc/a"), 1) + + with self.assertRaises(broker.ObjectNotFound): + self.broker.get_data(prec.id, "pos/desc/b") + with self.assertRaises(broker.ObjectNotFound): + self.broker.get_data("goober") + + + + def test_update_replace_data(self): + self.create_broker() + self.assertTrue(not self.broker.dbcli.name_exists("goob")) + + prec = self.broker.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdm1:0003") + self.assertEqual(prec.data, {}) + self.assertEqual(prec.meta, {}) + + data = self.broker.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + self.assertEqual(data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + prec = self.broker.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + + data = self.broker.update_data(prec.id, {"y": 1, "z": 10, "grid": "B"}, "pos") + self.assertEqual(data, {"x": 23, "y": 1, "z": 10, "grid": "B"}) + prec = self.broker.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "B"}}) + + data = self.broker.update_data(prec.id, "C", "pos/grid") + self.assertEqual(data, "C") + prec = self.broker.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "C"}}) + + # replace + data = self.broker.replace_data(prec.id, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + self.assertEqual(data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + prec = self.broker.get_record(prec.id) + self.assertEqual(prec.data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + + # update again + data = self.broker.update_data(prec.id, "blue", "color") + self.assertEqual(data, "blue") + prec = self.broker.get_record(prec.id) + self.assertEqual(prec.data, {"color": "blue", "pos": {"vec": [15, 22, 1], "grid": 
"Z"}}) + + with self.assertRaises(broker.PartNotAccessible): + self.broker.update_data(prec.id, 2, "pos/vec/x") + + + + +if __name__ == '__main__': + test.main() + + + + From cefeeccdc5b85310270eeece6271b35daaf42b3e Mon Sep 17 00:00:00 2001 From: Ray Plante Date: Wed, 28 Sep 2022 17:08:43 -0400 Subject: [PATCH 002/123] dbio.wsgi: mv base.py to project.py --- python/nistoar/midas/dbio/wsgi/{base.py => project.py} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename python/nistoar/midas/dbio/wsgi/{base.py => project.py} (99%) diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/project.py similarity index 99% rename from python/nistoar/midas/dbio/wsgi/base.py rename to python/nistoar/midas/dbio/wsgi/project.py index 348b692..5ff5193 100644 --- a/python/nistoar/midas/dbio/wsgi/base.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -16,9 +16,9 @@ from nistoar.pdr.publish SubApp, Handler # use same sevice infrastructure as the publishing service from nistoar.pdr.utils.webrecord import WebRecorder -from nistoar.midas import dbio -from nistoar.midas.dbio import ProjectRecord -from .project import ProjectRequestHandler +from .. 
import dbio +from ..dbio import ProjectRecord +from .broker import ProjectRecordBroker class MIDASProjectApp(SubApp): From 205540d665dd7bf9e5780238acbaf6aa12ed2a2a Mon Sep 17 00:00:00 2001 From: Ray Plante Date: Wed, 28 Sep 2022 17:18:28 -0400 Subject: [PATCH 003/123] dbio.wsgi: move some common code to base.py --- python/nistoar/midas/dbio/wsgi/base.py | 108 +++++++++++++++++++ python/nistoar/midas/dbio/wsgi/project.py | 120 ++-------------------- 2 files changed, 117 insertions(+), 111 deletions(-) create mode 100644 python/nistoar/midas/dbio/wsgi/base.py diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py new file mode 100644 index 0000000..85af46f --- /dev/null +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -0,0 +1,108 @@ +""" +Some common code for implementing the WSGI front end to dbio +""" +from nistoar.pdr.publish import Handler # use same sevice infrastructure as the publishing service + +class DBIOHandler(Handler): + """ + a base class for handling requests for DBIO data. It provides some common utililty functions + for sending responses and dealing with errors. + """ + def __init__(self, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, path: str="", + config: dict=None, log: Logger=None): + """ + Initialize this handler with the request particulars. + + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str path: the relative path to be handled by this handler; typically, some starting + portion of the original request path has been stripped away to handle + produce this value. + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. 
Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + self._app = app + if config is None: + config = self._app._cfg + if not who: + who = self._app._dbcli.user_id + if not log: + log = self._app.log + Handler.__init__(self, path, wsgienv, start_resp, who, config, log) + self._dbcli = self._app._dbcli + self._reqrec = None + if hasattr(self._app, "_recorder") and self._app._recorder: + self._reqrec = self._app._recorder.from_wsgi(self._env) + + class FatalError(Exception): + def __init__(self, code, reason, explain=None, id=None): + if not explain: + explain = reason or '' + super(FatalError, self).__init__(explain) + self.code = code + self.reason = reason + self.explain = explain + self.id = id + + def send_fatal_error(self, fatalex: FatalError, ashead=False): + send_error_resp(fatalex.code, fatalex.reason, fatalex.explain, fatalex.id, ashead) + + def send_error_resp(self, code, reason, explain, id=None, ashead=False): + """ + respond to client with a JSON-formated error response. 
+ :param int code: the HTTP code to respond with + :param str reason: the reason to return as the HTTP status message + :param str explain: the more extensive explanation as to the reason for the error; + this is returned only in the body of the message + :param str id: the record ID for the requested record; if None, it is not applicable or known + :param bool ashead: if true, do not send the body as this is a HEAD request + """ + resp = { + 'http:code': code, + 'http:reason': reason, + 'midas:message': explain, + } + if id: + resp['midas:id'] = sipid + + return self.send_json(resp, reason, code, ashead) + + def get_json_body(self): + """ + read in the request body assuming that it is in JSON format + """ + try: + bodyin = self._env.get('wsgi.input') + if bodyin is None: + if self._reqrec: + self._reqrec.record() + raise FatalError(400, "Missing input", "Missing expected input JSON data") + + if self.log.isEnabledFor(logging.DEBUG) or self._reqrec: + body = bodyin.read() + out = json.loads(body, object_pairs_hook=OrderedDict) + else: + out = json.load(bodyin, object_pairs_hook=OrderedDict) + if self._reqrec: + self._reqrec.add_body_text(json.dumps(name, indent=2)).record() + return out + + except (ValueError, TypeError) as ex: + if self.log.isEnabledFor(logging.DEBUG): + self.log.error("Failed to parse input: %s", str(ex)) + self.log.debug("\n%s", body) + if self._reqrec: + self._reqrec.add_body_text(body).record() + raise self.FatalError(400, "Input not parseable as JSON", + "Input document is not parse-able as JSON: "+str(ex), sipid) + + except Exception as ex: + if self._reqrec: + self._reqrec.add_body_text(body).record() + raise + diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 5ff5193..53a8357 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -14,10 +14,11 @@ from collections import OrderedDict from collections.abc import Mapping, Sequence -from 
nistoar.pdr.publish SubApp, Handler # use same sevice infrastructure as the publishing service +from nistoar.pdr.publish import SubApp, Handler # use same sevice infrastructure as the publishing service from nistoar.pdr.utils.webrecord import WebRecorder from .. import dbio from ..dbio import ProjectRecord +from .base import DBIOHandler from .broker import ProjectRecordBroker @@ -79,109 +80,6 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge idattrpart[1] = "/".join(idattrpart[1:]) return ProjectInfoHandler(self, env, start_resp, who, idattrpart[0], idattrpart[1]) -class DBIOHandler(Handler): - """ - a base class for handling requests for DBIO data. It provides some common utililty functions - for sending responses and dealing with errors. - """ - def __init__(self, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, path: str="", - config: dict=None, log: Logger=None): - """ - Initialize this handler with the request particulars. - - :param SubApp subapp: the web service SubApp receiving the request and calling this constructor - :param dict wsgienv: the WSGI request context dictionary - :param Callable start_resp: the WSGI start-response function used to send the response - :param PubAgent who: the authenticated user making the request. - :param str path: the relative path to be handled by this handler; typically, some starting - portion of the original request path has been stripped away to handle - produce this value. - :param dict config: the handler's configuration; if not provided, the inherited constructor - will extract the configuration from `subapp`. Normally, the constructor - is called without this parameter. - :param Logger log: the logger to use within this handler; if not provided (typical), the - logger attached to the SubApp will be used. 
- """ - self._app = app - if config is None: - config = self._app._cfg - if not who: - who = self._app._dbcli.user_id - if not log: - log = self._app.log - Handler.__init__(self, path, wsgienv, start_resp, who, config, log) - self._dbcli = self._app._dbcli - self._reqrec = None - if hasattr(self._app, "_recorder") and self._app._recorder: - self._reqrec = self._app._recorder.from_wsgi(self._env) - - class FatalError(Exception): - def __init__(self, code, reason, explain=None, id=None): - if not explain: - explain = reason or '' - super(FatalError, self).__init__(explain) - self.code = code - self.reason = reason - self.explain = explain - self.id = id - - def send_fatal_error(self, fatalex: FatalError, ashead=False): - send_error_resp(fatalex.code, fatalex.reason, fatalex.explain, fatalex.id, ashead) - - def send_error_resp(self, code, reason, explain, id=None, ashead=False): - """ - respond to client with a JSON-formated error response. - :param int code: the HTTP code to respond with - :param str reason: the reason to return as the HTTP status message - :param str explain: the more extensive explanation as to the reason for the error; - this is returned only in the body of the message - :param str id: the record ID for the requested record; if None, it is not applicable or known - :param bool ashead: if true, do not send the body as this is a HEAD request - """ - resp = { - 'http:code': code, - 'http:reason': reason, - 'midas:message': explain, - } - if id: - resp['midas:id'] = sipid - - return self.send_json(resp, reason, code, ashead) - - def get_json_body(self): - """ - read in the request body assuming that it is in JSON format - """ - try: - bodyin = self._env.get('wsgi.input') - if bodyin is None: - if self._reqrec: - self._reqrec.record() - raise FatalError(400, "Missing input", "Missing expected input JSON data") - - if self.log.isEnabledFor(logging.DEBUG) or self._reqrec: - body = bodyin.read() - out = json.loads(body, object_pairs_hook=OrderedDict) - 
else: - out = json.load(bodyin, object_pairs_hook=OrderedDict) - if self._reqrec: - self._reqrec.add_body_text(json.dumps(name, indent=2)).record() - return out - - except (ValueError, TypeError) as ex: - if self.log.isEnabledFor(logging.DEBUG): - self.log.error("Failed to parse input: %s", str(ex)) - self.log.debug("\n%s", body) - if self._reqrec: - self._reqrec.add_body_text(body).record() - raise self.FatalError(400, "Input not parseable as JSON", - "Input document is not parse-able as JSON: "+str(ex), sipid) - - except Exception as ex: - if self._reqrec: - self._reqrec.add_body_text(body).record() - raise - class ProjectRecordHandler(DBIOHandler): """ base handler class for all requests on project records. This base allows requests to be funneled @@ -312,7 +210,7 @@ def do_GET(self, path, ashead=False): def do_PUT(self, path): try: name = self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) try: @@ -385,7 +283,7 @@ def do_GET(self, path, ashead=False): def do_PUT(self, path): try: newdata = self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) try: @@ -404,7 +302,7 @@ def do_PUT(self, path): def do_PATCH(self, path): try: newdata = self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) try: @@ -490,7 +388,7 @@ def do_POST(self, path): """ try: newdata = self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) if not newdata['name']: @@ -576,7 +474,7 @@ def do_POST(self, path): try: # the input should be a single string giving a user or group identity to add to PERM ACL identity = self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) # make sure a permission type, and only a permission type, is specified @@ -614,7 +512,7 @@ def do_PUT(self, path): """ try: identities = 
self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) # make sure a permission type, and only a permission type, is specified @@ -659,7 +557,7 @@ def do_PATCH(self, path): try: # input is a list of user and/or group identities to add the PERM ACL identities = self.get_json_body() - except FatalError as ex: + except self.FatalError as ex: return self.send_fatal_error(ex) # make sure path is a permission type (PERM), and only a permission type From 92f165a9949485455dd2c1917279dcdca1976b1b Mon Sep 17 00:00:00 2001 From: Ray Plante Date: Wed, 28 Sep 2022 17:24:56 -0400 Subject: [PATCH 004/123] dbio.wsgi: fix infrastructure import --- python/nistoar/midas/dbio/wsgi/base.py | 2 +- python/nistoar/midas/dbio/wsgi/project.py | 2 +- python/nistoar/pdr/publish/service/wsgi/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py index 85af46f..0cde441 100644 --- a/python/nistoar/midas/dbio/wsgi/base.py +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -1,7 +1,7 @@ """ Some common code for implementing the WSGI front end to dbio """ -from nistoar.pdr.publish import Handler # use same sevice infrastructure as the publishing service +from nistoar.pdr.publish.service.wsgi import Handler # same infrastructure as the publishing service class DBIOHandler(Handler): """ diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 53a8357..64784b3 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -14,7 +14,7 @@ from collections import OrderedDict from collections.abc import Mapping, Sequence -from nistoar.pdr.publish import SubApp, Handler # use same sevice infrastructure as the publishing service +from nistoar.pdr.publish.service.wsgi import SubApp, Handler # same infrastructure as publishing service from 
nistoar.pdr.utils.webrecord import WebRecorder from .. import dbio from ..dbio import ProjectRecord diff --git a/python/nistoar/pdr/publish/service/wsgi/__init__.py b/python/nistoar/pdr/publish/service/wsgi/__init__.py index 7fb97d4..938d2cb 100644 --- a/python/nistoar/pdr/publish/service/wsgi/__init__.py +++ b/python/nistoar/pdr/publish/service/wsgi/__init__.py @@ -8,7 +8,7 @@ from ... import ConfigurationException, PublishSystem, system from ...prov import PubAgent -from .base import Ready +from .base import Ready, SubApp, Handler from .pdp0 import PDP0App log = logging.getLogger(system.system_abbrev) \ From 98dfc6c271e6db7e117b942f3cd071df52f3b05f Mon Sep 17 00:00:00 2001 From: Ray Plante Date: Fri, 30 Sep 2022 16:22:55 -0400 Subject: [PATCH 005/123] dbio.wsgi: unit tests for project --- python/nistoar/midas/dbio/base.py | 6 +- python/nistoar/midas/dbio/wsgi/base.py | 29 +- python/nistoar/midas/dbio/wsgi/project.py | 262 +++++---- .../nistoar/midas/dbio/wsgi/test_project.py | 543 ++++++++++++++++++ 4 files changed, 723 insertions(+), 117 deletions(-) create mode 100644 python/tests/nistoar/midas/dbio/wsgi/test_project.py diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index a620c74..28a02ce 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -32,7 +32,7 @@ ANONYMOUS = PUBLIC_GROUP __all__ = ["DBClient", "DBClientFactory", "ProjectRecord", "DBGroups", "Group", "ACLs", "PUBLIC_GROUP", - "ANONYMOUS", "DRAFT_PROJECTS", "DMP_PROJECTS"] + "ANONYMOUS", "DRAFT_PROJECTS", "DMP_PROJECTS", "ObjectNotFound", "NotAuthorized", "AlreadyExists"] Permissions = Union[str, Sequence[str], AbstractSet[str]] @@ -227,7 +227,7 @@ def authorized(self, perm: Permissions, who: str = None): """ if not who: who = self._cli.user_id - if (self.owner and who == self.owner) or who in self._cli._cfg.get("superusers", []): + if who in self._cli._cfg.get("superusers", []): return True if isinstance(perm, str): @@ -267,7 
+267,7 @@ def validate(self, errs=None, data=None) -> List[str]: return errs def to_dict(self): - self._data['acls'] = self.acls._data + self._data['acls'] = self.acls._perms return deepcopy(self._data) class Group(ProtectedRecord): diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py index 0cde441..c88fa5a 100644 --- a/python/nistoar/midas/dbio/wsgi/base.py +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -1,19 +1,26 @@ """ Some common code for implementing the WSGI front end to dbio """ -from nistoar.pdr.publish.service.wsgi import Handler # same infrastructure as the publishing service +import logging, json +from collections import OrderedDict +from collections.abc import Callable + +from nistoar.pdr.publish.service.wsgi import SubApp, Handler # same infrastructure as the publishing service +from nistoar.pdr.publish.prov import PubAgent +from .. import DBClient class DBIOHandler(Handler): """ a base class for handling requests for DBIO data. It provides some common utililty functions for sending responses and dealing with errors. """ - def __init__(self, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, path: str="", - config: dict=None, log: Logger=None): + def __init__(self, subapp: SubApp, dbclient: DBClient, wsgienv: dict, start_resp: Callable, + who: PubAgent, path: str="", config: dict=None, log: logging.Logger=None): """ Initialize this handler with the request particulars. :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param DBClient dbclient: the DBIO client to use :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response :param PubAgent who: the authenticated user making the request. 
@@ -26,15 +33,13 @@ def __init__(self, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: Pub :param Logger log: the logger to use within this handler; if not provided (typical), the logger attached to the SubApp will be used. """ - self._app = app + self._app = subapp if config is None: - config = self._app._cfg - if not who: - who = self._app._dbcli.user_id + config = self._app.cfg if not log: log = self._app.log Handler.__init__(self, path, wsgienv, start_resp, who, config, log) - self._dbcli = self._app._dbcli + self._dbcli = dbclient self._reqrec = None if hasattr(self._app, "_recorder") and self._app._recorder: self._reqrec = self._app._recorder.from_wsgi(self._env) @@ -43,14 +48,14 @@ class FatalError(Exception): def __init__(self, code, reason, explain=None, id=None): if not explain: explain = reason or '' - super(FatalError, self).__init__(explain) + super(DBIOHandler.FatalError, self).__init__(explain) self.code = code self.reason = reason self.explain = explain self.id = id def send_fatal_error(self, fatalex: FatalError, ashead=False): - send_error_resp(fatalex.code, fatalex.reason, fatalex.explain, fatalex.id, ashead) + self.send_error_resp(fatalex.code, fatalex.reason, fatalex.explain, fatalex.id, ashead) def send_error_resp(self, code, reason, explain, id=None, ashead=False): """ @@ -68,7 +73,7 @@ def send_error_resp(self, code, reason, explain, id=None, ashead=False): 'midas:message': explain, } if id: - resp['midas:id'] = sipid + resp['midas:id'] = id return self.send_json(resp, reason, code, ashead) @@ -81,7 +86,7 @@ def get_json_body(self): if bodyin is None: if self._reqrec: self._reqrec.record() - raise FatalError(400, "Missing input", "Missing expected input JSON data") + raise self.FatalError(400, "Missing input", "Missing expected input JSON data") if self.log.isEnabledFor(logging.DEBUG) or self._reqrec: body = bodyin.read() diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 
64784b3..5134c98 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -12,12 +12,14 @@ """ from logging import Logger from collections import OrderedDict -from collections.abc import Mapping, Sequence +from collections.abc import Mapping, Sequence, Callable +from urllib.parse import parse_qs from nistoar.pdr.publish.service.wsgi import SubApp, Handler # same infrastructure as publishing service +from nistoar.pdr.publish.prov import PubAgent from nistoar.pdr.utils.webrecord import WebRecorder -from .. import dbio -from ..dbio import ProjectRecord +from ... import dbio +from ...dbio import ProjectRecord, DBClientFactory from .base import DBIOHandler from .broker import ProjectRecordBroker @@ -29,11 +31,11 @@ class MIDASProjectApp(SubApp): def_project_broker_class = ProjectRecordBroker def __init__(self, servicetype, log: Logger, dbcli_factory: DBClientFactory, - foruser: str, config: dict={}, project_broker_cls=None): - super(MIDASApp, self).__init__(servicetype, log, config) + config: dict={}, project_broker_cls=None): + super(MIDASProjectApp, self).__init__(servicetype, log, config) ## create dbio client from config - self._prjbrkr_cls = self.cfg.get('project_handler_class', self.def_project_handler_class) + self._prjbrkr_cls = self.cfg.get('project_handler_class', self.def_project_broker_class) self._dbfact = dbcli_factory def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: @@ -52,9 +54,10 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge pbroker = self._prjbrkr_cls(dbcli, self.cfg, env, self.log) # now parse the requested path; we have different handlers for different types of paths + path = path.strip('/') idattrpart = path.split('/', 2) if len(idattrpart) < 2: - if not idattrpart: + if not idattrpart[0]: # path is empty: this is used to list all available projects or create a new one return ProjectSelectionHandler(pbroker, self, 
env, start_resp, who) else: @@ -72,8 +75,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge elif idattrpart[1] == "acls": # path=ID/acls: get/update the access control on record ID if len(idattrpart) < 3: - idattrpart.append(None) - return ProjectACLsHandler(self, env, start_resp, who, idattrpart[0], idattrpart[2]) + idattrpart.append("") + return ProjectACLsHandler(pbroker, self, env, start_resp, who, idattrpart[0], idattrpart[2]) # the fallback handler will return some arbitrary part of the record if len(idattrpart) > 2: @@ -106,9 +109,54 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s logger attached to the SubApp will be used. """ - super(ProjectHandler, self).__init__(subapp, wsgienv, start_resp, who, path, config, log) + super(ProjectRecordHandler, self).__init__(subapp, broker.dbcli, wsgienv, start_resp, who, + path, config, log) self._pbrkr = broker +class ProjectHandler(ProjectRecordHandler): + """ + handle access to the whole project record + """ + + def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, config: dict=None, log: Logger=None): + """ + Initialize this handler with the request particulars. This constructor is called + by the webs service SubApp. + + :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update + the project data through. + :param SubApp subapp: the web service SubApp receiving the request and calling this constructor + :param dict wsgienv: the WSGI request context dictionary + :param Callable start_resp: the WSGI start-response function used to send the response + :param PubAgent who: the authenticated user making the request. + :param str id: the ID of the project record being requested + :param dict config: the handler's configuration; if not provided, the inherited constructor + will extract the configuration from `subapp`. 
Normally, the constructor + is called without this parameter. + :param Logger log: the logger to use within this handler; if not provided (typical), the + logger attached to the SubApp will be used. + """ + + super(ProjectHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, "", config, log) + + self._id = id + if not id: + # programming error + raise ValueError("Missing ProjectRecord id") + + def do_GET(self, path, ashead=False): + try: + prec = self._pbrkr.get_record(self._id) + except dbio.NotAuthorized as ex: + return self.send_unauthorized() + except dbio.ObjectNotFound as ex: + return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", + self._id, ashead=ashead) + + return self.send_json(prec.to_dict()) + + class ProjectInfoHandler(ProjectRecordHandler): """ handle retrieval of simple parts of a project record @@ -127,6 +175,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s :param Callable start_resp: the WSGI start-response function used to send the response :param PubAgent who: the authenticated user making the request. :param str id: the ID of the project record being requested + :param str attribute: a recognized project model attribute :param dict config: the handler's configuration; if not provided, the inherited constructor will extract the configuration from `subapp`. Normally, the constructor is called without this parameter. @@ -134,8 +183,8 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s logger attached to the SubApp will be used. 
""" - super(ProjectInfoHandler, self).__init__(broker, subapp, attribute, wsgienv, start_resp, - who, attribute, config, self._app.log) + super(ProjectInfoHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, attribute, + config, log) self._id = id if not id: # programming error @@ -148,21 +197,22 @@ def do_GET(self, path, ashead=False): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", + self._id, ashead=ashead) parts = path.split('/') data = prec.to_dict() while len(parts) > 0: attr = parts.pop(0) if not isinstance(data, Mapping) or attr not in data: - return send_error(404, "Record attribute not available", - "Requested record attribute not found", self._id, ashead=ashead) + return self.send_error(404, "Record attribute not available", + "Requested record attribute not found", self._id, ashead=ashead) data = data[attr] - return send_json(data, ashead=ashead) + return self.send_json(data, ashead=ashead) class ProjectNameHandler(ProjectRecordHandler): """ @@ -170,7 +220,7 @@ class ProjectNameHandler(ProjectRecordHandler): """ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, - who: PubAgent, id: str, config: dict={}, log: Logger=None): + who: PubAgent, id: str, config: dict=None, log: Logger=None): """ Initialize this handler with the request particulars. This constructor is called by the webs service SubApp. 
@@ -200,10 +250,10 @@ def do_GET(self, path, ashead=False): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", + self._id, ashead=ashead) return self.send_json(prec.name) @@ -219,11 +269,12 @@ def do_PUT(self, path): if not prec.authorized(dbio.ACLs.ADMIN): raise dbio.NotAuthorized(self._dbcli.user_id, "change record name") prec.save() + return self.send_json(prec.name) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) class ProjectDataHandler(ProjectRecordHandler): """ @@ -269,15 +320,15 @@ def do_GET(self, path, ashead=False): :param bool ashead: if True, the request is actually a HEAD request for the data """ try: - out = self.get_data(self._id, part) + out = self._pbrkr.get_data(self._id, path) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: if ex.record_part: - return send_error_resp(404, "Data property not found", - "No data found at requested property", self._id, ashead=ashead) - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "Data property not found", + "No data found at requested property", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) 
return self.send_json(out) def do_PUT(self, path): @@ -287,17 +338,19 @@ def do_PUT(self, path): return self.send_fatal_error(ex) try: - return self.replace_data(self._id, newdata, path) + data = self._pbrkr.replace_data(self._id, newdata, path) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) except InvalidUpdate as ex: - return send_error_resp(400, "Invalid Input Data", str(ex)) + return self.send_error_resp(400, "Invalid Input Data", str(ex)) except PartNotAccessible as ex: - return send_error_resp(405, "Data part not updatable", - "Requested part of data cannot be updated") + return self.send_error_resp(405, "Data part not updatable", + "Requested part of data cannot be updated") + + return self.send_json(data) def do_PATCH(self, path): try: @@ -306,17 +359,19 @@ def do_PATCH(self, path): return self.send_fatal_error(ex) try: - return self.update_data(self._id, newdata, path) + data = self._pbrkr.update_data(self._id, newdata, path) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) except InvalidUpdate as ex: - return send_error_resp(400, "Invalid Input Data", str(ex)) + return self.send_error_resp(400, "Invalid Input Data", str(ex)) except PartNotAccessible as ex: - return send_error_resp(405, "Data part not updatable", - "Requested part of data cannot be updated") + return self.send_error_resp(405, "Data part not updatable", + "Requested part of data 
cannot be updated") + + return self.send_json(data) class ProjectSelectionHandler(ProjectRecordHandler): @@ -356,23 +411,24 @@ def do_GET(self, path, ashead=False): params = parse_qs(qstr) perms = params.get('perm') if not perms: - perms = [ dbio.ACLs.READWRITE ] + perms = dbio.ACLs.OWN # sort the results by the best permission type permitted selected = OrderedDict() for rec in self._dbcli.select_records(perms): - if rec.owner == _dbcli.user_id: - rec['maxperm'] = "owner" + maxperm = '' + if rec.owner == self._dbcli.user_id: + maxperm = "owner" elif rec.authorized(dbio.ACLs.ADMIN): - rec['maxperm'] = dbio.ACLs.ADMIN + maxperm = dbio.ACLs.ADMIN elif rec.authorized(dbio.ACLs.WRITE): - rec['maxperm'] = dbio.ACLs.WRITE + maxperm = dbio.ACLs.WRITE else: - rec['maxperm'] = dbio.ACLs.READ + maxperm = dbio.ACLs.READ - if rec['perm'] not in selected: - selected[rec['perm']] = [] - selected[rec['perm']].append(rec) + if maxperm not in selected: + selected[maxperm] = [] + selected[maxperm].append(rec) # order the matched records based on best permissions out = [] @@ -380,7 +436,7 @@ def do_GET(self, path, ashead=False): for rec in selected.get(perm, []): out.append(rec.to_dict()) - return send_json(out, ashead=ashead) + return self.send_json(out, ashead=ashead) def do_POST(self, path): """ @@ -392,16 +448,16 @@ def do_POST(self, path): return self.send_fatal_error(ex) if not newdata['name']: - return send_error_resp(400, "Bad POST input", "No mneumonic name provided") + return self.send_error_resp(400, "Bad POST input", "No mneumonic name provided") try: prec = self.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.AlreadyExists as ex: - return send_error_resp(400, "Name already in use", str(ex)) + return self.send_error_resp(400, "Name already in use", str(ex)) - return send_json(prec.to_dict()) + return self.send_json(prec.to_dict()) class 
ProjectACLsHandler(ProjectRecordHandler): @@ -410,7 +466,7 @@ class ProjectACLsHandler(ProjectRecordHandler): """ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, - who: PubAgent, id: str, datapath: str, config: dict=None, log: Logger=None): + who: PubAgent, id: str, datapath: str="", config: dict=None, log: Logger=None): """ Initialize this data request handler with the request particulars. This constructor is called by the webs service SubApp in charge of the project record interface. @@ -422,7 +478,8 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s :param Callable start_resp: the WSGI start-response function used to send the response :param PubAgent who: the authenticated user making the request. :param str id: the ID of the project record being requested - :param str datapath: the subpath pointing to a particular piece of the project record's data; + :param str permpath: the subpath pointing to a particular permission ACL; it can either be + simply a permission name, PERM (e.g. "read"), or a p this will be a '/'-delimited identifier pointing to an object property within the data object. This will be an empty string if the full data object is requested. @@ -432,7 +489,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s :param Logger log: the logger to use within this handler; if not provided (typical), the logger attached to the SubApp will be used. 
""" - super(ProjectDataHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, datapath, + super(ProjectACLsHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, datapath, config, log) self._id = id if not id: @@ -444,10 +501,11 @@ def do_GET(self, path, ashead=False): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", + self._id, ashead=ashead) recd = prec.to_dict() if not path: @@ -492,15 +550,15 @@ def do_POST(self, path): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) if path in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: - pres.acls.grant_perm_to(path, identity) - pres.save() - return send_json(prec.to_dict().get('acls', {})) + prec.acls.grant_perm_to(path, identity) + prec.save() + return self.send_json(prec.to_dict().get('acls', {}).get(path,[])) return self.send_error_resp(405, "POST not allowed on this permission type", "Updating specified permission is not allowed") @@ -510,19 +568,19 @@ def do_PUT(self, path): replace the list of identities in a particular ACL. This handles PUT ID/acls/PERM; `path` should be set to PERM. Note that previously set identities are removed. 
""" - try: - identities = self.get_json_body() - except self.FatalError as ex: - return self.send_fatal_error(ex) - # make sure a permission type, and only a permission type, is specified path = path.strip('/') if not path or '/' in path: return self.send_error_resp(405, "PUT not allowed", "Unable set ACL membership") + try: + identities = self.get_json_body() + except self.FatalError as ex: + return self.send_fatal_error(ex) + if isinstance(identities, str): identities = [identities] - if not isinstance(identity, list): + if not isinstance(identities, list): return self.send_error_resp(400, "Wrong input data type" "Input data is not a string providing a user/group list") @@ -531,19 +589,19 @@ def do_PUT(self, path): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) if path in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: try: - pres.acls.revoke_perm_for_alll(path) - pres.acls.grant_perm_to(path, *identities) - pres.save() - return send_json(prec.to_dict().get('acls', {})) + prec.acls.revoke_perm_from_all(path) + prec.acls.grant_perm_to(path, *identities) + prec.save() + return self.send_json(prec.to_dict().get('acls', {}).get(path,[])) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() return self.send_error_resp(405, "PUT not allowed on this permission type", "Updating specified permission is not allowed") @@ -568,7 +626,7 @@ def do_PATCH(self, path): if isinstance(identities, str): identities = [identities] - if not isinstance(identity, list): + if not isinstance(identities, list): return self.send_error_resp(400, "Wrong input data type" "Input 
data is not a list of user/group identities") @@ -577,18 +635,18 @@ def do_PATCH(self, path): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) - if path in in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + if path in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: try: - pres.acls.grant_perm_to(path, *identities) - pres.save() - return send_json(prec.to_dict().get('acls', {})) + prec.acls.grant_perm_to(path, *identities) + prec.save() + return self.send_json(prec.to_dict().get('acls', {}).get(path, [])) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() return self.send_error_resp(405, "PATCH not allowed on this permission type", "Updating specified permission is not allowed") @@ -613,19 +671,19 @@ def do_DELETE(self, path): try: prec = self._pbrkr.get_record(self._id) except dbio.NotAuthorized as ex: - return send_unauthorized() + return self.send_unauthorized() except dbio.ObjectNotFound as ex: - return send_error_resp(404, "ID not found", - "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) - if path in in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: + if parts[0] in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: # remove the identity from the ACL try: - pres.acls.revoke_perm_from(parts[0], parts[1]) - pres.save() - return send_ok() + prec.acls.revoke_perm_from(parts[0], parts[1]) + prec.save() + return self.send_ok() except dbio.NotAuthorized as ex: 
- return send_unauthorized() + return self.send_unauthorized() return self.send_error_resp(405, "DELETE not allowed on this permission type", "Updating specified permission is not allowed") diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py new file mode 100644 index 0000000..edb64a1 --- /dev/null +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -0,0 +1,543 @@ +import os, json, pdb, logging, tempfile +from collections import OrderedDict +from io import StringIO +import unittest as test + +from nistoar.midas.dbio import inmem, base +from nistoar.midas.dbio.wsgi import project as prj +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_broker.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +class TestMIDASProjectApp(test.TestCase): + + def start(self, status, headers=None, extup=None): + self.resp.append(status) + for head in headers: + self.resp.append("{0}: {1}".format(head[0], head[1])) + + def body2dict(self, body): + return json.loads("\n".join(self.tostr(body)), object_pairs_hook=OrderedDict) + + def tostr(self, resplist): + return [e.decode() for e in resplist] + + def setUp(self): + self.cfg = { + "superusers": [ "rlp" ], + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + }, + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } + self.dbfact = inmem.InMemoryDBClientFactory(self.cfg, { "nextnum": { "mdm1": 2 
}}) + self.app = prj.MIDASProjectApp(base.DMP_PROJECTS, rootlog.getChild("dmpapi"), self.dbfact, self.cfg) + self.resp = [] + self.rootpath = "/midas/dmp/" + + def create_record(self, name="goob"): + return self.dbfact.create_client(base.DMP_PROJECTS, nistr.actor).create_record(name, "mdm1") + + def sudb(self): + return self.dbfact.create_client(base.DMP_PROJECTS, "rlp") + + def test_create_handler_name(self): + path = "mdm1:0001/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectNameHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + self.assertEqual(hdlr._id, "mdm1:0001") + + def test_get_name(self): + path = "mdm1:0003/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record("goob") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, "goob") + + self.resp = [] + path = "mdm1:0001/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + def test_put_name(self): + path = "mdm1:0003/name" + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("gary")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record("goob") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, "gary") + + self.resp = [] + path = "mdm1:0003/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + resp = 
self.body2dict(body) + self.assertEqual(resp, "gary") + + self.resp = [] + path = "mdm1:0001/name" + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("hank")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + def test_name_methnotallowed(self): + path = "mdm1:0003/name" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("gary")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record("goob") + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + self.resp = [] + path = "mdm1:0001/name" + req = { + 'REQUEST_METHOD': 'DELETE', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + def test_create_handler_full(self): + path = "mdm1:0001/" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + self.assertEqual(hdlr._id, "mdm1:0001") + + path = "mdm1:0001" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + self.assertEqual(hdlr._id, "mdm1:0001") + + def test_get_full(self): + path = "mdm1:0003/" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record() + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + 
self.assertEqual(resp['name'], "goob") + self.assertEqual(resp['id'], "mdm1:0003") + + self.resp = [] + path = "mdm1:0001" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + def test_full_methnotallowed(self): + path = "mdm1:0003" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("gary")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record("goob") + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + self.resp = [] + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("gary")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + self.resp = [] + path = "mdm1:0001" + req = { + 'REQUEST_METHOD': 'DELETE', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + def test_search(self): + path = "" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), []) + + prec = self.create_record("bob") + self.assertEqual(prec.name, "bob") + prec = self.create_record("carole") + self.assertEqual(prec.name, "carole") + prec = self.sudb().get_record_by_name("carole", prec.owner) + self.assertEqual(prec.name, "carole") + self.assertEqual(prec.id, "mdm1:0004") + self.assertTrue(prec.authorized(prec.acls.WRITE, "nstr1")) + 
prec.acls.revoke_perm_from(prec.acls.WRITE, "nstr1") + prec.save() + self.assertTrue(not prec.authorized(prec.acls.WRITE, "nstr1")) + + self.resp = [] + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + matches = self.body2dict(body) + self.assertEqual(len(matches), 2) + names = [m['name'] for m in matches] + self.assertIn("bob", names) + self.assertIn("carole", names) + + self.resp = [] + req['QUERY_STRING'] = "perm=write" + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + matches = self.body2dict(body) + self.assertEqual(len(matches), 1) + self.assertEqual(matches[0]['name'], "bob") + + self.resp = [] + req['QUERY_STRING'] = "perm=write&perm=read" + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + matches = self.body2dict(body) + self.assertEqual(len(matches), 2) + names = [m['name'] for m in matches] + self.assertIn("bob", names) + self.assertIn("carole", names) + + def test_getput_data(self): + path = "mdm1:0003/data" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + self.assertEqual(hdlr._id, "mdm1:0003") + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + self.resp = [] + prec = self.create_record() + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), {}) + + self.resp = [] + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"color": "red", "pos": {"vec": [1,2,3]}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), {"color": "red", "pos": {"vec": [1,2,3]}}) + + self.resp = [] + path += "/color" + req = 
{ + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), "red") + + self.resp = [] + path = "mdm1:0003/data/pos/vec" + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps([4,5,6])) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), [4,5,6]) + + self.resp = [] + path = "mdm1:0003" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + prec = self.body2dict(body) + self.assertEqual(prec['id'], "mdm1:0003") + self.assertEqual(prec['name'], "goob") + self.assertEqual(prec['data'], {"color": "red", "pos": {"vec": [4,5,6]}}) + self.assertEqual(prec['meta'], {}) + + def test_create_handler_datapart(self): + path = "pdr0:0012/data/authors" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "authors") + self.assertEqual(hdlr._id, "pdr0:0012") + + def test_create_handler_acls(self): + path = "mdm1:0003/acls" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + self.assertEqual(hdlr._id, "mdm1:0003") + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + self.resp = [] + prec = 
self.create_record() + acls = dict([(p, ["nstr1"]) for p in "read write admin delete".split()]) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), acls) + + def test_acls_methnotallowed(self): + path = "mdm1:0003/acls" + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record() + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + self.resp = [] + req['REQUEST_METHOD'] = 'DELETE' + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + + def test_getupd_aclsperm(self): + path = "mdm1:0003/acls/read" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "read") + self.assertEqual(hdlr._id, "mdm1:0003") + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + self.resp = [] + prec = self.create_record() + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["nstr1"]) + + self.resp = [] + req['REQUEST_METHOD'] = 'POST' + req['wsgi.input'] = StringIO(json.dumps("gary")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["nstr1", "gary"]) + + self.resp = [] + req['REQUEST_METHOD'] = 'GET' + del req['wsgi.input'] + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["nstr1", "gary"]) + + self.resp = [] + req['REQUEST_METHOD'] = 'PATCH' + req['wsgi.input'] = StringIO(json.dumps(["gary", "hank"])) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + 
self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["nstr1", "gary", "hank"]) + + self.resp = [] + req['REQUEST_METHOD'] = 'GET' + del req['wsgi.input'] + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["nstr1", "gary", "hank"]) + + self.resp = [] + req['REQUEST_METHOD'] = 'PUT' + req['wsgi.input'] = StringIO(json.dumps(["hank", "nstr1"])) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["hank", "nstr1"]) + + self.resp = [] + req['REQUEST_METHOD'] = 'GET' + del req['wsgi.input'] + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["hank", "nstr1"]) + + def test_getdel_aclspermmem(self): + path = "mdm1:0003/acls/write/hank" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "write/hank") + self.assertEqual(hdlr._id, "mdm1:0003") + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + self.resp = [] + prec = self.create_record() + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), False) + + prec.acls.grant_perm_to("write", "hank") + prec.save() + + self.resp = [] + req['REQUEST_METHOD'] = 'GET' + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), True) + + self.resp = [] + req['REQUEST_METHOD'] = 'DELETE' + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", 
self.resp[0]) + + self.resp = [] + req['REQUEST_METHOD'] = 'GET' + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), False) + + self.resp = [] + path = "mdm1:0003/acls/write" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + self.assertEqual(self.body2dict(body), ["nstr1"]) + + + +if __name__ == '__main__': + test.main() + + From d0ccab8c12d14b93ade4e9f65d673c0b467b30de Mon Sep 17 00:00:00 2001 From: Ray Plante Date: Fri, 30 Sep 2022 16:51:13 -0400 Subject: [PATCH 006/123] dbio.wsgi: add/debug create test to project --- python/nistoar/midas/dbio/wsgi/project.py | 8 ++--- .../nistoar/midas/dbio/wsgi/test_project.py | 30 +++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 5134c98..f01878b 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -51,7 +51,7 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge # set up dbio client and the request handler that will mediate with it dbcli = self._dbfact.create_client(self._name, who.actor) - pbroker = self._prjbrkr_cls(dbcli, self.cfg, env, self.log) + pbroker = self._prjbrkr_cls(dbcli, self.cfg, who, env, self.log) # now parse the requested path; we have different handlers for different types of paths path = path.strip('/') @@ -447,17 +447,17 @@ def do_POST(self, path): except self.FatalError as ex: return self.send_fatal_error(ex) - if not newdata['name']: + if not newdata.get('name'): return self.send_error_resp(400, "Bad POST input", "No mneumonic name provided") try: - prec = self.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) + 
prec = self._pbrkr.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.AlreadyExists as ex: return self.send_error_resp(400, "Name already in use", str(ex)) - return self.send_json(prec.to_dict()) + return self.send_json(prec.to_dict(), "Project Created", 201) class ProjectACLsHandler(ProjectRecordHandler): diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index edb64a1..9e9c962 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -240,6 +240,36 @@ def test_full_methnotallowed(self): body = hdlr.handle() self.assertIn("405 ", self.resp[0]) + def test_create(self): + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("400 ", self.resp[0]) + + self.resp = [] + req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "big") + self.assertEqual(resp['owner'], "nstr1") + self.assertEqual(resp['id'], "mdm1:0003") + self.assertEqual(resp['data'], {"color": "red"}) + self.assertEqual(resp['meta'], {}) + + def test_search(self): path = "" req = { From b4f2db9f99add8908326e593a9aaded666973d23 Mon Sep 17 00:00:00 2001 From: 
RayPlante Date: Fri, 21 Oct 2022 06:52:29 -0400 Subject: [PATCH 007/123] dbio.wsgi: add test for getting arbitrary info (like meta) --- python/nistoar/midas/dbio/wsgi/project.py | 4 +- .../nistoar/midas/dbio/wsgi/test_project.py | 70 ++++++++++++++++++- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index f01878b..9905b39 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -81,7 +81,7 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge # the fallback handler will return some arbitrary part of the record if len(idattrpart) > 2: idattrpart[1] = "/".join(idattrpart[1:]) - return ProjectInfoHandler(self, env, start_resp, who, idattrpart[0], idattrpart[1]) + return ProjectInfoHandler(pbroker, self, env, start_resp, who, idattrpart[0], idattrpart[1]) class ProjectRecordHandler(DBIOHandler): """ @@ -159,7 +159,7 @@ def do_GET(self, path, ashead=False): class ProjectInfoHandler(ProjectRecordHandler): """ - handle retrieval of simple parts of a project record + handle retrieval of simple parts of a project record. Only GET requests are allowed via this handler. 
""" def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index 9e9c962..88e8cea 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -62,8 +62,13 @@ def setUp(self): self.resp = [] self.rootpath = "/midas/dmp/" - def create_record(self, name="goob"): - return self.dbfact.create_client(base.DMP_PROJECTS, nistr.actor).create_record(name, "mdm1") + def create_record(self, name="goob", meta=None): + cli = self.dbfact.create_client(base.DMP_PROJECTS, nistr.actor) + out = cli.create_record(name, "mdm1") + if meta: + out.meta = meta + out.save() + return out def sudb(self): return self.dbfact.create_client(base.DMP_PROJECTS, "rlp") @@ -565,6 +570,67 @@ def test_getdel_aclspermmem(self): self.assertIn("200 ", self.resp[0]) self.assertEqual(self.body2dict(body), ["nstr1"]) + def test_get_info(self): + path = "mdm1:0003/id" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectInfoHandler)) + prec = self.create_record("goob", {"foo": "bar"}) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, "mdm1:0003") + + self.resp = [] + path = "mdm1:0001/id" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + self.resp = [] + path = "mdm1:0003/meta" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectInfoHandler)) + body = hdlr.handle() + self.assertIn("200 ", 
self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, {"foo": "bar"}) + + self.resp = [] + path = "mdm1:0003/meta/foo" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectInfoHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, "bar") + + self.resp = [] + path = "mdm1:0003/meta/bob" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectInfoHandler)) + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + if __name__ == '__main__': From 2ecdaccf9addf7fdebcdaf87674d1e5dc400bdc4 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 7 Nov 2022 15:38:47 -0500 Subject: [PATCH 008/123] docs/pdp-openapi.yml: quick syntax fix --- docs/pdp-openapi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/pdp-openapi.yml b/docs/pdp-openapi.yml index d56c6bb..f9cce55 100644 --- a/docs/pdp-openapi.yml +++ b/docs/pdp-openapi.yml @@ -75,8 +75,8 @@ paths: submitted metadata will be filtered, expanded according to policy. The modified metadata that actually gets saved will be returned. 
+ A SystemInfoMixin representing the overall MIDAS system.
+These endpoints send and receive data stored in the backend database through the common +:py:module:` DBIO layer `. + +The app configuration determines which endpoints are actually available.
+``base_endpoint`` + (str) _optional_. the URL resource path where the base of the service suite is accessed. + The default value is "/midas/". An empty string is equivalent to "/", the root path. +``strict`` + (bool) _optional_. if False and if a service type (see below) given in this configuration is not + recognized, a ``ConfigurationException`` will be raised.
There are no requirements + on the properties in this object except that it should _not_ include "services" or "versions". +``conventions`` + (object) _optional_. an object in which each property is a convention name supported for the service + (as referred to above in the API endpoint pattern--e.g., "mdm1" for the DMP service), and its value is + the configuration for that convention (i.e. version) of the service. Any properties given here + override properties of the same name given at the service level, as discussed above. The properties + can be service- or convention-specific, apart from the required property, ``type`` (defined below). +``default_convention`` + (str) _optional_. the name of the convention (one of the names specified as a property of the + ``conventions`` field described above) that should be considered the default convention. If a client + requests the special convention name "def", the request will be routed to the version of the service + with that name. + +There are two common properties that can appear in either the service or convention level (or both, where +the convention level takes precedence): ``project_name`` and ``type``. These optional properties are +defined as follows: + +``project_name`` + (str) _optional_. a name indicating the type of DBIO project the service manages. This name + corresponds to a DBIO project collection name. It defaults to the value of the name associated with + the configuration under the ``services`` property (described above). +``type`` + (str) _optional_. a name that serves as an alias for the Python ``SubApp`` class that implements + the service convention. The default value is the service and convention names combined as + "_service_/_convention_". 
+ +""" +from .base import SubApp, Handler, DBIOHandler +from .wsgiapp import MIDASApp + +app = MIDASApp diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 9905b39..8e22b64 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -30,9 +30,9 @@ class MIDASProjectApp(SubApp): """ def_project_broker_class = ProjectRecordBroker - def __init__(self, servicetype, log: Logger, dbcli_factory: DBClientFactory, + def __init__(self, projname, log: Logger, dbcli_factory: DBClientFactory, config: dict={}, project_broker_cls=None): - super(MIDASProjectApp, self).__init__(servicetype, log, config) + super(MIDASProjectApp, self).__init__(projname, log, config) ## create dbio client from config self._prjbrkr_cls = self.cfg.get('project_handler_class', self.def_project_broker_class) diff --git a/python/nistoar/midas/dbio/wsgi/wsgiapp.py b/python/nistoar/midas/dbio/wsgi/wsgiapp.py new file mode 100644 index 0000000..bedac6a --- /dev/null +++ b/python/nistoar/midas/dbio/wsgi/wsgiapp.py @@ -0,0 +1,394 @@ +""" +A module that provides the top-level WSGI App providing access to the MIDAS services via the DBIO layer. + +The :ref:class:`MIDASApp` class is an WSGI application class that provides the suite of MIDAS services. +Which services are actually made available depends on the configuration provided at construction time. +See the :py:module:`nistoar.midas.dbio.wsgi` module documentation for a description of the +configuraiton schema. + +In addition to providing the :ref:class:`MIDASApp` class, this module provides a mechanism for plugging +addition _project_ services, particularly new conventions of services. The class constructor takes +an optional dictionary parameter that provides in its values the +:ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` class that implements a particular DBIO project +service. 
The keys are labels that correspond to the ``type`` parameter in the +:py:module:`configuration ` and which, by default, have the form +_service_/_convention_ (e.g. ``dmp/mdm1``). If this dictionary is not provided to the constructor, a +default defined in this module, ``_MIDASSubApps`` is used. Thus, the normal way to add a new service +implementation to the suite is to add it to the internal ``_MIDASSubApps`` dictionary. + +This module also provides two other classes that are used internally to initialize a :ref:class:`MIDASApp` +instance: :ref:class:`SubAppFactory` and :ref:class:`About`. :ref:class:`SubAppFactory` is used within +:ref:class:`MIDASApp` to instantiate all of the ``SubApp`` classes given in the above mentioned +dictionary; however, it also provides functions to instantiate a ``SubApp`` individually and to extract +the configuration parameters needed to do that instantiation. :ref:class:`About` is a ``SubApp`` that +returns through the web interface information about the MIDAS services. :ref:class:`SubAppFactory` +injects instances of this class into :ref:class:`MIDASApp` to respond to GET requests on the MIDAS +service's parent resources. +""" +import os, sys, logging, json, re +from logging import Logger +from wsgiref.headers import Headers +from collections import OrderedDict; from collections.abc import Mapping, MutableMapping, Callable +from copy import deepcopy + +from ... import system +from . import project as prj, SubApp, Handler +from ..base import DBClientFactory +from ..inmem import InMemoryDBClientFactory +from nistoar.pdr.publish.prov import PubAgent +from nistoar.base.config import ConfigurationException, merge_config + +log = logging.getLogger(system.system_abbrev) \ + .getChild(system.subsystem_abbrev) \ + .getChild('wsgi') + +DEF_BASE_PATH = "/midas/" + +class SubAppFactory: + """ + a factory for creating MIDAS WSGI SubApps based on a configuration. Individual SubApps can be + instantiated on demand or all at once (for :py:class:`MIDASApp`).
+ """ + + def __init__(self, config: Mapping, subapps: Mapping): + """ + :param Mapping subapps: a mapping of type names (referred to in the configuration) to + a SubApp class (factory function that produces a SubApp) that + takes four arguments: an application name, a ``Logger`` instance, + a :py:class:`~nistoar.midas.dbio.DBIOClientFactory` instance, + and the complete convention-specific configuration appropriate + for the SubApp type referred to in by the type name. (See also + :py:method:`register_subapp`.) + :param Mapping config: the configuration for the full collection of MIDAS sub-apps that + be included in the output. + """ + self.cfg = config + self.subapps = subapps + + def register_subapp(self, typename: str, factory: Callable): + """ + Make a SubApp class available through this factory class via a given type name + :param str typename: the type name by which the factory function can accessed + :param str cls_or_fact: a SubApp class or other factory callable that produces a SubApp + that accepts four arguments: an application name, a ``Logger`` instance, + a :py:class:`~nistoar.midas.dbio.DBIOClientFactory` instance, + and the complete convention-specific configuration appropriate + for the SubApp type referred to in by the type name. (See also + :py:method:`register_subapp`. + """ + self.subapps[typename] = factory + + + def config_for_convention(self, appname: str, convention: str, typename: str = None) -> MutableMapping: + """ + Find the convention-specific subapp configuration, merge in its app-level defaults, and + return it as a complete convention-specific configuration, or None if the convention is not + configured. + :param str appname: the name of the MIDAS app to be configured. (Examples are "dmp", + "pdr") + :param str convention: the name of the API convention that is desired in the configuration. 
A + special name "def" refers to the convention that is configured as the + default for the app; an empty string and None behaves in the same way. + :param str typename: a app type name to assign to this configuration, overriding the name + that might be in configuration by default. This name should be used to + select the SubApp factory function in the set of SubApp provided at + construction time. + """ + if appname not in self.cfg: + return None + if not convention: + convention = "def" + + appcfg = deepcopy(self.cfg[appname]) + if "conventions" in appcfg: + cnvcfg = deepcopy(appcfg.get("conventions", {}).get(convention)) + if not cnvcfg and convention == "def" and appcfg.get("default_convention"): + convention = appcfg["default_convention"] + cnvcfg = appcfg.get("conventions", {}).get(convention) + + del appcfg["conventions"] + if "about" in appcfg: + del appcfg["about"] + if cnvcfg: + appcfg = merge_config(cnvcfg, appcfg) + + if type: + appcfg['type'] = type + elif not appcfg.get('type'): + appcfg['type'] = "%s/%s" % (appname, convention) + appcfg.setdefault("project_name", appname) + + return appcfg + + def create_subapp(self, log: Logger, dbio_client_factory: DBClientFactory, + appconfig: Mapping, typename: str=None) -> SubApp: + """ + instantiate a SubApp as specified by the given configuration + :param Logger log: the Logger instance to inject into the SubApp + :param Mapping appconfig: the convention-specific SubApp configuration to initialize the + SubApp with + :param str typename: the name to use to look-up the SubApp's factory function. If not + provided, the value of the configuration's ``type`` property will be + used instead. 
+ :raises ConfigurationException: if the type name is not provided and is not otherwise set in + the configuration + :raises KeyError: if the type name is not recognized as registered SubApp + """ + if not typename: + typename = appconfig.get('type') + if typename is None: + raise ConfigurationException("Missing configuration parameter: type") + factory = self.subapps[typename] + + return factory(appconfig.get('name', typename), log, dbio_client_factory, appconfig) + + def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> MutableMapping: + """ + instantiate all of the MIDAS subapps found configured in the configuration provided at + construction time, returning them as a map of web resource paths to SubApp instances. + The path for a SubApp will be of the form "[appname]/[convention]". Also included will + be About SubApps that provide information and proof-of-life for parent paths. + """ + out = OrderedDict() + about = About(self.cfg.get("about")) + + for appname, appcfg in self.cfg.items(): + if not isinstance(appcfg, Mapping): + # wrong type; skip + continue + + about.add_service(appname, appcfg.get('about', {})) + aboutapp = About(appcfg.get('about', {})) + + if "conventions" in appcfg: + if not isinstance(appcfg["conventions"], Mapping): + raise ConfigurationException("Parameter 'conventions' not a dictionary: "+ + str(type(appcfg["conventions"]))) + + for conv in appcfg.get("conventions", {}): + cnvcfg = self.config_for_convention(appname, conv) + if isinstance(cnvcfg, Mapping): + + # Add an entry into the About SubApp + about.add_version(conv, cnvcfg.get("about", {})) + + path = "%s/%s" % (appname, conv) + try: + out[path] = self.create_subapp(log, dbio_client_factory, cnvcfg) + except KeyError as ex: + if self.cfg.get("strict", False): + raise ConfigurationException("MIDAS app type not recognized: "+str(ex)) + else: + log.warn("Skipping unrecognized MIDAS app type: "+str(ex)) + + # if so configured, set as default + if
appcfg.get("default_convention") == conv: + out["%s/def" % appname] = out[path] + elif not appcfg.get("default_convention") and len(appcfg["conventions"]) == 1: + out["%s/def" % appname] = out[path] + + else: + # No conventions configured for this app name; try to create an app from the defaults + path = appcfg.get("path") + if path is None: + path = "%s/def" % appname + try: + out[path] = self.create_subapp(log, dbio_client_factory, appcfg) + except KeyError as ex: + if self.cfg.get("strict", False): + raise ConfigurationException("MIDAS app type not recognized: "+str(ex)) + else: + log.warn("Skipping unrecognized MIDAS app type: "+str(ex)) + + out[appname] = aboutapp + + out[""] = about + + return out + + +class About(SubApp): + """ + a SubApp intended to provide information about the endpoints available as part of the overall + MIDAS WSGI App. + + This SubApp only supports a GET response, to which it responds with a JSON document containing + data provided to this SubApp at construction time and subsequently added to via ``add_*`` methods. + This document might look something like this: + + .. code-block:: + :caption: An example About response document describing the MIDAS API suite + + { + "message": "Services are available", + "title": "MIDAS Authoring Suite", + "describedBy": "https://midas3.nist.gov/midas/apidocs" + "services": { + "dmp": { + "title": "Data Management Plan Authoring API", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dmp", + "href": "http://midas3.nist.gov/midas/dmp" + }, + "dap": { + "title": "Digital Asset Publication Authoring API", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dap", + "href": "http://midas3.nist.gov/midas/dmp/mdm1" + } + } + } + + """ + + def __init__(self, base_data: Mapping=None): + """ + initialize the SubApp. Some default properties may be added to base_data. 
+ :param Mapping base_data: the initial data that should appear in the GET response JSON object + """ + if not base_data: + base_data = OrderedDict() + self.data = self._init_data(base_data) + + def _init_data(self, data: Mapping): + data = deepcopy(data) + if "message" not in data: + data["message"] = "Service is available" + return data + + def add_component(self, compcat, compname, data): + """ + append data for a named component of the about information to return. Within GET responses, + components are listed under their category property (e.g. "services"), which is an object; each + key in that object is the component's name. This method provides the implementation for + :py:method:`add_service` and :py:method:`add_version`. + + :param str compcat: the component category name to add the data to (e.g. "services"); if a + property does not exist in the base data with this name, it will be added. + :param str compname: the name of the component; the data will be added within the ``compcat`` + object property as the value of a subproperty with this name. If this + subproperty already exists, it will be overridden. + :param Mapping data: the data to add for the component + :raises ValueError: if the ``compcat`` property already exists in the base data but is not an + object.
+ """ + if compcat not in self.data: + self.data[compcat] = OrderedDict() + if not isinstance(self.data[comcat], MutableMapping): + raise ValueError("Category property is not an object: %s: %s" % (comcat, type(self.data[comcat]))) + + self.data[compcat][compname] = data + + def add_service(self, name, data): + """ + add a named description to the ``services`` property + """ + self.add_component("services", name, data) + + def add_version(self, name, data): + """ + add a named description to the ``versions`` property + """ + self.add_component("versions", name, data) + + class _Handler(DBIOHandler): + + def __init__(self, parentapp, path: str, wsgienv: Mapping, start_resp: Callable, who=None, + config: Mapping={}, log: Logger=None): + Handler.__init__(self, path, wsgienv, start_resp, who, config, log) + self.app = parentapp + + def do_GET(self, path, ashead=False): + path = path.strip('/') + if path: + # only the root path is supported + return self.send_error(404, "Not found") + + return self.send_json(self.app.data, ashead=ashead) + + + + +_MIDASSubApps = { + "dmp/mdm1": prj.MIDASProjectApp +} + +class MIDASApp: + """ + A complete WSGI App implementing the suite of MIDAS APIs. The MIDAS applications that are included + are driven by the configuration. The Groups application (used to define access groups) is always + included. + """ + + def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, + base_ep: str=None, subapp_factory_funcs=None): + self.cfg = config + if not self.cfg.get("services"): + raise ConfigurationException("No MIDAS apps configured (missing 'service' parameter)") + + if base_ep is None: + base_ep = self.cfg.get('base_endpoint', DEF_BASE_PATH) + self.base_ep = base_ep.strip('/').split('/') + + # Load MIDAS project servies based on what's in the configuration (i.e. 
if only the dmp app + # is configured, only that app will be available; others will return 404) + if not subapp_factory_funcs: + subapp_factory_funcs = _MIDASSubApps + + factory = SubAppFactory(self.cfg.get('services'), subapp_factory_funcs) + self.subapps = factory.create_suite(log, dbio_client_factory) + + # Add the groups endpoint + # TODO + + def authenticate(self, env) -> PubAgent: + """ + determine and return the identity of the client. This checks both user credentials and, if + configured, the client application key. If client keys are configured and the client has not + provided a recognized key, an exception is thrown. Otherwise, if the request has not presented + authenticable credentials, the returned PubAgent will represent an anonymous user. + + :param Mapping env: the WSGI request environment + :return: a representation of the requesting user + :rtype: PubAgent + """ + # TODO: support JWT cookie for authentication + + # TODO: support optional client + + # anonymous user + return PubAgent("public", PubAgent.UNKN, "anonymous") + + def handle_request(self, env, start_resp): + path = env.get('PATH_INFO', '/').strip('/').split('/') + if path == ['']: + path = [] + + # determine who is making the request + who = self.authenticate(env) + + if self.base_ep: + if len(path) < len(self.base_ep) or path[:len(self.base_ep)] != self.base_ep: + # path does not match the required base endpoint path + return Handler(path, env, start_resp).send_error(404, "Not Found") + + # lop off the base endpoint path + path = path[len(self.base_ep):] + + # Determine which subapp should handle this request + subapp = None + if len(path) > 1: + sapath = '/'.join(path[:2]) + subapp = self.subapps.get(sapath) + if subapp: + path = path[2:] + if not subapp and len(path) > 0: + subapp = self.subapps.get(path[0]) + if subapp: + path = path[1:] + if not subapp: + # this will handle any other non-existing paths + subapp = self.subapps.get('') + + return subapp.handle_path_request(env, start_resp,
path, who) + + + diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index 88e8cea..ad9476e 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -7,14 +7,14 @@ from nistoar.midas.dbio.wsgi import project as prj from nistoar.pdr.publish import prov -tmpdir = tempfile.TemporaryDirectory(prefix="_test_broker.") +tmpdir = tempfile.TemporaryDirectory(prefix="_test_project.") loghdlr = None rootlog = None def setUpModule(): global loghdlr global rootlog rootlog = logging.getLogger() - loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_project.log")) loghdlr.setLevel(logging.DEBUG) rootlog.addHandler(loghdlr) From 90224a783f80bdf07f884598890f275f5ca8cef5 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 18 Nov 2022 08:52:33 -0500 Subject: [PATCH 010/123] dbio.DBClientFactory: allow auth config to be provided to create_client() --- python/nistoar/midas/dbio/__init__.py | 7 ++-- python/nistoar/midas/dbio/base.py | 48 +++++++++++++++++++++++---- python/nistoar/midas/dbio/fsbased.py | 8 +++-- python/nistoar/midas/dbio/inmem.py | 7 ++-- python/nistoar/midas/dbio/mongo.py | 5 +-- 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index e2c26f2..c439d6d 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -22,10 +22,11 @@ from nistoar.midas import dbio # the factory will need a configuration (see CLIENT CONFIGURATION section) + storeconfig = { "mongodb//localhost:27017/MIDAS" } # storage-specific configuration config = { "default_shoulder": "mdst" } # connect to the DMP collection - client = dbio.MIDASDBClientFactory(config).create_client(dbio.DMP_PROJECTS, userid) + client = 
dbio.MIDASDBClientFactory(storeconfig).create_client(dbio.DMP_PROJECTS, config, userid) # create a new record: rec = client.create_record(user_specified_rec_name) @@ -133,13 +134,13 @@ ``default_shoulder`` the identifier prefix--i.e. the ID *shoulder*--that will be used to create the identifier for a new project record if one is not specified in the call to - :py:method:`~nistoar.midas.dbio.DBClient.create_client`. This is effectively a required + :py:method:`~nistoar.midas.dbio.DBClient.create_record`. This is effectively a required parameter; however, if not specified, ``allowed_project_shoulders`` must be set to create new project records. ``allowed_project_shoulders`` a list of shoulders that one can request when creating new project records via - :py:method:`~nistoar.midas.dbio.DBClient.create_client`. Note that the value the of + :py:method:`~nistoar.midas.dbio.DBClient.create_record`. Note that the value the of ``default_shoulder`` is implicitly added to this list; thus, if not specified, the default is the ``default_shoulder``. diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 28a02ce..f386169 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -3,7 +3,7 @@ This interface is based on the following model: - * Each service (drafting, DMPs, etc.) has its own collection that extends on a common base model + * Each service (DAP, DMPs, etc.) has its own collection that extends on a common base model * Each *record* in the collection represents a "project" that a user is working on via the service * A record can be expressed as a Python dictionary which can be exported into JSON @@ -634,6 +634,24 @@ def __str__(self): class DBClient(ABC): """ a client connected to the database for a particular service (e.g. drafting, DMPs, etc.) + + As this class is abstract, implementations provide support for specific storage backends. 
+ All implementations support the following common set of configuration parameters: + + ``superusers`` + (List[str]) _optional_. a list of strings giving the identifiers of users that + should be considered superusers who will be afforded authorization for all operations + ``allowed_project_shoulders`` + (List[str]) _optional_. a list of strings representing the identifier prefixes--i.e. + the _shoulders_--that can be used to create new project identifiers. If not provided, + the only allowed shoulder will be that given by ``default_shoulder``. + ``default_shoulder`` + (str) _required_. the identifier prefix--i.e. the _shoulder_--that should be used + by default when not otherwise requested by the user when creating new project records. + ``allowed_group_shoulders`` + (List[str]) _optional_. a list of strings representing the identifier prefixes--i.e. + the _shoulders_--that can be used to create new group identifiers. If not provided, + the only allowed shoulder will be the default, ``grp0``. """ def __init__(self, config: Mapping, projcoll: str, nativeclient=None, foruser: str = ANONYMOUS): @@ -647,10 +665,16 @@ def __init__(self, config: Mapping, projcoll: str, nativeclient=None, foruser: s @property def user_id(self) -> str: + """ + the identifier of the user that this client is acting on behalf of + """ return self._who @property def user_groups(self) -> frozenset: + """ + the set of identifiers for groups that the user given by :py:property:`user_id` belongs to. + """ if not self._whogrps: self.recache_user_groups() return self._whogrps @@ -671,9 +695,8 @@ def create_record(self, name: str, shoulder: str=None, foruser: str = None) -> P :param str name: the mnumonic name (provided by the requesting user) to give to the record. :param str shoulder: the identifier shoulder prefix to create the new ID with. - (The implementation should ensure that the requested shoulder is - recognized and that the requesting user is authorized to request - the shoulder.) 
+ (The implementation should ensure that the requested user is authorized + to request the shoulder.) :param str foruser: the ID of the user that should be registered as the owner. If not specified, the value of :py:property:`user_id` will be assumed. In this implementation, only a superuser can create a record for someone @@ -916,10 +939,18 @@ class DBClientFactory(ABC): """ def __init__(self, config): + """ + initialize the factory with its configuration. The configuration provided here serves as + the default parameters for the cient as these can be overridden by the configuration parameters + provided via :py:method:`create_client`. Generally, it is recommended that the parameters + the configure the backend storage be provided here, and that the non-storage parameters--namely, + the ones that control authorization--be provided via :py:method:`create_client` as these can + depend on the type of project being access (e.g. "dmp" vs. "dap"). + """ self._cfg = config @abstractmethod - def create_client(self, servicetype: str, foruser: str = ANONYMOUS): + def create_client(self, servicetype: str, config: Mapping={}, foruser: str = ANONYMOUS): """ create a client connected to the database and the contents related to the given service @@ -927,10 +958,15 @@ def create_client(self, servicetype: str, foruser: str = ANONYMOUS): :caption: Example # connect to the DMP collection - client = dbio.MIDASDBClienFactory(configdata).create_client(dbio.DMP_PROJECTS, userid) + client = dbio.MIDASDBClienFactory(configdata).create_client(dbio.DMP_PROJECTS, config, userid) :param str servicetype: the service data desired. The value should be one of ``DRAFT_PROJECTS`` or ``DMP_PROJECTS`` + :param Mapping config: the configuration to pass into the client. This will be merged into and + override the configuration provided to the factory at construction time. + Typically, the configuration provided here are the common parameters that + are independent of the type of backend storage. 
+ :param str foruser: The identifier of the user that DBIO requests will be made on behalf of. """ raise NotImplementedError() diff --git a/python/nistoar/midas/dbio/fsbased.py b/python/nistoar/midas/dbio/fsbased.py index 0b31525..260a9ac 100644 --- a/python/nistoar/midas/dbio/fsbased.py +++ b/python/nistoar/midas/dbio/fsbased.py @@ -3,12 +3,13 @@ """ import os, json from pathlib import Path +from copy import deepcopy from collections.abc import Mapping, MutableMapping, Set from typing import Iterator, List from . import base from nistoar.pdr.utils import read_json, write_json -from nistoar.base.config import ConfigurationException +from nistoar.base.config import ConfigurationException, merge_config class FSBasedDBClient(base.DBClient): """ @@ -167,6 +168,7 @@ def __init__(self, config: Mapping, dbroot: str = None): raise base.DBIOException("FSBasedDBClientFactory: %s: does not exist as a directory" % dbroot) self._dbroot = dbroot - def create_client(self, servicetype: str, foruser: str = base.ANONYMOUS): - return FSBasedDBClient(self._dbroot, self._cfg, servicetype, foruser) + def create_client(self, servicetype: str, config: Mapping = {}, foruser: str = base.ANONYMOUS): + cfg = merge_config(config, deepcopy(self._cfg)) + return FSBasedDBClient(self._dbroot, cfg, servicetype, foruser) diff --git a/python/nistoar/midas/dbio/inmem.py b/python/nistoar/midas/dbio/inmem.py index ce8d830..a0b1aee 100644 --- a/python/nistoar/midas/dbio/inmem.py +++ b/python/nistoar/midas/dbio/inmem.py @@ -8,6 +8,8 @@ from typing import Iterator, List from . 
import base +from nistoar.base.config import merge_config + class InMemoryDBClient(base.DBClient): """ an in-memory DBClient implementation @@ -96,8 +98,9 @@ def __init__(self, config: Mapping, _dbdata = None): self._db.update(deepcopy(_dbdata)) - def create_client(self, servicetype: str, foruser: str = base.ANONYMOUS): + def create_client(self, servicetype: str, config: Mapping={}, foruser: str = base.ANONYMOUS): + cfg = merge_config(config, deepcopy(self._cfg)) if servicetype not in self._db: self._db[servicetype] = {} - return InMemoryDBClient(self._db, self._cfg, servicetype, foruser) + return InMemoryDBClient(self._db, cfg, servicetype, foruser) diff --git a/python/nistoar/midas/dbio/mongo.py b/python/nistoar/midas/dbio/mongo.py index ba2f001..bd6ebef 100644 --- a/python/nistoar/midas/dbio/mongo.py +++ b/python/nistoar/midas/dbio/mongo.py @@ -1,14 +1,15 @@ """ An implementation of the dbio interface that uses a MongoDB database as it backend store """ -from collections.abc import Mapping, MutableMapping, Set import re +from copy import deepcopy +from collections.abc import Mapping, MutableMapping, Set from typing import Iterator, List from . 
import base from pymongo import MongoClient -from nistoar.base.config import ConfigurationException +from nistoar.base.config import ConfigurationException, merge_config _dburl_re = re.compile(r"^mongodb://(\w+(:\S+)?@)?\w+(\.\w+)*(:\d+)?/\w+$") From 6ac671c3a55e2b4971fc6256872bf1a2b2eca4c0 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 18 Nov 2022 08:55:17 -0500 Subject: [PATCH 011/123] dbio: doc typo --- python/nistoar/midas/dbio/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index f386169..9377c03 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -958,7 +958,7 @@ def create_client(self, servicetype: str, config: Mapping={}, foruser: str = ANO :caption: Example # connect to the DMP collection - client = dbio.MIDASDBClienFactory(configdata).create_client(dbio.DMP_PROJECTS, config, userid) + client = dbio.MIDASDBClientFactory(configdata).create_client(dbio.DMP_PROJECTS, config, userid) :param str servicetype: the service data desired. 
The value should be one of ``DRAFT_PROJECTS`` or ``DMP_PROJECTS`` From 1b385773441ca462a1d727ae4c52c5eed278aeb0 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 18 Nov 2022 14:28:28 -0500 Subject: [PATCH 012/123] dbio.DBClientFactory: updated tests for change in create_client() --- python/tests/nistoar/midas/dbio/test_acls.py | 2 +- python/tests/nistoar/midas/dbio/test_client.py | 16 ++++++++-------- python/tests/nistoar/midas/dbio/test_fsbased.py | 2 +- python/tests/nistoar/midas/dbio/test_groups.py | 8 ++++---- python/tests/nistoar/midas/dbio/test_inmem.py | 4 ++-- python/tests/nistoar/midas/dbio/test_mongo.py | 4 ++-- python/tests/nistoar/midas/dbio/test_record.py | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/python/tests/nistoar/midas/dbio/test_acls.py b/python/tests/nistoar/midas/dbio/test_acls.py index d786da3..a1208db 100644 --- a/python/tests/nistoar/midas/dbio/test_acls.py +++ b/python/tests/nistoar/midas/dbio/test_acls.py @@ -11,7 +11,7 @@ def setUp(self): self.cfg = { "default_shoulder": "pdr0" } self.user = "nist0:ava1" self.fact = inmem.InMemoryDBClientFactory(self.cfg) - self.cli = self.fact.create_client(base.DMP_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DMP_PROJECTS, {}, self.user) self.rec = self.cli.create_record("test") self.acls = self.rec.acls diff --git a/python/tests/nistoar/midas/dbio/test_client.py b/python/tests/nistoar/midas/dbio/test_client.py index 71b7a10..98eca0a 100644 --- a/python/tests/nistoar/midas/dbio/test_client.py +++ b/python/tests/nistoar/midas/dbio/test_client.py @@ -10,10 +10,10 @@ def setUp(self): self.cfg = { "default_shoulder": "pdr0", "allowed_project_shoulders": ["mds3"] } self.user = "nist0:ava1" self.fact = inmem.InMemoryDBClientFactory(self.cfg) - self.cli = self.fact.create_client(base.DRAFT_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DRAFT_PROJECTS, {}, self.user) def test_ctor(self): - self.assertIs(self.cli._cfg, self.cfg) + 
self.assertEqual(self.cli._cfg, self.cfg) self.assertEqual(self.cli.user_id, self.user) self.assertIsNone(self.cli._whogrps) self.assertTrue(isinstance(self.cli.groups, base.DBGroups)) @@ -60,7 +60,7 @@ def test_create_record(self): with self.assertRaises(base.NotAuthorized): self.cli.create_record("hers", foruser="alice") - rec = self.fact.create_client(base.DRAFT_PROJECTS, "alice").create_record("goob", foruser="alice") + rec = self.fact.create_client(base.DRAFT_PROJECTS, {}, "alice").create_record("goob", foruser="alice") self.assertEqual(rec.id, "pdr0:0002") self.assertEqual(rec.owner, "alice") self.assertEqual(rec.name, "goob") @@ -80,7 +80,7 @@ def test_get_record(self): self.cli.create_record("test1") self.cli.create_record("test2") - rec = self.fact.create_client(base.DRAFT_PROJECTS, "alice").create_record("goob") + rec = self.fact.create_client(base.DRAFT_PROJECTS, {}, "alice").create_record("goob") rec = self.cli.get_record_for("pdr0:0001") self.assertEqual(rec.name, "test1") @@ -91,7 +91,7 @@ def test_get_record(self): with self.assertRaises(base.NotAuthorized): self.cli.get_record_for("pdr0:0003") - goob = self.fact.create_client(base.DRAFT_PROJECTS, "alice").get_record_for("pdr0:0003") + goob = self.fact.create_client(base.DRAFT_PROJECTS, {}, "alice").get_record_for("pdr0:0003") self.assertEqual(goob.name, "goob") goob.acls.grant_perm_to(base.ACLs.READ, self.user) @@ -112,7 +112,7 @@ def test_get_record(self): def test_get_record_by_name(self): self.cli.create_record("test1") self.cli.create_record("test2") - rec = self.fact.create_client(base.DRAFT_PROJECTS, "alice").create_record("goob") + rec = self.fact.create_client(base.DRAFT_PROJECTS, {}, "alice").create_record("goob") rec = self.cli.get_record_by_name("test1") self.assertEqual(rec.name, "test1") @@ -123,7 +123,7 @@ def test_get_record_by_name(self): self.assertIsNone(self.cli.get_record_by_name("goob")) self.assertIsNone(self.cli.get_record_by_name("goob", "alice")) - goob = 
self.fact.create_client(base.DRAFT_PROJECTS, "alice").get_record_by_name("goob") + goob = self.fact.create_client(base.DRAFT_PROJECTS, {}, "alice").get_record_by_name("goob") self.assertEqual(goob.name, "goob") self.assertEqual(goob.id, "pdr0:0003") @@ -144,7 +144,7 @@ def test_select_records(self): rec.acls.grant_perm_to(rec.acls.WRITE, "alice") rec.save() - cli = self.fact.create_client(base.DRAFT_PROJECTS, "alice") + cli = self.fact.create_client(base.DRAFT_PROJECTS, {}, "alice") rec = cli.create_record("test1") rec = cli.create_record("test2") diff --git a/python/tests/nistoar/midas/dbio/test_fsbased.py b/python/tests/nistoar/midas/dbio/test_fsbased.py index 1c0678b..67d2760 100644 --- a/python/tests/nistoar/midas/dbio/test_fsbased.py +++ b/python/tests/nistoar/midas/dbio/test_fsbased.py @@ -20,7 +20,7 @@ def test_ctor(self): self.assertEqual(len([f for f in os.listdir(self.fact._dbroot) if not f.startswith(".")]), 0) def test_create_client(self): - cli = self.fact.create_client(base.DMP_PROJECTS, "ava1") + cli = self.fact.create_client(base.DMP_PROJECTS, {}, "ava1") self.assertEqual(cli._cfg, self.fact._cfg) self.assertEqual(cli._projcoll, base.DMP_PROJECTS) self.assertEqual(cli._who, "ava1") diff --git a/python/tests/nistoar/midas/dbio/test_groups.py b/python/tests/nistoar/midas/dbio/test_groups.py index ebcfd5c..74514eb 100644 --- a/python/tests/nistoar/midas/dbio/test_groups.py +++ b/python/tests/nistoar/midas/dbio/test_groups.py @@ -10,7 +10,7 @@ def setUp(self): self.cfg = { "default_shoulder": "pdr0" } self.user = "nist0:ava1" self.fact = inmem.InMemoryDBClientFactory(self.cfg) - self.cli = self.fact.create_client(base.DMP_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DMP_PROJECTS, {}, self.user) self.rec = base.Group({"id": "g:ava1:friends", "name": "friends", "owner": self.user}, self.cli) def test_ctor(self): @@ -84,7 +84,7 @@ def setUp(self): self.cfg = { "default_shoulder": "pdr0" } self.user = "nist0:ava1" self.fact = 
inmem.InMemoryDBClientFactory(self.cfg) - self.cli = self.fact.create_client(base.DMP_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DMP_PROJECTS, {}, self.user) self.dbg = self.cli.groups def test_ctor(self): @@ -117,7 +117,7 @@ def test_create_group(self): with self.assertRaises(base.NotAuthorized): grp = self.dbg.create_group("friends", "alice") - self.cfg['superusers'] = [self.user] + self.cli._cfg['superusers'] = [self.user] grp = self.dbg.create_group("friends", "alice") self.assertEqual(grp.name, "friends") self.assertEqual(grp.owner, "alice") @@ -169,7 +169,7 @@ def test_get_by_name(self): self.assertIsNone(self.dbg.get_by_name("friends", "alice")) - self.cfg['superusers'] = [self.user] + self.cli._cfg['superusers'] = [self.user] grp = self.dbg.create_group("friends", "alice") grp = self.dbg.get_by_name("friends", "alice") self.assertEqual(grp.id, "grp0:alice:friends") diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py b/python/tests/nistoar/midas/dbio/test_inmem.py index 3d2bd77..077f1b6 100644 --- a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -20,7 +20,7 @@ def test_ctor(self): self.assertEqual(self.fact._db.get("nextnum"), {"hank": 2}) def test_create_client(self): - cli = self.fact.create_client(base.DMP_PROJECTS, "ava1") + cli = self.fact.create_client(base.DMP_PROJECTS, {}, "ava1") self.assertEqual(cli._db, self.fact._db) self.assertEqual(cli._cfg, self.fact._cfg) self.assertEqual(cli._projcoll, base.DMP_PROJECTS) @@ -35,7 +35,7 @@ class TestInMemoryDBClient(test.TestCase): def setUp(self): self.cfg = {} self.user = "nist0:ava1" - self.cli = inmem.InMemoryDBClientFactory({}).create_client(base.DMP_PROJECTS, self.user) + self.cli = inmem.InMemoryDBClientFactory({}).create_client(base.DMP_PROJECTS, {}, self.user) def test_next_recnum(self): self.assertEqual(self.cli._next_recnum("goob"), 1) diff --git a/python/tests/nistoar/midas/dbio/test_mongo.py 
b/python/tests/nistoar/midas/dbio/test_mongo.py index ef14741..1f7c2b3 100644 --- a/python/tests/nistoar/midas/dbio/test_mongo.py +++ b/python/tests/nistoar/midas/dbio/test_mongo.py @@ -43,7 +43,7 @@ def test_ctor(self): mongo.MongoDBClientFactory(self.cfg) def test_create_client(self): - cli = self.fact.create_client(base.DMP_PROJECTS, "bob") + cli = self.fact.create_client(base.DMP_PROJECTS, {}, "bob") self.assertEqual(cli._cfg, self.fact._cfg) self.assertEqual(cli._projcoll, base.DMP_PROJECTS) self.assertEqual(cli._who, "bob") @@ -252,7 +252,7 @@ class TestMongoProjectRecord(test.TestCase): def setUp(self): self.fact = mongo.MongoDBClientFactory({}, dburl) self.user = "nist0:ava1" - self.cli = self.fact.create_client(base.DRAFT_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DRAFT_PROJECTS, {}, self.user) self.rec = base.ProjectRecord(base.DRAFT_PROJECTS, {"id": "pdr0:2222", "name": "brains", "owner": self.user}, self.cli) diff --git a/python/tests/nistoar/midas/dbio/test_record.py b/python/tests/nistoar/midas/dbio/test_record.py index b456cbc..df48d2d 100644 --- a/python/tests/nistoar/midas/dbio/test_record.py +++ b/python/tests/nistoar/midas/dbio/test_record.py @@ -10,7 +10,7 @@ def setUp(self): self.cfg = { "default_shoulder": "pdr0" } self.user = "nist0:ava1" self.fact = inmem.InMemoryDBClientFactory(self.cfg) - self.cli = self.fact.create_client(base.DRAFT_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DRAFT_PROJECTS, {}, self.user) self.rec = base.ProjectRecord(base.DRAFT_PROJECTS, {"id": "pdr0:2222", "name": "brains", "owner": self.user}, self.cli) From ff49df2aada22e1006b7a6904b1020d18a85bd65 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 18 Nov 2022 14:34:52 -0500 Subject: [PATCH 013/123] dbio.wsgi: finished and debugged wsgiapp.py --- python/nistoar/midas/dbio/base.py | 1 + python/nistoar/midas/dbio/wsgi/project.py | 8 +- python/nistoar/midas/dbio/wsgi/wsgiapp.py | 114 +++- .../nistoar/midas/dbio/wsgi/test_broker.py 
| 4 +- .../nistoar/midas/dbio/wsgi/test_project.py | 28 +- .../nistoar/midas/dbio/wsgi/test_wsgiapp.py | 623 ++++++++++++++++++ 6 files changed, 732 insertions(+), 46 deletions(-) create mode 100644 python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 9377c03..5fd1be0 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -268,6 +268,7 @@ def validate(self, errs=None, data=None) -> List[str]: def to_dict(self): self._data['acls'] = self.acls._perms + self._data['type'] = self._coll return deepcopy(self._data) class Group(ProtectedRecord): diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 8e22b64..8d80caf 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -35,7 +35,9 @@ def __init__(self, projname, log: Logger, dbcli_factory: DBClientFactory, super(MIDASProjectApp, self).__init__(projname, log, config) ## create dbio client from config - self._prjbrkr_cls = self.cfg.get('project_handler_class', self.def_project_broker_class) + if not project_broker_cls: + project_broker_cls = self.def_project_broker_class + self._prjbrkr_cls = project_broker_cls self._dbfact = dbcli_factory def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: @@ -50,8 +52,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge """ # set up dbio client and the request handler that will mediate with it - dbcli = self._dbfact.create_client(self._name, who.actor) - pbroker = self._prjbrkr_cls(dbcli, self.cfg, who, env, self.log) + dbcli = self._dbfact.create_client(self._name, self.cfg.get('dbio'), who.actor) + pbroker = self._prjbrkr_cls(dbcli, self.cfg.get('broker'), who, env, self.log) # now parse the requested path; we have different handlers for different types of paths path = path.strip('/') diff 
--git a/python/nistoar/midas/dbio/wsgi/wsgiapp.py b/python/nistoar/midas/dbio/wsgi/wsgiapp.py index bedac6a..482af4d 100644 --- a/python/nistoar/midas/dbio/wsgi/wsgiapp.py +++ b/python/nistoar/midas/dbio/wsgi/wsgiapp.py @@ -28,11 +28,12 @@ import os, sys, logging, json, re from logging import Logger from wsgiref.headers import Headers +from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Callable from copy import deepcopy from ... import system -from . import project as prj, SubApp, Handler +from . import project as prj, SubApp, Handler, DBIOHandler from ..base import DBClientFactory from ..inmem import InMemoryDBClientFactory from nistoar.pdr.publish.prov import PubAgent @@ -43,6 +44,7 @@ .getChild('wsgi') DEF_BASE_PATH = "/midas/" +DEF_DBIO_CLIENT_FACTORY_CLASS = InMemoryDBClientFactory class SubAppFactory: """ @@ -63,6 +65,11 @@ def __init__(self, config: Mapping, subapps: Mapping): be included in the output. """ self.cfg = config + if "services" not in self.cfg: + raise ConfigurationException("Missing required config parameter: services") + if not isinstance(self.cfg["services"], Mapping): + raise ConfigurationException("Config parameter type error: services: not a dictionary: "+ + type(self.cfg["services"])) self.subapps = subapps def register_subapp(self, typename: str, factory: Callable): @@ -94,12 +101,13 @@ def config_for_convention(self, appname: str, convention: str, typename: str = N select the SubApp factory function in the set of SubApp provided at construction time. 
""" - if appname not in self.cfg: + svccfg = self.cfg["services"] + if appname not in svccfg: return None if not convention: convention = "def" - appcfg = deepcopy(self.cfg[appname]) + appcfg = deepcopy(svccfg[appname]) if "conventions" in appcfg: cnvcfg = deepcopy(appcfg.get("conventions", {}).get(convention)) if not cnvcfg and convention == "def" and appcfg.get("default_convention"): @@ -112,8 +120,8 @@ def config_for_convention(self, appname: str, convention: str, typename: str = N if cnvcfg: appcfg = merge_config(cnvcfg, appcfg) - if type: - appcfg['type'] = type + if typename: + appcfg['type'] = typename elif not appcfg.get('type'): appcfg['type'] = "%s/%s" % (appname, convention) appcfg.setdefault("project_name", appname) @@ -140,7 +148,7 @@ def create_subapp(self, log: Logger, dbio_client_factory: DBClientFactory, raise ConfigurationException("Missing configuration parameter: type") factory = self.subapps[typename] - return factory(appconfig.get('name', typename), log, dbio_client_factory, appconfig) + return factory(appconfig.get('project_name', typename), log, dbio_client_factory, appconfig) def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> MutableMapping: """ @@ -150,28 +158,24 @@ def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> Mut be About SubApps that provide information and proof-of-life for parent paths. 
""" out = OrderedDict() - about = About(self.cfg.get("about")) + about = About(log, self.cfg.get("about", {})) - for appname, appcfg in self.cfg.items(): + for appname, appcfg in self.cfg['services'].items(): if not isinstance(appcfg, Mapping): # wrong type; skip continue - about.add_service(appname, appcfg.get('about', {})) - aboutapp = About(appcfg.get('about', {})) + aboutapp = About(log, appcfg.get('about', {})) if "conventions" in appcfg: if not isinstance(appcfg["conventions"], Mapping): raise ConfigurationException("Parameter 'conventions' not a dictionary: "+ type(appcfg["conventions"])) - for conv in self appcfg.get("conventions", {}): + for conv in appcfg.get("conventions", {}): cnvcfg = self.config_for_convention(appname, conv) if isinstance(cnvcfg, Mapping): - # Add an entry into the About SubApp - about.add_version(conv, cnvcfg.get("about", {})) - path = "%s/%s" % (appname, conv) try: out[path] = self.create_subapp(log, dbio_client_factory, cnvcfg) @@ -179,7 +183,14 @@ def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> Mut if self.cfg.get("strict", False): raise ConfigurationException("MIDAS app type not recognized: "+str(ex)) else: - log.warn("Skipping unrecognized MIDAS app type: "+str(ex)) + log.warning("Skipping unrecognized MIDAS app type: "+str(ex)) + continue + except ConfigurationException as ex: + ex.message = "While creating subapp for %s: %s" % (path, str(ex)) + raise + + # Add an entry into the About SubApp + aboutapp.add_version(conv, cnvcfg.get("about", {})) # if so configured, set as default if appcfg.get("default_convention") == conv: @@ -189,21 +200,25 @@ def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> Mut else: # No conventions configured for this app name; try to create an app from the defaults - path = appcfg.get("path") - if path is None: - path = "%s/def" % appname + cnvcfg = self.config_for_convention(appname, "def") + path = "%s/def" % appname try: - out[path] = 
self.create_subapp(log, dbio_client_factory, appcfg) + out[path] = self.create_subapp(log, dbio_client_factory, cnvcfg) + aboutapp.add_version("def", cnvcfg.get("about", {})) except KeyError as ex: if self.cfg.get("strict", False): raise ConfigurationException("MIDAS app type not recognized: "+str(ex)) else: - log.warn("Skipping unrecognized MIDAS app type: "+str(ex)) + log.warning("Skipping unrecognized MIDAS app type: "+str(ex)) + continue + except ConfigurationException as ex: + raise ConfigurationException("While creating subapp for %s: %s" % (path, str(ex)), + cause=ex) out[appname] = aboutapp + about.add_service(appname, appcfg.get('about', {})) out[""] = about - return out @@ -239,18 +254,19 @@ class About(SubApp): """ - def __init__(self, base_data: Mapping=None): + def __init__(self, log, base_data: Mapping=None): """ initialize the SubApp. Some default properties may be added to base_data. :param Mapping base_data: the initial data the should appear in the GET response JSON object """ + super(About, self).__init__("about", log, {}) if not base_data: base_data = OrderedDict() self.data = self._init_data(base_data) def _init_data(self, data: Mapping): data = deepcopy(data) - if "message" is not in data: + if "message" not in data: data["message"] = "Service is available" return data @@ -272,7 +288,7 @@ def add_component(self, compcat, compname, data): """ if compcat not in self.data: self.data[compcat] = OrderedDict() - if not isinstance(self.data[comcat], MutableMapping): + if not isinstance(self.data[compcat], MutableMapping): raise ValueError("Category property is not an object: %s: %s" % (comcat, type(self.data[comcat]))) self.data[compcat][compname] = data @@ -296,6 +312,13 @@ def __init__(self, parentapp, path: str, wsgienv: Mapping, start_resp: Callable, Handler.__init__(self, path, wsgienv, start_resp, who, config, log) self.app = parentapp + def handle(self): + # no sub resources are supported via this SubApp + if self._path.strip('/'): + return 
self.send_error(404, "Not found") + + return super().handle() + def do_GET(self, path, ashead=False): path = path.strip('/') if path: @@ -304,6 +327,16 @@ def do_GET(self, path, ashead=False): return self.send_json(self.app.data, ashead=ashead) + def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: + """ + return a handler instance to handle a particular request to a path + :param Mapping env: the WSGI environment containing the request + :param Callable start_resp: the start_resp function to use initiate the response + :param str path: the path to the resource being requested. This is usually + relative to a parent path that this SubApp is configured to + handle. + """ + return self._Handler(self, path, env, start_resp, who, log=self.log) @@ -319,10 +352,27 @@ class MIDASApp: """ def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, - base_ep: str=None, subapp_factory_funcs=None): + base_ep: str=None, subapp_factory_funcs: Mapping=None): + """ + initial the App + :param Mapping config: the collected configuration for the App (see the + :py:module:`wsgi module documentation ` + for the schema + :param DBClientFactory dbio_client_factory: the DBIO client factory to use to create + clients used to access the DBIO storage backend. If not specified, + the in-memory client factory will be used. + :param str base_ep: the resource path to assume as the base of all services provided by + this App. If not provided, a value set in the configuration is + used (which itself defaults to "/midas/"). + :param Mapping subapp_factory_funcs: a map of project service names to ``SubApp`` classes + that implement the MIDAS Project services that can be included in + this App. The service name (which gets matched to the ``type``) + configuration parameter, normally has the form "_service_/_convention_". + If not provided (typical), an internal map is used. 
+ """ self.cfg = config if not self.cfg.get("services"): - raise ConfigurationException("No MIDAS apps configured (missing 'service' parameter)") + raise ConfigurationException("No MIDAS apps configured (missing 'services' parameter)") if base_ep is None: base_ep = self.cfg.get('base_endpoint', DEF_BASE_PATH) @@ -333,8 +383,11 @@ def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, if not subapp_factory_funcs: subapp_factory_funcs = _MIDASSubApps - factory = SubAppFactory(self.cfg.get('services'), subapp_factory_funcs) - self.subapps = factory.create_suite() + if not dbio_client_factory: + dbio_client_factory = DEF_DBIO_CLIENT_FACTORY_CLASS(self.cfg.get('dbio', {})) + + factory = SubAppFactory(self.cfg, subapp_factory_funcs) + self.subapps = factory.create_suite(log, dbio_client_factory) # Add the groups endpoint # TODO @@ -388,7 +441,10 @@ def handle_request(self, env, start_resp): # this will handle any other non-existing paths subapp = self.subapps.get('') - return subapp.handle_path_request(env, start_resp, path, who) + return subapp.handle_path_request(env, start_resp, "/".join(path), who) + + def __call__(self, env, start_resp): + return self.handle_request(env, start_resp) diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py index 6816031..57ad3d6 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py @@ -43,8 +43,8 @@ def setUp(self): "allowed_project_shoulders": ["mdm1", "spc1"], "default_shoulder": "mdm0" } - self.fact = inmem.InMemoryDBClientFactory(self.cfg, { "nextnum": { "mdm1": 2 }}) - self.dbcli = self.fact.create_client(base.DMP_PROJECTS, nistr.actor) + self.fact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) + self.dbcli = self.fact.create_client(base.DMP_PROJECTS, self.cfg, nistr.actor) self.resp = [] def create_broker(self, request=None): diff --git 
a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index ad9476e..f9728e0 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -45,25 +45,29 @@ def tostr(self, resplist): def setUp(self): self.cfg = { - "superusers": [ "rlp" ], - "clients": { - "midas": { - "default_shoulder": "mdm1" - }, - "default": { - "default_shoulder": "mdm0" + "broker": { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } } }, - "allowed_project_shoulders": ["mdm1", "spc1"], - "default_shoulder": "mdm0" + "dbio": { + "superusers": [ "rlp" ], + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } } - self.dbfact = inmem.InMemoryDBClientFactory(self.cfg, { "nextnum": { "mdm1": 2 }}) + self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) self.app = prj.MIDASProjectApp(base.DMP_PROJECTS, rootlog.getChild("dmpapi"), self.dbfact, self.cfg) self.resp = [] self.rootpath = "/midas/dmp/" def create_record(self, name="goob", meta=None): - cli = self.dbfact.create_client(base.DMP_PROJECTS, nistr.actor) + cli = self.dbfact.create_client(base.DMP_PROJECTS, self.cfg["dbio"], nistr.actor) out = cli.create_record(name, "mdm1") if meta: out.meta = meta @@ -71,7 +75,7 @@ def create_record(self, name="goob", meta=None): return out def sudb(self): - return self.dbfact.create_client(base.DMP_PROJECTS, "rlp") + return self.dbfact.create_client(base.DMP_PROJECTS, self.cfg["dbio"], "rlp") def test_create_handler_name(self): path = "mdm1:0001/name" diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py b/python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py new file mode 100644 index 0000000..f1cabe4 --- /dev/null +++ b/python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py @@ -0,0 +1,623 @@ +import os, json, pdb, logging, tempfile +from collections import OrderedDict 
+from io import StringIO +import unittest as test + +from nistoar.midas.dbio import inmem, base +from nistoar.midas.dbio.wsgi import wsgiapp as app +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_wsgiapp.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_wsgiapp.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +class TestAbout(test.TestCase): + + def start(self, status, headers=None, extup=None): + self.resp.append(status) + for head in headers: + self.resp.append("{0}: {1}".format(head[0], head[1])) + + def body2dict(self, body): + return json.loads("\n".join(self.tostr(body)), object_pairs_hook=OrderedDict) + + def tostr(self, resplist): + return [e.decode() for e in resplist] + + def setUp(self): + self.data = { + "goob": "gurn", + "foo": { + "bar": [1, 2, 3] + } + } + self.app = app.About(rootlog, self.data) + self.resp = [] + + def test_ctor(self): + self.assertEqual(sorted(list(self.app.data.keys())), "foo goob message".split()) + self.assertEqual(self.app.data['message'], "Service is available") + for key in self.data.keys(): + self.assertEqual(self.app.data[key], self.data[key]) + + def test_add_stuff(self): + self.assertNotIn("hairdos", self.app.data) + self.assertNotIn("services", self.app.data) + self.assertNotIn("versions", self.app.data) + + self.app.add_component("hairdos", "bob", { "color": "brunette" }) + self.app.add_component("hairdos", "beehive", { "color": "blond" }) + self.assertIn("hairdos", self.app.data) + self.assertEqual(sorted(list(self.app.data["hairdos"].keys())), ["beehive", "bob"]) + 
self.assertEqual(self.app.data["hairdos"]["bob"], { "color": "brunette" }) + self.assertEqual(self.app.data["hairdos"]["beehive"], { "color": "blond" }) + self.app.add_component("hairdos", "beehive", { "color": "red" }) + self.assertEqual(self.app.data["hairdos"]["beehive"], { "color": "red" }) + + self.app.add_service("dmp", {"title": "DMP svc"}) + self.assertIn("hairdos", self.app.data) + self.assertIn("services", self.app.data) + self.assertNotIn("versions", self.app.data) + self.assertEqual(self.app.data["services"]["dmp"], { "title": "DMP svc" }) + + self.app.add_service("dap", [1, 2, 3]) + self.assertEqual(self.app.data["services"]["dmp"], { "title": "DMP svc" }) + self.assertEqual(self.app.data["services"]["dap"], [1, 2, 3]) + + self.app.add_version("pdr0", "internal") + self.assertIn("hairdos", self.app.data) + self.assertIn("services", self.app.data) + self.assertIn("versions", self.app.data) + self.assertEqual(self.app.data["services"]["dmp"], { "title": "DMP svc" }) + self.assertEqual(self.app.data["services"]["dap"], [1, 2, 3]) + self.assertEqual(self.app.data["versions"]["pdr0"], "internal") + self.app.add_version("pdr0", False) + self.assertEqual(self.app.data["versions"]["pdr0"], False) + + def test_get(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/' + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + + data = self.body2dict(body) + self.assertEqual(sorted(list(self.app.data.keys())), "foo goob message".split()) + self.assertEqual(data['message'], "Service is available") + for key in self.data.keys(): + self.assertEqual(data[key], self.data[key]) + + self.data['message'] = "Services are ready" + self.app = app.About(rootlog, self.data) + self.resp = [] + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertEqual(sorted(list(self.app.data.keys())), "foo goob message".split()) + self.assertEqual(self.app.data['message'], "Services are ready") + for key in self.data.keys(): + 
self.assertEqual(self.app.data[key], self.data[key]) + + def test_notfound(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas' + } + body = self.app(req, self.start) + self.assertIn("404 ", self.resp[0]) + + +class TestSubAppFactory(test.TestCase): + + def setUp(self): + self.config = { + "about": { + "title": "MIDAS Authoring Services", + "describedBy": "https://midas3.nist.gov/midas/apidocs", + "href": "http://midas3.nist.gov/midas/dmp" + }, + "services": { + "dmp": { + "about": { + "message": "DMP Service is available", + "title": "Data Management Plan (DMP) Authoring API", + "describedBy": "https://midas3.nist.gov/midas/apidocs", + "href": "http://midas3.nist.gov/midas/dmp" + }, + "foo": "but", + "gurn": "goob", + "default_convention": "mdm1", + "conventions": { + "mdm1": { + "about": { + "title": "Data Management Plan (DMP) Authoring API (mdm1 convention)", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dmp/mdm1", + "href": "http://midas3.nist.gov/midas/dmp/mdm1", + "version": "mdm1" + }, + "foo": "bar", + "ab": 2 + }, + "mdm2": { + "about": { + "title": "Data Management Plan (DMP) Authoring API (mdm2 convention)", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dmp/mdm2", + "href": "http://midas3.nist.gov/midas/dmp/mdm2", + "version": "mdm2" + }, + "type": "dmp/mdm1" + } + } + }, + "dap": { + "about": { + "message": "DAP Service is available", + "title": "Data Asset Publication (DAP) Authoring API", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dap", + "href": "http://midas3.nist.gov/midas/dap" + }, + "project_name": "drafts", + "type": "dmp/mdm1" + }, + "pyu": { + "about": { + "describedBy": "https://midas3.nist.gov/midas/apidocs/pyu", + "href": "http://midas3.nist.gov/midas/pyu" + } + } + } + } + self.fact = app.SubAppFactory(self.config, app._MIDASSubApps) + + def test_ctor_register(self): + self.assertTrue(bool(self.fact.cfg)) + self.assertTrue(bool(self.fact.subapps)) + self.assertIn("dmp/mdm1", 
self.fact.subapps) + self.assertNotIn("dap", self.fact.subapps) + + self.fact.register_subapp("dap", app._MIDASSubApps["dmp/mdm1"]) + self.assertIn("dmp/mdm1", self.fact.subapps) + self.assertIn("dap", self.fact.subapps) + self.assertIs(self.fact.subapps["dmp/mdm1"], self.fact.subapps["dap"]) + + def test_config_for_convention(self): + cfg = self.fact.config_for_convention("dmp", "mdm1") + self.assertIsNotNone(cfg) + self.assertEqual(cfg["gurn"], "goob") + self.assertEqual(cfg["ab"], 2) + self.assertEqual(cfg["foo"], "bar") + self.assertEqual(cfg["default_convention"], "mdm1") + self.assertEqual(cfg["about"]["version"], "mdm1") + self.assertEqual(cfg["type"], "dmp/mdm1") + self.assertEqual(cfg["project_name"], "dmp") + self.assertNotIn("conventions", cfg) + + cfg = self.fact.config_for_convention("dmp", "def") + self.assertIsNotNone(cfg) + self.assertEqual(cfg["gurn"], "goob") + self.assertEqual(cfg["ab"], 2) + self.assertEqual(cfg["foo"], "bar") + self.assertEqual(cfg["default_convention"], "mdm1") + self.assertEqual(cfg["about"]["version"], "mdm1") + self.assertEqual(cfg["type"], "dmp/mdm1") + self.assertEqual(cfg["project_name"], "dmp") + self.assertNotIn("conventions", cfg) + + cfg = self.fact.config_for_convention("dmp", "") + self.assertIsNotNone(cfg) + self.assertEqual(cfg["gurn"], "goob") + self.assertEqual(cfg["ab"], 2) + self.assertEqual(cfg["foo"], "bar") + self.assertEqual(cfg["default_convention"], "mdm1") + self.assertEqual(cfg["about"]["version"], "mdm1") + self.assertEqual(cfg["type"], "dmp/mdm1") + self.assertEqual(cfg["project_name"], "dmp") + self.assertNotIn("conventions", cfg) + + cfg = self.fact.config_for_convention("dmp", "mdm2") + self.assertIsNotNone(cfg) + self.assertEqual(cfg["gurn"], "goob") + self.assertNotIn("ab", cfg) + self.assertEqual(cfg["foo"], "but") + self.assertEqual(cfg["default_convention"], "mdm1") + self.assertEqual(cfg["about"]["version"], "mdm2") + self.assertEqual(cfg["type"], "dmp/mdm1") + 
self.assertEqual(cfg["project_name"], "dmp") + self.assertNotIn("conventions", cfg) + + cfg = self.fact.config_for_convention("dmp", "mdm2", "hank") + self.assertEqual(cfg["foo"], "but") + self.assertEqual(cfg["project_name"], "dmp") + self.assertEqual(cfg["type"], "hank") + + cfg = self.fact.config_for_convention("dap", None) + self.assertEqual(cfg["type"], "dmp/mdm1") + self.assertEqual(cfg["project_name"], "drafts") + self.assertIn("about", cfg) + self.assertNotIn("conventions", cfg) + + cfg = self.fact.config_for_convention("project", "") + self.assertIsNone(cfg) + + cfg = self.fact.config_for_convention("pyu", "def") + self.assertIn("about", cfg) + self.assertEqual(cfg["project_name"], "pyu") + self.assertEqual(cfg["type"], "pyu/def") + + + def test_create_subapp(self): + subapp = self.fact.create_subapp(rootlog, app.DEF_DBIO_CLIENT_FACTORY_CLASS({}), + {"project_name": "pj", "type": "dmp/mdm1", "a": "b"}) + self.assertTrue(subapp) + self.assertTrue(isinstance(subapp, app.prj.MIDASProjectApp)) + self.assertEqual(subapp.cfg["a"], "b") + self.assertEqual(subapp._name, "pj") + + with self.assertRaises(KeyError): + self.fact.create_subapp(rootlog, app.DEF_DBIO_CLIENT_FACTORY_CLASS({}), + {"project_name": "pj", "a": "b"}, "dap") + with self.assertRaises(app.ConfigurationException): + self.fact.create_subapp(rootlog, app.DEF_DBIO_CLIENT_FACTORY_CLASS({}), + {"project_name": "pj", "a": "b"}) + + def test_create_suite(self): + subapps = self.fact.create_suite(rootlog, app.DEF_DBIO_CLIENT_FACTORY_CLASS({})) + self.assertTrue(subapps) + self.assertTrue(isinstance(subapps["dmp/mdm1"], app.prj.MIDASProjectApp)) + self.assertTrue(isinstance(subapps["dmp/mdm2"], app.prj.MIDASProjectApp)) + self.assertTrue(isinstance(subapps["dap/def"], app.prj.MIDASProjectApp)) + self.assertTrue(isinstance(subapps[""], app.About)) + self.assertTrue(isinstance(subapps["dmp"], app.About)) + self.assertTrue(isinstance(subapps["dap"], app.About)) + self.assertNotIn("pyu/def", subapps) + 
self.assertNotIn("pyu", subapps) + + self.assertIn("message", subapps[""].data) + self.assertIn("services", subapps[""].data) + self.assertIn("href", subapps[""].data) + +class TestMIDASApp(test.TestCase): + + def start(self, status, headers=None, extup=None): + self.resp.append(status) + for head in headers: + self.resp.append("{0}: {1}".format(head[0], head[1])) + + def body2dict(self, body): + return json.loads("\n".join(self.tostr(body)), object_pairs_hook=OrderedDict) + + def tostr(self, resplist): + return [e.decode() for e in resplist] + + def setUp(self): + self.resp = [] + self.config = { + "dbio": { }, + "about": { + "title": "MIDAS Authoring Services", + "describedBy": "https://midas3.nist.gov/midas/apidocs", + "href": "http://midas3.nist.gov/midas/dmp" + }, + "services": { + "dmp": { + "about": { + "message": "DMP Service is available", + "title": "Data Management Plan (DMP) Authoring API", + "describedBy": "https://midas3.nist.gov/midas/apidocs", + "href": "http://midas3.nist.gov/midas/dmp" + }, + "broker": { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + } + }, + "dbio": { + "default_convention": "mdm1", + "superusers": [ "rlp" ], + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + }, + "conventions": { + "mdm1": { + "about": { + "title": "Data Management Plan (DMP) Authoring API (mdm1 convention)", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dmp/mdm1", + "href": "http://midas3.nist.gov/midas/dmp/mdm1", + "version": "mdm1" + } + }, + "mdm2": { + "about": { + "title": "Data Management Plan (DMP) Authoring API (mdm2 convention)", + "describedBy": "https://midas3.nist.gov/midas/apidocs/dmp/mdm2", + "href": "http://midas3.nist.gov/midas/dmp/mdm2", + "version": "mdm2" + }, + "type": "dmp/mdm2" + } + } + }, + "dap": { + "about": { + "message": "DAP Service is available", + "title": "Data Asset Publication (DAP) Authoring API", + "describedBy": 
"https://midas3.nist.gov/midas/apidocs/dap", + "href": "http://midas3.nist.gov/midas/dap" + }, + "project_name": "drafts", + "type": "dmp/mdm1", + "broker": { + "clients": { + "default": { + "default_shoulder": "mds3" + } + } + }, + "dbio": { + "default_convention": "mds3", + "superusers": [ "rlp" ], + "allowed_project_shoulders": ["mds3", "pdr0"], + "default_shoulder": "mds3" + }, + }, + "pyu": { + "about": { + "describedBy": "https://midas3.nist.gov/midas/apidocs/pyu", + "href": "http://midas3.nist.gov/midas/pyu" + } + } + } + } + self.clifact = inmem.InMemoryDBClientFactory({}) + self.app = app.MIDASApp(self.config, self.clifact) + self.data = self.clifact._db + + def test_ctor(self): + self.assertEqual(self.app.base_ep, ['midas']) + self.assertIn("dmp/mdm1", self.app.subapps) + self.assertIn("dmp/mdm1", self.app.subapps) + self.assertNotIn("dmp/mdm2", self.app.subapps) + self.assertIn("dap/def", self.app.subapps) + self.assertIn("dap", self.app.subapps) + self.assertIn("dmp", self.app.subapps) + self.assertIn("", self.app.subapps) + self.assertNotIn("pyu/def", self.app.subapps) + self.assertNotIn("pyu", self.app.subapps) + + self.assertTrue(self.app.subapps["dmp/mdm1"]._dbfact) + + self.assertEqual(self.data["dmp"], {}) + self.assertEqual(self.data["draft"], {}) + + def test_about_suite(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas' + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertTrue(data["message"], "Service is available") + self.assertIn("https://", data["describedBy"]) + self.assertIn("http://", data["href"]) + self.assertIn("services", data) + self.assertIn("dmp", list(data['services'].keys())) + self.assertIn("dap", list(data['services'].keys())) + self.assertEqual(len(data["services"]), 2) + self.assertNotIn("versions", data) + + def test_about_dmp(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dmp/' + } + body = self.app(req, self.start) + 
self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertTrue(data["message"], "Service is available") + self.assertIn("https://", data["describedBy"]) + self.assertIn("http://", data["href"]) + self.assertEqual(list(data['versions'].keys()), ["mdm1"]) + self.assertEqual(len(data["versions"]), 1) + self.assertNotIn("services", data) + + def test_about_dap(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/' + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertTrue(data["message"], "Service is available") + self.assertIn("https://", data["describedBy"]) + self.assertIn("http://", data["href"]) + self.assertIn("versions", data) + self.assertEqual(list(data['versions'].keys()), ["def"]) + self.assertNotIn("services", data) + + def test_dmp(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dmp/mdm1' + } + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + self.assertEqual(data, []) + + self.resp = [] + inp = { + "name": "gary", + "data": { + "color": "red" + } + } + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dmp/mdm1', + 'wsgi.input': StringIO(json.dumps(inp)) + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data["name"], "gary") + self.assertEqual(data["data"], {"color": "red"}) + self.assertEqual(data["id"], "mdm0:0001") + self.assertEqual(data["owner"], "anonymous") + self.assertEqual(data["type"], "dmp") + + self.assertEqual(self.data["dmp"]["mdm0:0001"]["name"], "gary") + self.assertEqual(self.data["dmp"]["mdm0:0001"]["data"], {"color": "red"}) + self.assertEqual(self.data["dmp"]["mdm0:0001"]["id"], "mdm0:0001") + self.assertEqual(self.data["dmp"]["mdm0:0001"]["owner"], "anonymous") + + self.resp = [] + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dmp/mdm1/mdm0:0001' + } + body = 
self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + + self.assertEqual(data["name"], "gary") + self.assertEqual(data["data"], {"color": "red"}) + self.assertEqual(data["id"], "mdm0:0001") + self.assertEqual(data["owner"], "anonymous") + self.assertEqual(data["type"], "dmp") + + self.resp = [] + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dmp/mdm1/mdm0:0001/data', + 'wsgi.input': StringIO(json.dumps({"size": "grande"})) + } + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + + self.assertEqual(data, {"color": "red", "size": "grande"}) + + self.resp = [] + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dmp/mdm1/mdm0:0001/name', + 'wsgi.input': StringIO(json.dumps("bob")) + } + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + + self.assertEqual(data, "bob") + + self.resp = [] + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dmp/mdm1/mdm0:0001' + } + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + + self.assertEqual(data["name"], "bob") + self.assertEqual(data["data"], {"color": "red", "size": "grande"}) + self.assertEqual(data["id"], "mdm0:0001") + self.assertEqual(data["owner"], "anonymous") + self.assertEqual(data["type"], "dmp") + + self.resp = [] + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dmp/mdm1/mdm0:0001/meta' + } + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + self.assertEqual(data, {}) + + def test_dap(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/def' + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, []) + + self.resp = [] + inp = { + "name": "gary", + "data": { + "color": "red" + } + } + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': 
'/midas/dap/def', + 'wsgi.input': StringIO(json.dumps(inp)) + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data["name"], "gary") + self.assertEqual(data["data"], {"color": "red"}) + self.assertEqual(data["id"], "mds3:0001") + self.assertEqual(data["owner"], "anonymous") + self.assertEqual(data["type"], "drafts") + + self.assertEqual(self.data["drafts"]["mds3:0001"]["name"], "gary") + self.assertEqual(self.data["drafts"]["mds3:0001"]["data"], {"color": "red"}) + self.assertEqual(self.data["drafts"]["mds3:0001"]["id"], "mds3:0001") + self.assertEqual(self.data["drafts"]["mds3:0001"]["owner"], "anonymous") + + self.resp = [] + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/def/mds3:0001' + } + body = self.app(req, self.start) + data = self.body2dict(body) + self.assertIn("200 ", self.resp[0]) + + self.assertEqual(data["name"], "gary") + self.assertEqual(data["data"], {"color": "red"}) + self.assertEqual(data["id"], "mds3:0001") + self.assertEqual(data["owner"], "anonymous") + self.assertEqual(data["type"], "drafts") + + + + + +if __name__ == '__main__': + test.main() + + From 475cd392654d5acfeaa5db3703ab7366a43b180a Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 22 Nov 2022 16:59:29 -0500 Subject: [PATCH 014/123] scripts/pdp-uwsgi.py: fix doc and default config --- scripts/pdp-uwsgi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pdp-uwsgi.py b/scripts/pdp-uwsgi.py index af80967..228ad37 100644 --- a/scripts/pdp-uwsgi.py +++ b/scripts/pdp-uwsgi.py @@ -1,7 +1,7 @@ """ the uWSGI script for launching the PDP web service. -This script launches an ID resolver as a web service using uwsgi. For example, one can +This script launches the web service using uwsgi. 
For example, one can launch the service with the following command: uwsgi --plugin python3 --http-socket :9090 --wsgi-file pdp-uwsgi.py \ @@ -50,7 +50,7 @@ def _dec(obj): elif config.service: config.service.wait_until_up(int(os.environ.get('OAR_CONFIG_TIMEOUT', 10)), True, sys.stderr) - cfg = config.service.get(os.environ.get('OAR_CONFIG_APP', 'pdr-resolve')) + cfg = config.service.get(os.environ.get('OAR_CONFIG_APP', 'pdr-pdp')) else: raise config.ConfigurationException("resolver: nist-oar configuration not provided") From 5fc25a03f1c89c92d60f10a42204f56741ae6be6 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 22 Nov 2022 17:01:43 -0500 Subject: [PATCH 015/123] midasserver: setup running in docker container; debug --- docker/dockbuild.sh | 11 +- docker/midasserver/Dockerfile | 24 +++ docker/midasserver/entrypoint.sh | 26 +++ docker/midasserver/midas-dmp_conf.yml | 35 ++++ docker/midasserver/run.sh | 181 +++++++++++++++++++++ oar-build/_dockbuild.sh | 71 +++++++- oar-build/dockbuild_help.txt | 5 +- python/nistoar/midas/dbio/__init__.py | 8 +- python/nistoar/midas/dbio/wsgi/__init__.py | 9 + python/nistoar/midas/dbio/wsgi/wsgiapp.py | 19 ++- python/setup.py | 2 +- 11 files changed, 373 insertions(+), 18 deletions(-) create mode 100644 docker/midasserver/Dockerfile create mode 100644 docker/midasserver/entrypoint.sh create mode 100644 docker/midasserver/midas-dmp_conf.yml create mode 100755 docker/midasserver/run.sh diff --git a/docker/dockbuild.sh b/docker/dockbuild.sh index 48fa3b3..c829496 100755 --- a/docker/dockbuild.sh +++ b/docker/dockbuild.sh @@ -23,14 +23,15 @@ PACKAGE_NAME=oar-pdr-py ## containers to be built. List them in dependency order (where a latter one ## depends the former ones). 
# -DOCKER_IMAGE_DIRS="pymongo jqfromsrc ejsonschema pyenv pdrpytest pdpserver" +DEP_DOCKER_IMAGE_DIRS="pymongo jqfromsrc ejsonschema pyenv" +EXEC_DOCKER_IMAGE_DIRS="pdrpytest pdpserver midasserver" -[ -d "$codedir/metadata/oar-build" ] || { +[ -d "$codedir/metadata/docker" ] || { echo ${prog}: Missing metadata submodule echo Clone the oar-metadata repo in this directory\; name it "'metadata'" exit 3 } -. $codedir/metadata/oar-build/_dockbuild.sh +. $codedir/oar-build/_dockbuild.sh # Override, if need be, the UID of the user to run as in the container; the # default is the user running this script. @@ -57,3 +58,7 @@ if { echo " $BUILD_IMAGES " | grep -qs " pdpserver "; }; then echo '+' docker build $BUILD_OPTS -t $PACKAGE_NAME/pdpserver pdpserver | logit docker build $BUILD_OPTS -t $PACKAGE_NAME/pdpserver pdpserver 2>&1 | logit fi +if { echo " $BUILD_IMAGES " | grep -qs " midasserver "; }; then + echo '+' docker build $BUILD_OPTS -t $PACKAGE_NAME/midasserver midasserver | logit + docker build $BUILD_OPTS -t $PACKAGE_NAME/midasserver midasserver 2>&1 | logit +fi diff --git a/docker/midasserver/Dockerfile b/docker/midasserver/Dockerfile new file mode 100644 index 0000000..bc99500 --- /dev/null +++ b/docker/midasserver/Dockerfile @@ -0,0 +1,24 @@ +######################################################################### +# +# MIDAS Authoring web service suite +# +# This container launches the MIDAS web services via scripts/midas-uwsgi.py +# +######################################################################### +FROM oar-pdr-py/pyenv + +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod a+rx /usr/local/bin/entrypoint.sh + +VOLUME /dev/oar-pdr-py +VOLUME /app/dist + +RUN mkdir -p /dev/oar-pdr-py /app && chmod a+rwx /app +WORKDIR /dev/oar-pdr-py + +ENV PYTHONPATH /dev/oar-pdr-py/python/dist/pdr/lib/python:/app/dist/pdr/lib/python + +ARG devuser=developer +USER $devuser +EXPOSE 9091 +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git 
a/docker/midasserver/entrypoint.sh b/docker/midasserver/entrypoint.sh new file mode 100644 index 0000000..c7ce4d0 --- /dev/null +++ b/docker/midasserver/entrypoint.sh @@ -0,0 +1,26 @@ +#! /bin/bash +# +port=9091 +script=/dev/oar-pdr-py/scripts/midas-uwsgi.py +[ -f "$script" ] || script=/app/dist/pdr/bin/midas-uwsgi.py + +[ -n "$OAR_WORKING_DIR" ] || OAR_WORKING_DIR=`mktemp -d _midasserver.XXXXX` +[ -d "$OAR_WORKING_DIR" ] || { + echo midasserver: ${OAR_WORKING_DIR}: working directory does not exist + exit 10 +} +[ -n "$OAR_LOG_DIR" ] || export OAR_LOG_DIR=$OAR_WORKING_DIR +[ -n "$OAR_MIDASSERVER_CONFIG" ] || OAR_MIDASSERVER_CONFIG=/apps/midas-config.yml + +echo +echo Working Dir: $OAR_WORKING_DIR +echo Access the MIDAS web services at http://localhost:$port/ +echo + +opts= +oar_midas_db_type=$1 +[ -z "$oar_midas_db_type" ] || opts="--set-ph oar_midas_db_type=$oar-midas_db_type" + +uwsgi --plugin python3 --http-socket :$port --wsgi-file $script --static-map /docs=$PWD/docs \ + --set-ph oar_config_file=$OAR_MIDASSERVER_CONFIG \ + --set-ph oar_working_dir=$OAR_WORKING_DIR $opts diff --git a/docker/midasserver/midas-dmp_conf.yml b/docker/midasserver/midas-dmp_conf.yml new file mode 100644 index 0000000..ddbf672 --- /dev/null +++ b/docker/midasserver/midas-dmp_conf.yml @@ -0,0 +1,35 @@ +logfile: midas-dmp.log +dbio: + factory: fsbased +about: + title: "MIDAS Authoring Services" + describedBy: "https://midas3.nist.gov/midas/apidocs" + href: "http://midas3.nist.gov/midas/dmp" +services: + dmp: + about: + message: "DMP Service is available" + title: "Data Management Plan (DMP) Authoring API" + describedBy: "https://midas3.nist.gov/midas/apidocs" + href: "http://midas3.nist.gov/midas/dmp" + + broker: + clients: + midas: + default_shoulder: mdm1 + default: + default_shoulder: mdm0 + + dbio: + superusers: [ "rlp" ] + allowed_project_shoulders: ["mdm1", "spc1"] + default_shoulder: mdm0 + + default_convention: mdm1 + conventions: + mdm1: + about: + title: "Data Management Plan 
(DMP) Authoring API (mdm1 convention)" + describedBy: "https://midas3.nist.gov/midas/apidocs/dmp/mdm1" + href: "http://midas3.nist.gov/midas/dmp/mdm1" + version: mdm1 diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh new file mode 100755 index 0000000..01b26cc --- /dev/null +++ b/docker/midasserver/run.sh @@ -0,0 +1,181 @@ +#! /bin/bash +# +# run.sh -- launch the server in a docker container +# + +prog=midasserver +execdir=`dirname $0` +[ "$execdir" = "" -o "$execdir" = "." ] && execdir=$PWD +dockerdir=`(cd $execdir/.. > /dev/null 2>&1; pwd)` +repodir=`(cd $dockerdir/.. > /dev/null 2>&1; pwd)` +scriptsdir=$repodir/scripts +os=`uname` +SED_RE_OPT=r +[ "$os" != "Darwin" ] || SED_RE_OPT=E + +PACKAGE_NAME=oar-pdr-py +DEFAULT_CONFIGFILE=$dockerdir/midasserver/midas-dmp_conf.yml + +set -e + +function usage { + cat < log +} + +DOPYBUILD= +DODOCKBUILD= +CONFIGFILE= +USEMONGO= +STOREDIR= +DBTYPE= +while [ "$1" != "" ]; do + case "$1" in + -b|--build) + DOPYBUILD="-b" + ;; + -D|--docker-build) + DODOCKBUILD="-D" + ;; + -c) + shift + CONFIGFILE=$1 + ;; + --config-file=*) + CONFIGFILE=`echo $1 | sed -e 's/[^=]*=//'` + ;; + -M|--use-mongo) + DBTYPE="mongo" + ;; + -*) + echo "${prog}: unsupported option:" $1 + false + ;; + *) + [ -z "$STOREDIR" ] || { + echo "${prog}: DIR already set to $STOREDIR; unsupported extra argument:" $1 + false + } + STOREDIR=$1 + ;; + esac + shift +done + +([ -z "$DOPYBUILD" ] && [ -e "$repodir/dist" ]) || { + echo '+' scripts/install.sh --prefix=$repodir/dist/pdr + $repodir/scripts/install.sh --prefix=$repodir/dist/pdr +} +[ -d "$repodir/dist/pdr/lib/python/nistoar" ] || { + echo ${prog}: Python library not found in dist directory: $repodir/dist + false +} +VOLOPTS="-v $repodir/dist:/app/dist" + +# build the docker images if necessary +(docker_images_built midasserver && [ -z "$DODOCKBUILD" ]) || build_server_image + +[ -n "$CONFIGFILE" ] || CONFIGFILE=$DEFAULT_CONFIGFILE +[ -f "$CONFIGFILE" ] || { + echo "${prog}: Config file 
${CONFIGFILE}: does not exist as a file" + false +} +configext=`echo $CONFIGFILE | sed -e 's/^.*\.//' | tr A-Z a-z` +[ "$configext" = "json" -o "$configext" = "yml" ] || { + echo "${prog}:" Config file type not recognized by extension: $configext + false +} +VOLOPTS="$VOLOPTS -v ${CONFIGFILE}:/app/midas-config.${configext}:ro" +ENVOPTS="-e OAR_MIDASSERVER_CONFIG=/app/midas-config.${configext}" + +if [ -d "$repodir/docs" ]; then + VOLOPTS="$VOLOPTS -v $repodir/docs:/docs" +fi + +[ -z "$STOREDIR" ] || { + [ -d "$STOREDIR" ] || { + parent=`dirname $STOREDIR` + if [ -d "$parent" ]; then + mkdir $STOREDIR + else + echo "${prog}: ${STOREDIR}: storage directory not found" + false + fi + } + sdir=`cd $STOREDIR; pwd` + VOLOPTS="$VOLOPTS -v ${sdir}:/data/midas" + ENVOPTS="$ENVOPTS -e OAR_WORKING_DIR=/data/midas" +} + +STOP_MONGO=true +if [ "$DBTYPE" = "mongo" ]; then + DOCKER_COMPOSE="docker compose" + (docker compose version > /dev/null 2>&1) || DOCKER_COMPOSE=docker-compose + ($DOCKER_COMPOSE version > /dev/null 2>&1) || { + echo ${prog}: docker compose required for -M + false + } + + + dc_vol_file=`mktemp --tmpdir --suffix=.yml docker-compose.volumes.XXXXXX` + cat > $dc_vol_file <> $dc_vol_file + echo " source: $sdir" >> $dc_vol_file + } + + # now launch the database in its own containers + echo '+' $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d + $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d + + function stop_mongo { + $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file + [ -f "$dc_vol_file" ] || rm $dc_vol_file; + } + STOP_MONGO=stop_mongo + + echo + echo NOTE: Visit http://localhost:8081/ to view MongoDB contents + echo +fi + +CONTAINER_NAME="midasserver" +function stop_server { + echo '+' docker kill $CONTAINER_NAME + docker kill $CONTAINER_NAME +} +trap "{ stop_server; $STOP_MONGO; }" EXIT TERM STOP + +echo '+' docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm 
--name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE +docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE + + +[ "$DBTYPE" != "mongo" ] || { + $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file down + [ ! -f "$dc_vol_file" ] || rm $dc_vol_file +} + diff --git a/oar-build/_dockbuild.sh b/oar-build/_dockbuild.sh index f4ba894..0f2cdc3 100644 --- a/oar-build/_dockbuild.sh +++ b/oar-build/_dockbuild.sh @@ -15,7 +15,9 @@ true ${OAR_BUILD_DIR:=$codedir/oar-build} true ${OAR_DOCKER_DIR:=$codedir/docker} true ${PACKAGE_NAME:=`basename $codedir`} -[ -z "$DOCKER_IMAGE_DIRS" ] && { +[ -n "$DOCKER_IMAGE_DIRS" ] || \ + DOCKER_IMAGE_DIRS=`echo $DEP_DOCKER_IMAGE_DIRS $EXEC_DOCKER_IMAGE_DIRS` +[ -n "$DOCKER_IMAGE_DIRS" ] || { for item in `ls $OAR_DOCKER_DIR`; do [ -d "$item" -a -f "$item/Dockerfile" ] && \ DOCKER_IMAGE_DIRS="$DOCKER_IMAGE_DIRS $item" @@ -32,17 +34,18 @@ function sort_build_images { # Input: list of the requested images (on the command line) # - if [ -z "$@" -o "$@" = ":" ]; then + if [ "$#" -eq 0 ]; then # no images are mentioned on the command line, build them all # out=$DOCKER_IMAGE_DIRS else # make sure we build them in the right order - # + # + imgs=:`echo $@ | tr ' ' :`: out= for img in $DOCKER_IMAGE_DIRS; do - (echo $@ | grep -qs ":${img}:") && \ + (echo $imgs | grep -qs ":${img}:") && \ out="$out $img" done fi @@ -50,13 +53,64 @@ function sort_build_images { echo $out } +function index_of_word { + args=(`echo $@`) + find=${args[0]} + words=(${args[@]:1}) + for i in "${!words[@]}"; do + if [ "${words[$i]}" == "$find" ]; then + echo $i + return 0 + fi + done +} + +function word_is_in { + words=:`echo $@ | sed -e 's/^.* //' -e 's/ /:/'`: + echo $words | grep -qs :$1: +} + +function dependency_images { + # check for exec image request; include all dependency image if match found + out= + for img in $@; do + i=`index_of_word $img $EXEC_DOCKER_IMAGE_DIRS` + [ -z "$i" ] 
|| { + echo $DEP_DOCKER_IMAGE_DIRS + return 0 + } + done + + max= + for im in $@; do + i=`index_of_word $im $DEP_DOCKER_IMAGE_DIRS` + [ -z "$i" ] || ([ -n "$max" ] && [ "$i" -le "$max" ]) || max=$i + done + deps=($DEP_DOCKER_IMAGE_DIRS) + [ -z "$max" ] || out="${deps[@]:0:$max}" + echo $out +} + +function get_build_images_with_deps { + deps=`dependency_images $@` + out=:`echo $deps | tr ' ' :`: + for img in $@; do + (echo $out | grep -sq ":$img:") || out="${out}${img}:" + done + echo $out | tr : ' ' +} + function collect_build_opts { [ -n "$OAR_DOCKER_UID" ] || OAR_DOCKER_UID=`id -u` echo "--build-arg=devuid=$OAR_DOCKER_UID" } function setup_build { - BUILD_IMAGES=`sort_build_images $do_BUILD_IMAGES` + if [ -n "$DODEPS" ]; then + BUILD_IMAGES=`get_build_images_with_deps $do_BUILD_IMAGES` + else + BUILD_IMAGES=`sort_build_images $do_BUILD_IMAGES` + fi BUILD_OPTS=`collect_build_opts` } @@ -69,7 +123,7 @@ function help { CL4LOG=$@ -do_BUILD_IMAGES=":" +do_BUILD_IMAGES= while [ "$1" != "" ]; do case "$1" in --logfile=*) @@ -79,6 +133,9 @@ while [ "$1" != "" ]; do shift LOGPATH=$1 ;; + --build-dependencies|-d) + DODEPS=-d + ;; --quiet|-q) QUIET=-q ;; @@ -91,7 +148,7 @@ while [ "$1" != "" ]; do false ;; *) - do_BUILD_IMAGES="${do_BUILD_IMAGES}${1}:" + do_BUILD_IMAGES=`echo ${do_BUILD_IMAGES} ${1}` ;; esac shift diff --git a/oar-build/dockbuild_help.txt b/oar-build/dockbuild_help.txt index e8d8d3f..4abec54 100644 --- a/oar-build/dockbuild_help.txt +++ b/oar-build/dockbuild_help.txt @@ -5,8 +5,9 @@ Usage: %PROG% [-lq] [image_dir ...] Options: --logfile=FILEPATH, -l FILEPATH Log file to record build output to + --build-dependencies, -d Build all dependencies of the listed images as well --quiet, -q Suppress messages to terminal Arguments: - image_dir ... names of docker directories for the images that should - (re-)build; only these will be built. + image_dir ... 
names of docker directories for the images that should (re-)build + diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index c439d6d..a51f3d7 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -196,8 +196,10 @@ fact = fsbased.FSBasedDBClientFactory(config, "./db") """ -from .base import * -from . import mongo +from .base import * +from .mongo import MongoDBClientFactory +from .inmem import InMemoryDBClientFactory +from .fsbased import FSBasedDBClientFactory -MIDASDBClientFactory = mongo.MongoDBClientFactory +MIDASDBClientFactory = MongoDBClientFactory diff --git a/python/nistoar/midas/dbio/wsgi/__init__.py b/python/nistoar/midas/dbio/wsgi/__init__.py index 42a2183..d32d042 100644 --- a/python/nistoar/midas/dbio/wsgi/__init__.py +++ b/python/nistoar/midas/dbio/wsgi/__init__.py @@ -42,6 +42,11 @@ ``services`` (object) _required_. an object in which each property is a service name (as referred to above in the API endpoint pattern--e.g., "dmp" or "dap"), and its value is the configuration for that service. +``dbio`` + (object) _recommended_. an object that provides configuration for the DBIO client; typically, this + includes a ``factory`` property whose string value identifies the type of + backend storage to use ("mongo", "fsbased", or "inmem"). The other properties + are the parameters that are specific to the backend storage. Most of the properties in a service configuration object will be treated as default configuration parameters for configuring a particular version, or _convention_, of the service. Convention-level @@ -66,6 +71,10 @@ ``conventions`` field described above) that should be considered the default convention. If a client requests the special convention name "def", the request will be routed to the version of the service with that name. +``dbio`` + (object) _recommended_. 
the configuration parameters for the DBIO client which are specific to the + project service type (see below). In particular, this includes the authorization configurations; + see the :py:module:`dbio module documentation ` for this schema. There are two common properties that can appear in either the service or convention level (or both, where the convention level takes precedence): ``project_name`` and ``type``. These optional properties are diff --git a/python/nistoar/midas/dbio/wsgi/wsgiapp.py b/python/nistoar/midas/dbio/wsgi/wsgiapp.py index 482af4d..c99d0df 100644 --- a/python/nistoar/midas/dbio/wsgi/wsgiapp.py +++ b/python/nistoar/midas/dbio/wsgi/wsgiapp.py @@ -36,6 +36,8 @@ from . import project as prj, SubApp, Handler, DBIOHandler from ..base import DBClientFactory from ..inmem import InMemoryDBClientFactory +from ..fsbased import FSBasedDBClientFactory +from ..mongo import MongoDBClientFactory from nistoar.pdr.publish.prov import PubAgent from nistoar.base.config import ConfigurationException, merge_config @@ -45,6 +47,7 @@ DEF_BASE_PATH = "/midas/" DEF_DBIO_CLIENT_FACTORY_CLASS = InMemoryDBClientFactory +DEF_DBIO_CLIENT_FACTORY_NAME = "inmem" class SubAppFactory: """ @@ -351,6 +354,12 @@ class MIDASApp: included. 
""" + DB_FACTORY_CLASSES = { + "inmem": InMemoryDBClientFactory, + "fsbased": FSBasedDBClientFactory, + "mongo": MongoDBClientFactory + } + def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, base_ep: str=None, subapp_factory_funcs: Mapping=None): """ @@ -384,7 +393,13 @@ def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, subapp_factory_funcs = _MIDASSubApps if not dbio_client_factory: - dbio_client_factory = DEF_DBIO_CLIENT_FACTORY_CLASS(self.cfg.get('dbio', {})) + dbclsnm = self.cfg.get('dbio', {}).get('factory') + if not dbclsnm: + dbclsnm = DEF_DBIO_CLIENT_FACTORY_NAME + dbcls = self.DB_FACTORY_CLASSES.get(dbclsnm) + if dbcls: + dbcls = DEF_DBIO_CLIENT_FACTORY_CLASS + dbio_client_factory = dbcls(self.cfg.get('dbio', {})) factory = SubAppFactory(self.cfg, subapp_factory_funcs) self.subapps = factory.create_suite(log, dbio_client_factory) @@ -421,7 +436,7 @@ def handle_request(self, env, start_resp): if self.base_ep: if len(path) < len(self.base_ep) or path[:len(self.base_ep)] != self.base_ep: # path does not match the required base endpoint path - return Handler(path, env, start_resp).send_error(404, "Not Found", ashead=ashead) + return Handler(path, env, start_resp).send_error(404, "Not Found") # lop off the base endpoint path path = path[len(self.base_ep):] diff --git a/python/setup.py b/python/setup.py index 08af2a2..5c8dab0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -17,7 +17,7 @@ SCRIPTS = [ 'pdr.py', 'pdrhealthcheck.py', - 'resolver-uwsgi.py' + 'resolver-uwsgi.py', 'pdp-uwsgi.py', 'midas-uwsgi.py' ] TESTSCRIPTS = [ From 0e8c9f11baba8af79a5382a2a1ff018a34708e4d Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 23 Nov 2022 13:29:15 -0500 Subject: [PATCH 016/123] midasserver: add start/stop feature --- docker/midasserver/run.sh | 57 ++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh index 
01b26cc..1d9ff65 100755 --- a/docker/midasserver/run.sh +++ b/docker/midasserver/run.sh @@ -23,7 +23,7 @@ function usage { $prog - launch a docker container running the midas web server SYNOPSIS - $prog [-b|--build] [-D|--docker-build] [-c|--config-file FILE] [-M|--use-mongodb] [DIR] + $prog [-b|--build] [-D|--docker-build] [-c|--config-file FILE] [-M|--use-mongodb] [DIR] [start|stop] EOF } @@ -70,6 +70,13 @@ while [ "$1" != "" ]; do echo "${prog}: unsupported option:" $1 false ;; + start|stop) + [ -z "$ACTION" ] || { + echo "${prog}: Action $ACTION already set; provide only one" + false + } + ACTION=`echo $1 | tr A-Z a-z` + ;; *) [ -z "$STOREDIR" ] || { echo "${prog}: DIR already set to $STOREDIR; unsupported extra argument:" $1 @@ -80,6 +87,7 @@ while [ "$1" != "" ]; do esac shift done +[ -n "$ACTION" ] || ACTION=start ([ -z "$DOPYBUILD" ] && [ -e "$repodir/dist" ]) || { echo '+' scripts/install.sh --prefix=$repodir/dist/pdr @@ -134,33 +142,38 @@ if [ "$DBTYPE" = "mongo" ]; then echo ${prog}: docker compose required for -M false } - dc_vol_file=`mktemp --tmpdir --suffix=.yml docker-compose.volumes.XXXXXX` cat > $dc_vol_file <> $dc_vol_file - echo " source: $sdir" >> $dc_vol_file + echo " device: $sdir" >> $dc_vol_file } + source $dockerdir/midasserver/mongo/mongo.env + + [ "$ACTION" = "stop" ] || { + # now launch the database in its own containers + echo '+' $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d + $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d - # now launch the database in its own containers - echo '+' $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d - $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d + echo + echo NOTE: Visit http://localhost:8081/ to view MongoDB contents + echo + } function stop_mongo { - $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file + echo 
'+' $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file down + $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file down [ -f "$dc_vol_file" ] || rm $dc_vol_file; } STOP_MONGO=stop_mongo - - echo - echo NOTE: Visit http://localhost:8081/ to view MongoDB contents - echo fi CONTAINER_NAME="midasserver" @@ -168,14 +181,14 @@ function stop_server { echo '+' docker kill $CONTAINER_NAME docker kill $CONTAINER_NAME } -trap "{ stop_server; $STOP_MONGO; }" EXIT TERM STOP - -echo '+' docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE -docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE - - -[ "$DBTYPE" != "mongo" ] || { - $DOCKER_COMPOSE -f $dockerdir/mongo/docker-compose.mongo.yml -f $dc_vol_file down - [ ! -f "$dc_vol_file" ] || rm $dc_vol_file -} +trap "{ stop_server; $STOP_MONGO; }" TERM STOP + +if [ "$ACTION" = "stop" ]; then + echo Shutting down the midas server... 
+ stop_server || true + $STOP_MONGO +else + echo '+' docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE + docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE +fi From 860fe3c067acb3a0db0cd18155c249973a974db1 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 23 Nov 2022 13:40:17 -0500 Subject: [PATCH 017/123] midas-dmp_conf.yml: change default shoulder to mdm1 --- docker/midasserver/midas-dmp_conf.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/midasserver/midas-dmp_conf.yml b/docker/midasserver/midas-dmp_conf.yml index ddbf672..f58b62e 100644 --- a/docker/midasserver/midas-dmp_conf.yml +++ b/docker/midasserver/midas-dmp_conf.yml @@ -18,12 +18,12 @@ services: midas: default_shoulder: mdm1 default: - default_shoulder: mdm0 + default_shoulder: mdm1 dbio: - superusers: [ "rlp" ] - allowed_project_shoulders: ["mdm1", "spc1"] - default_shoulder: mdm0 + superusers: [ "rlp3" ] + allowed_project_shoulders: ["mdm0", "mdm1"] + default_shoulder: mdm1 default_convention: mdm1 conventions: From 88bb027ccc52bb95d99e02aa4ab116a20138d4a7 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 26 Nov 2022 11:08:21 -0500 Subject: [PATCH 018/123] docker/midasserver: enable and debug mongo backend --- docker/midasserver/entrypoint.sh | 6 +- docker/midasserver/midas-dmp_conf.yml | 1 + .../mongo/docker-compose.mongo.yml | 30 +++++ docker/midasserver/mongo/mongo-init.sh | 11 ++ docker/midasserver/mongo/mongo.env | 8 ++ docker/midasserver/run.sh | 39 +++--- python/nistoar/midas/dbio/mongo.py | 27 ++-- python/nistoar/midas/dbio/wsgi/broker.py | 3 +- python/nistoar/midas/dbio/wsgi/project.py | 1 + python/tests/nistoar/midas/dbio/test_mongo.py | 2 +- .../midas/dbio/wsgi/test_project_midas.py | 117 ++++++++++++++++++ 11 files changed, 206 insertions(+), 39 deletions(-) create mode 100644 
docker/midasserver/mongo/docker-compose.mongo.yml create mode 100644 docker/midasserver/mongo/mongo-init.sh create mode 100644 docker/midasserver/mongo/mongo.env create mode 100644 python/tests/nistoar/midas/dbio/wsgi/test_project_midas.py diff --git a/docker/midasserver/entrypoint.sh b/docker/midasserver/entrypoint.sh index c7ce4d0..bdac3ef 100644 --- a/docker/midasserver/entrypoint.sh +++ b/docker/midasserver/entrypoint.sh @@ -4,13 +4,13 @@ port=9091 script=/dev/oar-pdr-py/scripts/midas-uwsgi.py [ -f "$script" ] || script=/app/dist/pdr/bin/midas-uwsgi.py -[ -n "$OAR_WORKING_DIR" ] || OAR_WORKING_DIR=`mktemp -d _midasserver.XXXXX` +[ -n "$OAR_WORKING_DIR" ] || OAR_WORKING_DIR=`mktemp --tmpdir -d _midasserver.XXXXX` [ -d "$OAR_WORKING_DIR" ] || { echo midasserver: ${OAR_WORKING_DIR}: working directory does not exist exit 10 } [ -n "$OAR_LOG_DIR" ] || export OAR_LOG_DIR=$OAR_WORKING_DIR -[ -n "$OAR_MIDASSERVER_CONFIG" ] || OAR_MIDASSERVER_CONFIG=/apps/midas-config.yml +[ -n "$OAR_MIDASSERVER_CONFIG" ] || OAR_MIDASSERVER_CONFIG=/app/midas-config.yml echo echo Working Dir: $OAR_WORKING_DIR @@ -19,7 +19,7 @@ echo opts= oar_midas_db_type=$1 -[ -z "$oar_midas_db_type" ] || opts="--set-ph oar_midas_db_type=$oar-midas_db_type" +[ -z "$oar_midas_db_type" ] || opts="--set-ph oar_midas_db_type=$oar_midas_db_type" uwsgi --plugin python3 --http-socket :$port --wsgi-file $script --static-map /docs=$PWD/docs \ --set-ph oar_config_file=$OAR_MIDASSERVER_CONFIG \ diff --git a/docker/midasserver/midas-dmp_conf.yml b/docker/midasserver/midas-dmp_conf.yml index f58b62e..b72b907 100644 --- a/docker/midasserver/midas-dmp_conf.yml +++ b/docker/midasserver/midas-dmp_conf.yml @@ -1,4 +1,5 @@ logfile: midas-dmp.log +loglevel: DEBUG dbio: factory: fsbased about: diff --git a/docker/midasserver/mongo/docker-compose.mongo.yml b/docker/midasserver/mongo/docker-compose.mongo.yml new file mode 100644 index 0000000..f0f5898 --- /dev/null +++ b/docker/midasserver/mongo/docker-compose.mongo.yml @@ 
-0,0 +1,30 @@
+version: "3"
+services:
+  mongodb:
+    image: mongo:${MONGO_VERSION}
+    container_name: midas_mongodb
+    restart: always
+    ports:
+      - 27017:27017
+    environment:
+      - MONGO_INITDB_ROOT_USERNAME=${OAR_MONGODB_ADMIN_USER}
+      - MONGO_INITDB_ROOT_PASSWORD=${OAR_MONGODB_ADMIN_PASS}
+      - MONGO_INITDB_DATABASE=${OAR_MONGODB_DBNAME}
+      - MONGO_USER=${OAR_MONGODB_USER}
+      - MONGO_PASS=${OAR_MONGODB_PASS}
+    volumes:
+      - ${OAR_MONGODB_DBDIR}:/data/db
+      - ./mongo-init.sh:/docker-entrypoint-initdb.d/mongo-init.sh:ro
+    command: "--auth"
+
+  mongo-express:
+    image: mongo-express
+    restart: always
+    ports:
+      - 8081:8081
+    links:
+      - mongodb
+    environment:
+      ME_CONFIG_MONGODB_ADMINUSERNAME: ${OAR_MONGODB_ADMIN_USER}
+      ME_CONFIG_MONGODB_ADMINPASSWORD: ${OAR_MONGODB_ADMIN_PASS}
+      ME_CONFIG_MONGODB_URL: mongodb://${OAR_MONGODB_ADMIN_USER}:${OAR_MONGODB_ADMIN_PASS}@mongodb:27017/
diff --git a/docker/midasserver/mongo/mongo-init.sh b/docker/midasserver/mongo/mongo-init.sh
new file mode 100644
index 0000000..8627396
--- /dev/null
+++ b/docker/midasserver/mongo/mongo-init.sh
@@ -0,0 +1,11 @@
+echo "Creating curator user..." 
+echo ' + use '${OAR_MONGODB_DBNAME}' + db.createUser( + { + user: "'${OAR_MONGODB_USER}'", + pwd: "'${OAR_MONGODB_PASS}'", + roles: [ "readWrite" ] + } + ) + exit' | mongo diff --git a/docker/midasserver/mongo/mongo.env b/docker/midasserver/mongo/mongo.env new file mode 100644 index 0000000..5993cd5 --- /dev/null +++ b/docker/midasserver/mongo/mongo.env @@ -0,0 +1,8 @@ +set -a +OAR_MONGODB_ADMIN_USER=admin +OAR_MONGODB_ADMIN_PASS=admin +OAR_MONGODB_USER=oarop +OAR_MONGODB_PASS=oarop +OAR_MONGODB_DBNAME=midas +MONGO_VERSION=4.4.8 +set +a diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh index 1d9ff65..6f17a2a 100755 --- a/docker/midasserver/run.sh +++ b/docker/midasserver/run.sh @@ -134,6 +134,7 @@ fi ENVOPTS="$ENVOPTS -e OAR_WORKING_DIR=/data/midas" } +NETOPTS= STOP_MONGO=true if [ "$DBTYPE" = "mongo" ]; then DOCKER_COMPOSE="docker compose" @@ -142,26 +143,23 @@ if [ "$DBTYPE" = "mongo" ]; then echo ${prog}: docker compose required for -M false } - - dc_vol_file=`mktemp --tmpdir --suffix=.yml docker-compose.volumes.XXXXXX` - cat > $dc_vol_file <> $dc_vol_file - } + + echo '+' source $dockerdir/midasserver/mongo/mongo.env source $dockerdir/midasserver/mongo/mongo.env + [ -n "$STOREDIR" -o "$ACTION" = "stop" ] || { + echo ${prog}: DIR argument must be provided with -M/--use-mongo + false + } + export OAR_MONGODB_DBDIR=`cd $STOREDIR; pwd`/mongo + + NETOPTS="--network=mongo_default --link midas_mongodb:mongodb" + ENVOPTS="$ENVOPTS -e OAR_MONGODB_HOST=mongodb -e OAR_MONGODB_USER=oarop" + [ "$ACTION" = "stop" ] || { # now launch the database in its own containers - echo '+' $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d - $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file up -d + echo '+' $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml up -d + $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml up -d echo echo NOTE: Visit 
http://localhost:8081/ to view MongoDB contents @@ -169,9 +167,8 @@ EOF } function stop_mongo { - echo '+' $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file down - $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml -f $dc_vol_file down - [ -f "$dc_vol_file" ] || rm $dc_vol_file; + echo '+' $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml down + $DOCKER_COMPOSE -f $dockerdir/midasserver/mongo/docker-compose.mongo.yml down } STOP_MONGO=stop_mongo fi @@ -188,7 +185,7 @@ if [ "$ACTION" = "stop" ]; then stop_server || true $STOP_MONGO else - echo '+' docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE - docker run $ENVOPTS $VOLOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE + echo '+' docker run $ENVOPTS $VOLOPTS $NETOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE + docker run $ENVOPTS $VOLOPTS $NETOPTS -p 127.0.0.1:9091:9091/tcp --rm --name=$CONTAINER_NAME $PACKAGE_NAME/midasserver $DBTYPE fi diff --git a/python/nistoar/midas/dbio/mongo.py b/python/nistoar/midas/dbio/mongo.py index bd6ebef..87960e6 100644 --- a/python/nistoar/midas/dbio/mongo.py +++ b/python/nistoar/midas/dbio/mongo.py @@ -63,7 +63,7 @@ def native(self): the native pymongo database object that contains the DBIO collections. Accessing this property will implicitly connect this client to the underlying MongoDB database. 
""" - if not self._native: + if self._native is None: self.connect() return self._native @@ -71,7 +71,7 @@ def _upsert(self, collname: str, recdata: Mapping) -> bool: try: id = recdata['id'] except KeyError as ex: - raise DBIOException("_upsert(): record is missing required 'id' property") + raise base.DBIOException("_upsert(): record is missing required 'id' property") key = {"id": id} try: @@ -81,10 +81,10 @@ def _upsert(self, collname: str, recdata: Mapping) -> bool: result = coll.replace_one(key, recdata, upsert=True) return result.matched_count == 0 - except DBIOException as ex: + except base.DBIOException as ex: raise except Exception as ex: - raise DBIOException("Failed to load record with id=%s: %s" % (id, str(ex))) + raise base.DBIOException("Failed to load record with id=%s: %s" % (id, str(ex))) def _next_recnum(self, shoulder): key = {"slot": shoulder} @@ -102,10 +102,10 @@ def _next_recnum(self, shoulder): result = coll.find_one_and_update(key, {"$inc": {"next": 1}}) return result["next"] - except DBIOException as ex: + except base.DBIOException as ex: raise except Exception as ex: - raise DBIOException("Failed to access named sequence, =%s: %s" % (shoulder, str(ex))) + raise base.DBIOException("Failed to access named sequence, =%s: %s" % (shoulder, str(ex))) def _get_from_coll(self, collname, id) -> MutableMapping: key = {"id": id} @@ -117,7 +117,7 @@ def _get_from_coll(self, collname, id) -> MutableMapping: return coll.find_one(key, {'_id': False}) except Exception as ex: - raise DBIOException("Failed to access record with id=%s: %s" % (id, str(ex))) + raise base.DBIOException("Failed to access record with id=%s: %s" % (id, str(ex))) def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping]: try: @@ -128,7 +128,7 @@ def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping] yield rec except Exception as ex: - raise DBIOException("Failed while selecting records: " + str(ex)) + raise 
base.DBIOException("Failed while selecting records: " + str(ex)) def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMapping]: try: @@ -139,7 +139,7 @@ def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMappi yield rec except Exception as ex: - raise DBIOException("Failed while selecting records: " + str(ex)) + raise base.DBIOException("Failed while selecting records: " + str(ex)) def _delete_from(self, collname, id): key = {"id": id} @@ -151,7 +151,7 @@ def _delete_from(self, collname, id): return results.deleted_count > 0 except Exception as ex: - raise DBIOException("Failed while deleting record with id=%s: %s" % (id, str(ex))) + raise base.DBIOException("Failed while deleting record with id=%s: %s" % (id, str(ex))) def select_records(self, perm: base.Permissions=base.ACLs.OWN) -> Iterator[base.ProjectRecord]: if isinstance(perm, str): @@ -174,7 +174,7 @@ def select_records(self, perm: base.Permissions=base.ACLs.OWN) -> Iterator[base. yield base.ProjectRecord(self._projcoll, rec) except Exception as ex: - raise base.DBIOException("Failed while selecting records: " + str(ex)) + raise base.DBIOException("Failed while selecting records: " + str(ex), cause=ex) class MongoDBClientFactory(base.DBClientFactory): """ @@ -213,6 +213,7 @@ def __init__(self, config: Mapping, dburl: str = None): dburl) self._dburl = dburl - def create_client(self, servicetype: str, foruser: str = base.ANONYMOUS): - return MongoDBClient(self._dburl, self._cfg, servicetype, foruser) + def create_client(self, servicetype: str, config: Mapping = {}, foruser: str = base.ANONYMOUS): + cfg = merge_config(config, deepcopy(self._cfg)) + return MongoDBClient(self._dburl, cfg, servicetype, foruser) diff --git a/python/nistoar/midas/dbio/wsgi/broker.py b/python/nistoar/midas/dbio/wsgi/broker.py index 095ca53..14167c1 100644 --- a/python/nistoar/midas/dbio/wsgi/broker.py +++ b/python/nistoar/midas/dbio/wsgi/broker.py @@ -75,12 +75,13 @@ def 
_get_id_shoulder(self, user: PubAgent): if client_ctl is None: client_ctl = self.cfg.get('clients', {}).get("default") if client_ctl is None: - self.log.info("No default ID shoulder configured for client group, %s", user.group) + self.log.debug("Unrecognized client group, %s", user.group) raise NotAuthorized(user.actor, "create record", "Client group, %s, not recognized" % user.group) out = client_ctl.get('default_shoulder') if not out: + self.log.info("No default ID shoulder configured for client group, %s", user.group) raise NotAuthorized(user.actor, "create record", "No default shoulder defined for client group, "+user.group) return out diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 8d80caf..9c7d547 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -455,6 +455,7 @@ def do_POST(self, path): try: prec = self._pbrkr.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) except dbio.NotAuthorized as ex: + self.log.debug("Authorization failure: "+str(ex)) return self.send_unauthorized() except dbio.AlreadyExists as ex: return self.send_error_resp(400, "Name already in use", str(ex)) diff --git a/python/tests/nistoar/midas/dbio/test_mongo.py b/python/tests/nistoar/midas/dbio/test_mongo.py index 1f7c2b3..caee83c 100644 --- a/python/tests/nistoar/midas/dbio/test_mongo.py +++ b/python/tests/nistoar/midas/dbio/test_mongo.py @@ -310,7 +310,7 @@ def setUp(self): self.cfg = { "default_shoulder": "pdr0" } self.fact = mongo.MongoDBClientFactory(self.cfg, dburl) self.user = "nist0:ava1" - self.cli = self.fact.create_client(base.DMP_PROJECTS, self.user) + self.cli = self.fact.create_client(base.DMP_PROJECTS, {}, self.user) self.dbg = self.cli.groups def tearDown(self): diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project_midas.py b/python/tests/nistoar/midas/dbio/wsgi/test_project_midas.py new file mode 100644 index 0000000..cc670a5 --- 
/dev/null +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project_midas.py @@ -0,0 +1,117 @@ +import os, json, pdb, logging, tempfile +from collections import OrderedDict +from io import StringIO +import unittest as test + +from nistoar.midas.dbio import inmem, base, mongo +from nistoar.midas.dbio.wsgi import project as prj +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_project.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_project.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +dburl = None +if os.environ.get('MONGO_TESTDB_URL'): + dburl = os.environ.get('MONGO_TESTDB_URL') + +@test.skipIf(not os.environ.get('MONGO_TESTDB_URL'), "test mongodb not available") +class TestMIDASProjectAppMongo(test.TestCase): + + def start(self, status, headers=None, extup=None): + self.resp.append(status) + for head in headers: + self.resp.append("{0}: {1}".format(head[0], head[1])) + + def body2dict(self, body): + return json.loads("\n".join(self.tostr(body)), object_pairs_hook=OrderedDict) + + def tostr(self, resplist): + return [e.decode() for e in resplist] + + def setUp(self): + self.cfg = { + "broker": { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + } + }, + "dbio": { + "superusers": [ "rlp" ], + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } + } + self.dbfact = mongo.MongoDBClientFactory({}, os.environ['MONGO_TESTDB_URL']) + self.app = prj.MIDASProjectApp(base.DMP_PROJECTS, rootlog.getChild("dmpapi"), self.dbfact, self.cfg) + self.resp = [] + 
self.rootpath = "/midas/dmp/" + + def create_record(self, name="goob", meta=None): + cli = self.dbfact.create_client(base.DMP_PROJECTS, self.cfg["dbio"], nistr.actor) + out = cli.create_record(name, "mdm1") + if meta: + out.meta = meta + out.save() + return out + + def test_create(self): + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("400 ", self.resp[0]) + + self.resp = [] + req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "big") + self.assertEqual(resp['owner'], "nstr1") + self.assertEqual(resp['id'], "mdm1:0001") + self.assertEqual(resp['data'], {"color": "red"}) + self.assertEqual(resp['meta'], {}) + + + + +if __name__ == '__main__': + test.main() + + From 5013589ce29ef6ce2d84a4e888abf5de47f95242 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 26 Nov 2022 11:28:14 -0500 Subject: [PATCH 019/123] docker/midasserver: add usage doc --- docker/midasserver/run.sh | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh index 6f17a2a..43b3c9e 100755 --- a/docker/midasserver/run.sh +++ b/docker/midasserver/run.sh @@ -23,7 +23,32 @@ function usage { $prog - launch a docker container running the midas web server SYNOPSIS - $prog [-b|--build] 
[-D|--docker-build] [-c|--config-file FILE] [-M|--use-mongodb] [DIR] [start|stop] + $prog [-b|--build] [-D|--docker-build] [-c|--config-file FILE] + [-M|--use-mongodb] [DIR] [start|stop] + +ARGUMENTS + start Start the service; this is the default if the + start|stop argument is not provided. + stop Stop the running service. If -M was used to + start the service, it must also be provided when + stopping it. + DIR a directory where the database data backing the + server will be stored. If not provided, a + temporary directory within the midasserver + container will be used. Provide this if you want to + look at the database contents directly. + -b, --build Rebuild the python library and install into dist; + This is done automatically if the dist directory + does not exist. + -D, --docker-build Rebuild the midasserver docker image; this is + done automatically if the midasserver image + does not exist. + -c FILE, --config-file FILE Use a custom service configuration given in FILE. + This file must be in YAML or JSON format. + Default: docker/midasserver/midas-dmp_config.yml + -M, --use-mongodb Use a MongoDB backend; DIR must also be provided. + If not set, a file-based database (using JSON + files) will be used, stored under DIR/dbfiles. 
EOF } @@ -66,6 +91,10 @@ while [ "$1" != "" ]; do -M|--use-mongo) DBTYPE="mongo" ;; + -h|--help) + usage + exit + ;; -*) echo "${prog}: unsupported option:" $1 false @@ -89,7 +118,7 @@ while [ "$1" != "" ]; do done [ -n "$ACTION" ] || ACTION=start -([ -z "$DOPYBUILD" ] && [ -e "$repodir/dist" ]) || { +([ -z "$DOPYBUILD" ] && [ -e "$repodir/dist/pdr" ]) || { echo '+' scripts/install.sh --prefix=$repodir/dist/pdr $repodir/scripts/install.sh --prefix=$repodir/dist/pdr } From cc51dabcab5c34a40b974d563e46a72fc6b83a06 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 26 Nov 2022 18:59:01 -0500 Subject: [PATCH 020/123] midasserver: enable html api documentation --- docker/midasserver/entrypoint.sh | 2 +- docker/midasserver/midas-dmp_conf.yml | 12 +- docker/midasserver/run.sh | 3 +- docs/dmpsvc-elements.html | 11 + docs/dmpsvc-openapi.yml | 346 ++++++++++++++++++++++++++ scripts/midasserver | 16 ++ 6 files changed, 382 insertions(+), 8 deletions(-) create mode 100644 docs/dmpsvc-elements.html create mode 100644 docs/dmpsvc-openapi.yml create mode 100755 scripts/midasserver diff --git a/docker/midasserver/entrypoint.sh b/docker/midasserver/entrypoint.sh index bdac3ef..24d0417 100644 --- a/docker/midasserver/entrypoint.sh +++ b/docker/midasserver/entrypoint.sh @@ -21,6 +21,6 @@ opts= oar_midas_db_type=$1 [ -z "$oar_midas_db_type" ] || opts="--set-ph oar_midas_db_type=$oar_midas_db_type" -uwsgi --plugin python3 --http-socket :$port --wsgi-file $script --static-map /docs=$PWD/docs \ +uwsgi --plugin python3 --http-socket :$port --wsgi-file $script --static-map /docs=/docs \ --set-ph oar_config_file=$OAR_MIDASSERVER_CONFIG \ --set-ph oar_working_dir=$OAR_WORKING_DIR $opts diff --git a/docker/midasserver/midas-dmp_conf.yml b/docker/midasserver/midas-dmp_conf.yml index b72b907..7b16b32 100644 --- a/docker/midasserver/midas-dmp_conf.yml +++ b/docker/midasserver/midas-dmp_conf.yml @@ -4,15 +4,15 @@ dbio: factory: fsbased about: title: "MIDAS Authoring Services" - describedBy: 
"https://midas3.nist.gov/midas/apidocs" - href: "http://midas3.nist.gov/midas/dmp" + describedBy: "http://localhost:9091/midas/docs" + href: "http://localhost:9091/midas/" services: dmp: about: message: "DMP Service is available" title: "Data Management Plan (DMP) Authoring API" - describedBy: "https://midas3.nist.gov/midas/apidocs" - href: "http://midas3.nist.gov/midas/dmp" + describedBy: "http://localhost:9091/docs/dmpsvc-elements.html" + href: "http://localhost:9091/midas/dmp" broker: clients: @@ -31,6 +31,6 @@ services: mdm1: about: title: "Data Management Plan (DMP) Authoring API (mdm1 convention)" - describedBy: "https://midas3.nist.gov/midas/apidocs/dmp/mdm1" - href: "http://midas3.nist.gov/midas/dmp/mdm1" + describedBy: "http://localhost:9091/docs/dmpsvc-elements.html" + href: "http://localhost:9091/midas/dmp/mdm1" version: mdm1 diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh index 43b3c9e..56a3081 100755 --- a/docker/midasserver/run.sh +++ b/docker/midasserver/run.sh @@ -23,7 +23,7 @@ function usage { $prog - launch a docker container running the midas web server SYNOPSIS - $prog [-b|--build] [-D|--docker-build] [-c|--config-file FILE] + $prog [-h|--help] [-b|--build] [-D|--docker-build] [-c|--config-file FILE] [-M|--use-mongodb] [DIR] [start|stop] ARGUMENTS @@ -49,6 +49,7 @@ ARGUMENTS -M, --use-mongodb Use a MongoDB backend; DIR must also be provided. If not set, a file-based database (using JSON files) will be used, stored under DIR/dbfiles. 
+ -h, --help Print this text to the terminal and then exit EOF } diff --git a/docs/dmpsvc-elements.html b/docs/dmpsvc-elements.html new file mode 100644 index 0000000..8a6e19c --- /dev/null +++ b/docs/dmpsvc-elements.html @@ -0,0 +1,11 @@ + +MIDAS Data Management Planning + + + + + + + + + diff --git a/docs/dmpsvc-openapi.yml b/docs/dmpsvc-openapi.yml new file mode 100644 index 0000000..fbb97e7 --- /dev/null +++ b/docs/dmpsvc-openapi.yml @@ -0,0 +1,346 @@ +openapi: 3.1.0 +info: + title: MIDAS Data Management Plan Service (DMPS) Interface, Convention MDM1 + summary: A service allows the creation, updating, and publishing of Data Management Plans (DMPs) + description: |- + A client uses the DMPS interface to create and edit metadata that describe a Data Management + Plan. Clients are expected to be either automated systems or user-driven, interactive tools. + + contact: + name: MIDAS support office + email: datasupport@nist.gov + + version: 0.1 + +servers: + - url: https://localhost/midas/dmp + +paths: + /mdm1: + summary: the resource representing the full collection of DMPs + get: + summary: return all or subset of DMP records readable by the requesting user + responses: + "200": + description: + A list of the matched DMP records. See the DMP record description for details. + content: + "application/json": + schema: + description: a list of matched DMP records. + type: array + items: { "type": { "$ref": "#/components/schemas/ProjectRecord" } } + examples: + "empty": + summary: no matching DMPs found + value: '[]' + post: + summary: create a new DMP record + description: + The client provides a name and initial data to be used to create the new record. The + server will assign a new identifier to the newly created record, and returns the actual + ProjectRecord saved as a result of the request. + parameters: [ ] # parameters for selecting matching records. 
+ + requestBody: + summary: the data that should be used to initialize the newly created record + description: + This body contains the initial data for the record. It must include the human-oriented + name to assign to the record. The data field is optional, but if given, it must only + include properties that are part of the DMP schema. Properties provided within the optional + meta field are advisory, only. + content: + "application/json": + schema: + "$ref": "#/components/schemas/CreateRecordRequest" + responses: + "201": + description: + The request was accepted and a DMP record was created. The response will include the + identifier assigned to the DMP. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ProjectRecord" + "400": + description: |- + The client sent bad or unusable input of one of the following forms: + * The input request body is not parsable as JSON + * The JSON document in the request body was not compliant with the CreateRecordRequest + schema, such as not including the name field, or including unrecognized properties as + part of the data or meta fields. + * The requested name is already applied to another record owned by the user + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The client did not submit recognized credentials and thus is not authorized + to create DMP records. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mdm1/{projid}: + summary: access to a specific DMP record + parameters: + "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record + get: + summary: Return the DMP record with the specified identifier. + description: + This returns the DMP project record where the data property contains the data constituting the + contents of the plan. 
+ responses: + "200": + description: The DMP draft record with the given identifier was found and returned + content: + "application/json": + schema: + "$ref": "#/components/schemas/ProjectRecord" + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mdm1/{projid}/data: + summary: the data describing the contents of a Data Management Plan (DMP) + parameters: + "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record + get: + summary: return the DMP record contents + description: + this returns the contents of the "data" property that is returned by the "/mdm1/{projid}" + endpoint. + responses: + "200": + description: The DMP record with the given identifier was found and it data content was returned + content: + "application/json": + schema: + "type": object + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + put: + summary: replace the DMP record data contents + description: + This replaces the contents of the "data" property of the DMP record with the given JSON object. + All sub-properties that were previously stored under "data" will be removed (see also PATCH). + requestBody: + summary: the data that should replace the currently stored data content + description: + The request message body is a JSON object whose properties represent the content of the + data management plan. This JSON object will completely replace the data object previously + saved for this record. 
+ content: + "application/json": + schema: + type: object + responses: + "200": + description: The DMP record with the given identifier was found and it data content was updated and returned + content: + "application/json": + schema: + "type": object + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + patch: + summary: update the DMP record data contents + description: + This merges the input data representing a partial update to the contents of the "data" property + into the data already stored. Any sub-properties that match those in the input object will + updated with the given values; any previously stored properties not provided in the input will + remain unchanged. Input subproperty object values can also be partially complete; these will be + correspondingly merged hierarchically. + requestBody: + summary: the data that should be used to initialize the newly created record + description: + The request message body is a JSON object whose properties represent some portion of the content + of the data management plan. This JSON object will merged in with the data object previously + saved for this record. 
+ content: + "application/json": + schema: + type: object + responses: + "200": + description: The DMP record with the given identifier was found and it data content was updated and returned + content: + "application/json": + schema: + "type": object + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mdm1/{projid}/name: + summary: the mnemonic name for the DMP record with the given identifier + parameters: + "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record + get: + summary: "return the DMP record's mnemonic name" + description: + this returns the value of the "name" property that is returned by the "/mdm1/{projid}" + endpoint. + responses: + "200": + description: The DMP record was found and its name was returned. + content: + "application/json": + schema: + "type": string + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + put: + summary: Change the name of the DMP record with the given identifier + description: This endpoint method is used to change the mnemonic name assigned to the record. 
+ requestBody: + summary: the new name to assign to the DMP + content: + "application/json": + schema: + type: string + responses: + "200": + description: the record was found, the name was successfully changed and the new name returned + content: + "application": + "type": string + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + +components: + parameters: + projid: + name: projid + in: path + description: + The identifer used to refer to a DMP + required: true + schema: + type: string + + schemas: + ErrorResponse: + title: a JSON-encoded error response + description: + This is a JSON-encoded description of an error detected by the service while processing + a service request. + properties: + "http:code": + description: the HTTP status code response + type: integer + minimum: 400 + exclusiveMaximum: 600 + "http:reason": + description: the (brief) HTTP status message associated with the code + type: string + "pdr:message": + description: + a more detailed explanation of the error detected. This message may be quite lengthy. + type: string + "pdr:sipid": + description: + the identifier for the SIP being accessed, if known and exists. 
+ required: [ 'http:code', 'http:reason', 'pdr:message' ] + + ProjectRecord: + title: a JSON-encoded DBIO project record + description: + This record describes a project record being drafted by a client + properties: + "name": + description: the mnemonic, user-chosen name for the record + type: string + minimum: 1 + required: true + "id": + description: the unique identifier assigned to the record by the system at its creation + type: string + minimum: 3 + required: true + "owner": + description: the identifier of the user that is primarily responsible for maintaining this record + type: string + minimum: 1 + required: true + "created": + description: the epoch date-time that this record was created + type: integer + required: true + "created_date": + description: the ISO 8601-formatted data-time that this record was created + type: string + "curators": + description: + the list of IDs for people who have been assigned as curators for this record; it will be empty + if no curators are currently assigned. + type: array + items: { type: string } + "deactivated": + description: + a boolean set to True if this record has been deactivated, preventing any further editing or + listing + type: boolean + "acl": + description: the access control lists associated with this record + type: { $ref: ACLs } + "data": + description: the actual DMP record data set by the client + type: object + "meta": + description: metadata associated with the client, managed by the service + type: object + + CreateRecordRequest: + title: a form of a ProjectRecord that is used as a request to create a new one + description: + This record describes a project record being drafted by a client + properties: + "name": + description: the mnemonic, user-chosen name to give to the record + type: string + minimum: 1 + required: true + "data": + description: the initial DMP record data to set + type: object + "meta": + description: + initial meta-information to associate with the record. 
This will be considered + advisory only; the server may override some or all of this data based on policy. + type: object + + + + diff --git a/scripts/midasserver b/scripts/midasserver new file mode 100755 index 0000000..53e3ee5 --- /dev/null +++ b/scripts/midasserver @@ -0,0 +1,16 @@ +#! /bin/bash +# +# midasserver -- launch a docker container running the midas web server +# +# Usage: midasserver [-h|--help] [-b|--build] [-D|--docker-build] +# [-c|--config-file FILE] [-M|--use-mongodb] [DIR] [start|stop] +# +# Type "midasserver -h" print help on arguments/options. +# +prog=`basename $0` +execdir=`dirname $0` +[ "$execdir" = "" -o "$execdir" = "." ] && execdir=$PWD +export CODEDIR=`(cd $execdir/.. > /dev/null 2>&1; pwd)` +export DOCKERDIR=$CODEDIR/docker + +exec $DOCKERDIR/midasserver/run.sh "$@" From 8a96d17a448f4324daed855a7ac9d10a1b3246a3 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sun, 27 Nov 2022 11:04:11 -0500 Subject: [PATCH 021/123] midasserver: add more to openapi doc, add README for server --- docker/midasserver/README.md | 218 +++++++++++++++++++++++++++++++++++ docs/dmpsvc-openapi.yml | 86 +++++++++++++- 2 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 docker/midasserver/README.md diff --git a/docker/midasserver/README.md b/docker/midasserver/README.md new file mode 100644 index 0000000..578b3c6 --- /dev/null +++ b/docker/midasserver/README.md @@ -0,0 +1,218 @@ +# MIDAS Web Services (for development mode) + +The intention of this Docker container is to run a fully functional MIDAS web service +suite* primarily for development and purposes. It can be launched via the +[`midasserver` script](https://github.com/usnistgov/oar-pdr-py/tree/feature/draft-service/scripts/midasserver) +in the [`scripts` directory](https://github.com/usnistgov/oar-pdr-py/tree/feature/draft-service/scripts). + +By default, the server operates with a storage backend in which records are stored in JSON +files beneath a specified data directory. 
This makes it easy to inspect the current contents +of the stored records during development. However, the server can optionally be run using a +MongoDB backend. + +*_Note: While the server is designed to provide the full suite of MIDAS APIs, +as of this writing, only the DMP project service is available._ + +## Prerequisites for running the server + +To run this server "out of the box" requires: + + * Python 3 (>= 3.8.X) + * Docker Engine with command-line tools + * if you want to use the optional MongoDB backend, you will also need the [docker compose + plugin](https://docs.docker.com/get-started/08_using_compose/). This is included with + Docker Desktop (typical for Mac installs); to check to see if you already have it, type, + `docker compose version`. + * The [oar-pdr-py repository](https://github.com/usnistgov/oar-pdr-py) cloned onto your machine. + +## Starting and stopping the server + +To run the server, you should open a terminal and change into your local clone of the `oar-pdr-py` +repository. The server is launched using the `midasserver` script located in the `scripts` +directory. Note when you run the script for the first time, it will automatically build all of +the python code and docker images (producing a lot of output to the screen); these are not rebuilt +by default on subsequent executions. + +To start the server, you provide the name of the directory where you want the backend data written. +For example, you can type: + +```bash +scripts/midasserver midasdata +``` + +This will create a `midasdata` subdirectory in the current directory. The actual record data will +be stored under `./midasdata/dbfiles`. + +The server runs in a docker container in the shell's foreground; that is, you will not get your shell +prompt back. This allows you to see messages logging each call made to the service. 
To stop the
+server, you need to open another terminal, change into the `oar-pdr-py` repository directory, and
+type:
+
+```bash
+scripts/midasserver stop
+```
+
+More information about `midasserver` command-line options can be viewed via the `-h` option:
+
+```bash
+scripts/midasserver -h
+```
+
+### Launching with a MongoDB backend
+
+The server can be optionally switched to store its records in a MongoDB database backend with the
+`-M` command-line option:
+
+```bash
+scripts/midasserver -M midasdata
+```
+
+In addition to starting a MongoDB database server in a Docker container, it also launches a server
+frontend (called mongo-express) that allows you to explore the database contents via a web browser.
+To view, open the URL, `http://localhost:8081/`.
+
+To stop the server, be sure to also provide the `-M` option to ensure that the Mongo database gets
+shut down as well:
+
+```bash
+scripts/midasserver -M stop
+```
+
+## Using the service
+
+The base URLs for the MIDAS services are as follows:
+
+ - [`http://localhost:9091/midas/dmp/mdm1`](http://localhost:9091/midas/dmp/mdm1) -- the Data
+ Management Plan (DMP) project service
+ - `http://localhost:9091/midas/dap/mds3` -- the Digital Asset Publication (DAP) project service
+ - `http://localhost:9091/midas/groups` -- the MIDAS user group service
+
+Note that when you start the service, you also have access to online API documentation. To view,
+open the URL,
+[`http://localhost:9091/docs/dmpsvc-elements.html`](http://localhost:9091/docs/dmpsvc-elements.html).
+
+At this time, all request and response messages are JSON documents with a content type of
+"application/json". This content type is assumed by default, so "Accept" and "Content-Type"
+HTTP headers are not required.
+
+### Creating a new DMP project
+
+Creating a new DMP project record is done by POSTing to the service's base URL.
The request +body must be a JSON object which can contain the following properties: + + - `name` -- (Required) The user-supplied mnemonic name to assign to the new record. This name + is intended only for display purposes; it is not part of the DMP data content. + - `data` -- (Optional) A JSON object containing the data content. Each project type will + enforce its own schema for this object. The data included here is not expected to be complete. + - `meta` -- (Optional) A JSON object containing metadata hints that help the server manage + the record. This data will not be part of the public DMP data content. Creation is the only + time the client can directly add information to this object; although the server may update + the information triggered by other user requests. Unrecognized data in this object may be + ignored. + +For example, an initial record might be created with: + +```bash +curl -X POST --data '{"name": "CoTEM", "data": {"title": "Microscopy of Cobalt Samples"}}' \ + http://localhost:9091/midas/dmp/mdm1 +``` + +If the creation request is successful, the request will return a 201 status and a JSON document +containing the full, newly created record: + +```json +{ + "id": "mdm1:0003", + "name": "CoTEM", + "acls": { + "read": [ + "anonymous" + ], + "write": [ + "anonymous" + ], + "admin": [ + "anonymous" + ], + "delete": [ + "anonymous" + ] + }, + "owner": "anonymous", + "data": { + "title": "Microscopy of Cobalt Samples" + }, + "meta": {}, + "curators": [], + "created": 1669560885.988901, + "createdDate": "2022-11-27T09:54:45", + "lastModified": 1669560885.988901, + "lastModifiedDate": "2022-11-27T09:54:45", + "deactivated": null, + "type": "dmp" +} +``` + +Clients should note the value of the `id` property in order to make further updates. 
+
+### Updating a record's data content
+
+Typically in the life of a project record, after the client creates a new record, it will incrementally
+update its data content as the user manipulates the client interface. Updates can be made using either
+PUT or PATCH requests. PATCH is perhaps more typical: the input can contain partial data that will be
+merged with the data that is already saved on the server. With PUT, the input can also contain partial
+data; however, it will completely replace the data that was already saved on the server, deleting all
+previous data properties regardless of whether they are included in the input.
+
+In this example, we use PATCH to add more data to the record. Note that the URL includes the record's
+`id` value, followed by `/data`.
+
+```bash
+curl -X PATCH --data '{"expectedDataSize": "2 TB"}' http://localhost:9091/midas/dmp/mdm1/mdm1:0003/data
+```
+
+Because the URL used above specifically accesses the "data" part of the record, only the updated data
+object is returned:
+
+```
+{
+ "title": "Microscopy of Cobalt Samples",
+ "expectedDataSize": "2 TB"
+}
+```
+
+### Getting record contents (without updating)
+
+GET requests can be made against different resource URLs to get full records or portions of a record.
+In particular:
+
+ - `http://localhost:9091/midas/dmp/mdm1` -- returns a list of records that the requesting user is
+ allowed to read
+ - `http://localhost:9091/midas/dmp/mdm1/`_id_ -- returns the full record that has an identifier given
+ by _id_. The format is the same as that returned by the create request illustrated above.
+ - `http://localhost:9091/midas/dmp/mdm1/`_id_`/data` -- returns just the data contents for the record
+ with the identifier given by _id_.
+ - `http://localhost:9091/midas/dmp/mdm1/`_id_`/name` -- returns just the mnemonic name assigned to the
+ record by the user
+ - `http://localhost:9091/midas/dmp/mdm1/`_id_`/owner` -- returns just the identifier of the user that
+ owns (and usually created) the record
+ - `http://localhost:9091/midas/dmp/mdm1/`_id_`/acls` -- returns just access control lists attached to
+ the record
+ - `http://localhost:9091/midas/dmp/mdm1/`_id_`/meta` -- returns just the custom metadata attached to the record
+
+### Other operations of note
+
+The service provides other operations that a client can provide support for:
+
+ - **Changing the mnemonic name** - a new name can be assigned to the record via a PUT request on the
+ `/name` sub-resource of a record.
+ - **Add/Remove permissions for other users** - the `/acls` sub-resource endpoints allow one to manipulate
+ permissions given to other users.
+ - **Create user groups*** -- the `/midas/groups` endpoint allows a user to create and manage
+ their own user groups that can be applied to a record's ACLs.
+
+For more information, consult the [API documentation](http://localhost:9091/docs/dmpsvc-elements.html).
+
+*_Not implemented yet._
+
+
diff --git a/docs/dmpsvc-openapi.yml b/docs/dmpsvc-openapi.yml
index fbb97e7..4354d08 100644
--- a/docs/dmpsvc-openapi.yml
+++ b/docs/dmpsvc-openapi.yml
@@ -19,7 +19,7 @@ paths:
   /mdm1:
     summary: the resource representing the full collection of DMPs
     get:
-      summary: return all or subset of DMP records readable by the requesting user
+      summary: return all or a subset of DMP records that the requesting user is authorized to read
       responses:
         "200":
           description:
@@ -91,7 +91,7 @@ paths:
     get:
       summary: Return the DMP record with the specified identifier.
       description:
-        The returns the DMP porject record where the data property contains the data consituting the
+        This returns the DMP project record where the data property contains the data constituting the
        contents of the plan.
responses: "200": @@ -106,6 +106,13 @@ paths: "application/json": schema: "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" /mdm1/{projid}/data: summary: the data describing the contents of a Data Management Plan (DMP) @@ -130,6 +137,13 @@ paths: "application/json": schema: "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" put: summary: replace the DMP record data contents @@ -153,12 +167,29 @@ paths: "application/json": schema: "type": object + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON object + * The JSON document in the request body was not compliant with the data schema + expected by the DMP service + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" "404": description: The DMP draft record with the given identifier was not found content: "application/json": schema: "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to update this record. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" patch: summary: update the DMP record data contents @@ -185,12 +216,29 @@ paths: "application/json": schema: "type": object + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON object + * The JSON object in the request body contains unrecognized or invalid properties as + expected by the DMP service + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" "404": description: The DMP draft record with the given identifier was not found content: "application/json": schema: "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to update this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" /mdm1/{projid}/name: summary: the mnemonic name for the DMP record with the given identifier @@ -215,6 +263,13 @@ paths: "application/json": schema: "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" put: summary: Change the name of the DMP record with the given identifier @@ -231,13 +286,29 @@ paths: content: "application": "type": string + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON string + * The string represents a name that is already attached to another record owned by the + user. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" "404": description: The DMP draft record with the given identifier was not found content: "application/json": schema: "$ref": "#/components/schemas/ErrorResponse" - + "401": + description: + The authenticated user is not authorized to change its name. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" components: parameters: @@ -298,9 +369,16 @@ components: description: the epoch date-time that this record was created type: integer required: true - "created_date": + "createdDate": description: the ISO 8601-formatted data-time that this record was created type: string + "lastModified": + description: the epoch date-time that this record was last updated via the API + type: integer + required: true + "lastModifiedDate": + description: the ISO 8601-formatted data-time that this record was last updated via the API + type: string "curators": description: the list of IDs for people who have been assigned as curators for this record; it will be empty From 414e50135ebf627b83372303af3e12e4e60b6e06 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sun, 27 Nov 2022 11:56:36 -0500 Subject: [PATCH 022/123] dbio: modify record date handling: * add support for modified date * make dates part of group records, too (move dates to ProtectedRecord base) * add formated "*Date" on dict/JSON ouptut --- python/nistoar/midas/dbio/base.py | 67 +++++++++++++------ .../tests/nistoar/midas/dbio/test_groups.py | 8 ++- .../tests/nistoar/midas/dbio/test_record.py | 4 ++ 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 5fd1be0..af6c545 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -168,6 +168,12 @@ def _initialize(self, recdata: MutableMapping) -> MutableMapping: :return: an combination of the given data and defaults :rtype: MutableMapping 
""" + now = time.time() + + if 'created' not in recdata: + recdata['created'] = now + if 'modified' not in recdata: + recdata['modified'] = recdata['created'] if not recdata.get('acls'): recdata['acls'] = {} if not recdata.get('owner'): @@ -188,6 +194,33 @@ def id(self): def owner(self): return self._data.get('owner', "") + @property + def created(self) -> float: + """ + the epoch timestamp indicating when this record was first corrected + """ + return self._data.get('created', 0) + + @property + def created_date(self) -> str: + """ + the creation timestamp formatted as an ISO string + """ + return datetime.fromtimestamp(math.floor(self.created)).isoformat() + + @property + def modified(self) -> float: + """ + the epoch timestamp indicating when this record was last updated + """ + return self._data.get('modified', self._data.get('created', 0)) + + @property + def modified_date(self) -> str: + """ + the timestamp for the last modification, formatted as an ISO string + """ + return datetime.fromtimestamp(math.floor(self.modified)).isoformat() @property def acls(self) -> ACLs: @@ -205,7 +238,13 @@ def save(self): """ if not self.authorized(ACLs.WRITE): raise NotAuthorized(self._cli.user_id, "update record") - self._cli._upsert(self._coll, self._data) + oldmod = self.modified + self._data['modified'] = time.time() + try: + self._cli._upsert(self._coll, self._data) + except Exception as ex: + self._data['modified'] = oldmod + raise def authorized(self, perm: Permissions, who: str = None): """ @@ -267,9 +306,12 @@ def validate(self, errs=None, data=None) -> List[str]: return errs def to_dict(self): - self._data['acls'] = self.acls._perms - self._data['type'] = self._coll - return deepcopy(self._data) + out = deepcopy(self._data) + out['acls'] = self.acls._perms + out['type'] = self._coll + out['createdDate'] = self.created_date + out['modifiedDate'] = self.modified_date + return out class Group(ProtectedRecord): """ @@ -551,8 +593,6 @@ def _initialize(self, rec: 
MutableMapping) -> MutableMapping: rec['meta'] = OrderedDict() if 'curators' not in rec: rec['curators'] = [] - if 'created' not in rec: - rec['created'] = time.time() if 'deactivated' not in rec: # Should be None or a date rec['deactivated'] = None @@ -587,20 +627,6 @@ def name(self, val): """ self._data['name'] = val - @property - def created(self) -> float: - """ - the epoch timestamp indicating when this record was first corrected - """ - return self._data.get('created', 0) - - @property - def created_date(self) -> str: - """ - the creation timestamp formatted as an ISO string - """ - return datetime.fromtimestamp(math.floor(self.created)).isoformat() - @property def data(self) -> MutableMapping: """ @@ -631,7 +657,6 @@ def __str__(self): return "<{} ProjectRecord: {} ({}) owner={}>".format(self._coll.rstrip("s"), self.id, self.name, self.owner) - class DBClient(ABC): """ a client connected to the database for a particular service (e.g. drafting, DMPs, etc.) diff --git a/python/tests/nistoar/midas/dbio/test_groups.py b/python/tests/nistoar/midas/dbio/test_groups.py index 74514eb..df7bcf3 100644 --- a/python/tests/nistoar/midas/dbio/test_groups.py +++ b/python/tests/nistoar/midas/dbio/test_groups.py @@ -1,4 +1,4 @@ -import os, json, pdb, logging +import os, json, pdb, logging, time from pathlib import Path import unittest as test @@ -105,6 +105,9 @@ def test_create_group(self): self.assertEqual(grp.owner, self.user) self.assertEqual(grp.id, id) self.assertTrue(grp.is_member(self.user)) + self.assertGreater(grp.created, 0) + self.assertLess(grp.created, time.time()) + self.assertEqual(grp.modified, grp.created) self.assertTrue(grp.authorized(base.ACLs.OWN)) @@ -166,6 +169,8 @@ def test_get_by_name(self): self.assertEqual(grp.id, "grp0:nist0:ava1:enemies") self.assertEqual(grp.name, "enemies") self.assertEqual(grp.owner, "nist0:ava1") + self.assertGreater(grp.created, 0) + self.assertGreaterEqual(grp.modified, grp.created) 
self.assertIsNone(self.dbg.get_by_name("friends", "alice")) @@ -202,6 +207,7 @@ def test_delete(self): grp.save() self.assertTrue(self.dbg.exists("grp0:nist0:ava1:enemies")) self.assertTrue(self.dbg.exists("grp0:nist0:ava1:friends")) + self.assertGreater(grp.modified, grp.created) def test_select_ids_for_user(self): for s in "abcdefghijklmnopqrstuvwxyz": diff --git a/python/tests/nistoar/midas/dbio/test_record.py b/python/tests/nistoar/midas/dbio/test_record.py index df48d2d..2ec8d98 100644 --- a/python/tests/nistoar/midas/dbio/test_record.py +++ b/python/tests/nistoar/midas/dbio/test_record.py @@ -20,6 +20,7 @@ def test_ctor(self): self.assertEqual(self.rec.name, "brains") self.assertEqual(self.rec.owner, self.user) self.assertGreater(self.rec.created, 0) + self.assertEqual(self.rec.modified, self.rec.created) self.assertTrue(self.rec.created_date.startswith("20")) self.assertNotIn('.', self.rec.created_date) self.assertEqual(self.rec.data, {}) @@ -43,6 +44,8 @@ def test_save(self): self.assertNotIn("pdr0:2222", self.cli._db[base.DRAFT_PROJECTS]) self.rec.save() + self.assertGreater(self.rec.modified, self.rec.created) + oldmod = self.rec.modified self.assertIn("pdr0:2222", self.cli._db[base.DRAFT_PROJECTS]) self.assertEqual(self.cli._db[base.DRAFT_PROJECTS]["pdr0:2222"]['name'], "brains") self.assertEqual(self.cli._db[base.DRAFT_PROJECTS]["pdr0:2222"]['data'], {}) @@ -53,6 +56,7 @@ def test_save(self): self.rec.meta['type'] = 'software' self.rec.acls.grant_perm_to(base.ACLs.READ, "alice") self.rec.save() + self.assertGreater(self.rec.modified, oldmod) self.assertEqual(self.cli._db[base.DRAFT_PROJECTS]["pdr0:2222"]['meta'], {"type": "software"}) self.assertEqual(self.cli._db[base.DRAFT_PROJECTS]["pdr0:2222"]['acls'][base.ACLs.READ], [self.user, "alice"]) From f81d0a9f4789fd87b8b3f4f7159d7a9f534c9fc6 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 29 Nov 2022 11:20:44 -0500 Subject: [PATCH 023/123] create midas.dap; move pdr.draft.nerdstore to it --- 
 python/nistoar/midas/dap/__init__.py | 28 +++++++++++++++++++
 .../draft => midas/dap}/nerdstore/__init__.py | 0
 .../draft => midas/dap}/nerdstore/base.py | 0
 .../draft => midas/dap}/nerdstore/fsbased.py | 0
 .../draft => midas/dap}/nerdstore/inmem.py | 0
 5 files changed, 28 insertions(+)
 create mode 100644 python/nistoar/midas/dap/__init__.py
 rename python/nistoar/{pdr/draft => midas/dap}/nerdstore/__init__.py (100%)
 rename python/nistoar/{pdr/draft => midas/dap}/nerdstore/base.py (100%)
 rename python/nistoar/{pdr/draft => midas/dap}/nerdstore/fsbased.py (100%)
 rename python/nistoar/{pdr/draft => midas/dap}/nerdstore/inmem.py (100%)

diff --git a/python/nistoar/midas/dap/__init__.py b/python/nistoar/midas/dap/__init__.py
new file mode 100644
index 0000000..3c92bf3
--- /dev/null
+++ b/python/nistoar/midas/dap/__init__.py
@@ -0,0 +1,28 @@
+"""
+DAP -- a module implementing the Digital Asset Publication (DAP) Authoring Service.
+
+A Digital Asset Publication is a digital publication of data, software, or other digital asset
+that is made available through the NIST Public Data Repository. It is analogous (and often a
+companion) to a traditional publication in the academic literature. At the core of this module
+is an implementation of the DAP Authoring Service that allows authors to create a draft DAP
+(analogous to a paper manuscript) to be submitted to the PDR for publication. The service is
+made available primarily as a web service API, allowing for multiple different client tools to
+exist to serve different classes of customers.
+
+There can be different flavors of the Authoring service supported in this module to support
+different interaction models or conventions or evolutions of the interface (i.e. interface
+versions). The default flavor is targeted for the MIDAS 3 client. The different flavors are
+implemented within the :py:mod:`service` subpackage.
+ +This package draws on some of the infrastructure from the :py:mod:`~nistoar.pdr.publish` package, +including :py:mod:`provenence tracking` and +:py:mod:`README generation`. +""" +from nistoar.pdr.publish import prov, readme + +# subpackages: +# nerdstore +# service +# filemgr +# review + diff --git a/python/nistoar/pdr/draft/nerdstore/__init__.py b/python/nistoar/midas/dap/nerdstore/__init__.py similarity index 100% rename from python/nistoar/pdr/draft/nerdstore/__init__.py rename to python/nistoar/midas/dap/nerdstore/__init__.py diff --git a/python/nistoar/pdr/draft/nerdstore/base.py b/python/nistoar/midas/dap/nerdstore/base.py similarity index 100% rename from python/nistoar/pdr/draft/nerdstore/base.py rename to python/nistoar/midas/dap/nerdstore/base.py diff --git a/python/nistoar/pdr/draft/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py similarity index 100% rename from python/nistoar/pdr/draft/nerdstore/fsbased.py rename to python/nistoar/midas/dap/nerdstore/fsbased.py diff --git a/python/nistoar/pdr/draft/nerdstore/inmem.py b/python/nistoar/midas/dap/nerdstore/inmem.py similarity index 100% rename from python/nistoar/pdr/draft/nerdstore/inmem.py rename to python/nistoar/midas/dap/nerdstore/inmem.py From 0012c046dad2ffbf4004356bf2d025a03619c1c8 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 29 Nov 2022 16:45:28 -0500 Subject: [PATCH 024/123] midasserer: add missing uwsgi script! --- scripts/midas-uwsgi.py | 115 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 scripts/midas-uwsgi.py diff --git a/scripts/midas-uwsgi.py b/scripts/midas-uwsgi.py new file mode 100644 index 0000000..755d65e --- /dev/null +++ b/scripts/midas-uwsgi.py @@ -0,0 +1,115 @@ +""" +the uWSGI script for launching the MIDAS web service. + +This script launches the web service using uwsgi. 
For example, one can +launch the service with the following command: + + uwsgi --plugin python3 --http-socket :9090 --wsgi-file midas-uwsgi.py \ + --set-ph oar_config_file=midas_conf.yml --set-ph oar_working_dir=_test + +The configuration data can be provided to this script via a file (as illustrated above) or it +can be fetch from a configuration service, depending on the environment (see below). See the +documentation for nistoar.midas.dbio.wsgi for the configuration parameters supported by this +service. + +This script also pays attention to the following environment variables: + + OAR_HOME The directory where the OAR PDR system is installed; this + is used to find the OAR PDR python package, nistoar. + OAR_PYTHONPATH The directory containing the PDR python module, nistoar. + This overrides what is implied by OAR_HOME. + OAR_CONFIG_SERVICE The base URL for the configuration service; this is + overridden by the oar_config_service uwsgi variable. + OAR_CONFIG_ENV The application/component name for the configuration; + this is only used if OAR_CONFIG_SERVICE is used. + OAR_CONFIG_TIMEOUT The max number of seconds to wait for the configuration + service to come up (default: 10); + this is only used if OAR_CONFIG_SERVICE is used. + OAR_CONFIG_APP The name of the component/application to retrieve + configuration data for (default: pdr-resolve); + this is only used if OAR_CONFIG_SERVICE is used. 
+""" +import os, sys, logging, copy +from copy import deepcopy + +try: + import nistoar +except ImportError: + oarpath = os.environ.get('OAR_PYTHONPATH') + if not oarpath and 'OAR_HOME' in os.environ: + oarpath = os.path.join(os.environ['OAR_HOME'], "lib", "python") + if oarpath: + sys.path.insert(0, oarpath) + import nistoar + +from nistoar.base import config +from nistoar.midas.dbio import wsgi, MongoDBClientFactory, InMemoryDBClientFactory, FSBasedDBClientFactory + +try: + import uwsgi +except ImportError: + # simulate uwsgi for testing purpose + from nistoar.testing import uwsgi + uwsgi = uwsgi.load() + +def _dec(obj): + # decode an object if it is not None + return obj.decode() if isinstance(obj, (bytes, bytearray)) else obj + +DEF_MIDAS_DB_TYPE="fsbased" + +# determine where the configuration is coming from +confsrc = _dec(uwsgi.opt.get("oar_config_file")) +if confsrc: + cfg = config.resolve_configuration(confsrc) + +elif 'oar_config_service' in uwsgi.opt: + srvc = config.ConfigService(_dec(uwsgi.opt.get('oar_config_service')), + _dec(uwsgi.opt.get('oar_config_env'))) + srvc.wait_until_up(int(_dec(uwsgi.opt.get('oar_config_timeout', 10))), + True, sys.stderr) + cfg = srvc.get(_dec(uwsgi.opt.get('oar_config_appname', 'pdr-pdp'))) + +elif config.service: + config.service.wait_until_up(int(os.environ.get('OAR_CONFIG_TIMEOUT', 10)), + True, sys.stderr) + cfg = config.service.get(os.environ.get('OAR_CONFIG_APP', 'pdr-resolve')) + +else: + raise config.ConfigurationException("resolver: nist-oar configuration not provided") + +workdir = _dec(uwsgi.opt.get("oar_working_dir")) +if workdir: + cfg['working_dir'] = workdir + +config.configure_log(config=cfg) + +# setup the MIDAS database backend +dbtype = _dec(uwsgi.opt.get("oar_midas_db_type")) +if not dbtype: + dbtype = cfg.get("dbio", {}).get("factory") +if not dbtype: + dbtype = DEF_MIDAS_DB_TYPE + +if dbtype == "fsbased": + dbdir = os.path.join(cfg.get('working_dir','.'), "dbfiles") + if not os.path.exists(dbdir): + 
os.mkdir(dbdir) + factory = FSBasedDBClientFactory(cfg.get("dbio", {}), dbdir) +elif dbtype == "mongo": + dburl = os.environ.get("OAR_MONGODB_URL") + if not dburl: + port = ":%s" % os.environ.get("OAR_MONGODB_PORT", "27017") + cred = "" + if os.environ.get("OAR_MONGODB_USER"): + pasw = os.environ.get("OAR_MONGODB_PASS", os.environ.get("OAR_MONGODB_USER")) + cred = "%s:%s@" % (os.environ.get("OAR_MONGODB_USER"), pasw) + dburl = "mongodb://%s%s%s/midas" % (cred, os.environ.get("OAR_MONGODB_HOST", "localhost"), port) + factory = MongoDBClientFactory(cfg.get("dbio", {}), dburl) +elif dbtype == "inmem": + factory = InMemoryDBClientFactory(cfg.get("dbio", {})) +else: + raise RuntimeError("Unsupported database type: "+dbtype) + +application = wsgi.app(cfg, factory) +logging.info("MIDAS service ready with "+dbtype+" backend") From 8c8385a55fdfaa8eed18422f25fc63cfaaf0efad Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 30 Nov 2022 14:29:59 -0500 Subject: [PATCH 025/123] midasserer: enable dap support --- docker/midasserver/midas-dmpdap_conf.yml | 66 +++++++++ docker/midasserver/run.sh | 4 +- python/nistoar/midas/__init__.py | 9 ++ python/nistoar/midas/dap/service/mdsx.py | 134 ++++++++++++++++++ python/nistoar/midas/dbio/wsgi/broker.py | 22 ++- python/nistoar/midas/dbio/wsgi/wsgiapp.py | 4 +- .../nistoar/midas/dap/service/test_mdsx.py | 117 +++++++++++++++ .../nistoar/midas/dbio/wsgi/test_broker.py | 2 +- 8 files changed, 348 insertions(+), 10 deletions(-) create mode 100644 docker/midasserver/midas-dmpdap_conf.yml create mode 100644 python/nistoar/midas/dap/service/mdsx.py create mode 100644 python/tests/nistoar/midas/dap/service/test_mdsx.py diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml new file mode 100644 index 0000000..a33c145 --- /dev/null +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -0,0 +1,66 @@ +logfile: midas.log +loglevel: DEBUG +dbio: + factory: fsbased +about: + title: "MIDAS Authoring Services" + 
describedBy: "http://localhost:9091/midas/docs" + href: "http://localhost:9091/midas/" +services: + dap: + about: + message: "DMP Service is available" + title: "Digital Asset Publication (DAP) Authoring API" + describedBy: "http://localhost:9091/docs/dapsvc-elements.html" + href: "http://localhost:9091/midas/dap" + + broker: + assign_doi: always + doi_naan: "18434" + clients: + midas: + default_shoulder: mdsx + default: + default_shoulder: mdsx + + dbio: + superusers: [ "rlp3" ] + allowed_project_shoulders: ["mdsx", "mds3", "mds0", "pdr0"] + default_shoulder: mdsx + + default_convention: mdsx + conventions: + mdsx: + about: + title: "Digital Asset Publication (DAP) Authoring API (mds3 convention)" + describedBy: "http://localhost:9091/docs/dapsvc-elements.html" + href: "http://localhost:9091/midas/dap/mdsx" + version: mdsx + + dmp: + about: + message: "DMP Service is available" + title: "Data Management Plan (DMP) Authoring API" + describedBy: "http://localhost:9091/docs/dmpsvc-elements.html" + href: "http://localhost:9091/midas/dmp" + + broker: + clients: + midas: + default_shoulder: mdm1 + default: + default_shoulder: mdm1 + + dbio: + superusers: [ "rlp3" ] + allowed_project_shoulders: ["mdm0", "mdm1"] + default_shoulder: mdm1 + + default_convention: mdm1 + conventions: + mdm1: + about: + title: "Data Management Plan (DMP) Authoring API (mdm1 convention)" + describedBy: "http://localhost:9091/docs/dmpsvc-elements.html" + href: "http://localhost:9091/midas/dmp/mdm1" + version: mdm1 diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh index 56a3081..9e030f2 100755 --- a/docker/midasserver/run.sh +++ b/docker/midasserver/run.sh @@ -142,7 +142,9 @@ configext=`echo $CONFIGFILE | sed -e 's/^.*\.//' | tr A-Z a-z` echo "${prog}:" Config file type not recognized by extension: $configext false } -VOLOPTS="$VOLOPTS -v ${CONFIGFILE}:/app/midas-config.${configext}:ro" +configparent=`dirname $CONFIGFILE` +configfile=`(cd $configparent; pwd)`/`basename 
$CONFIGFILE` +VOLOPTS="$VOLOPTS -v ${configfile}:/app/midas-config.${configext}:ro" ENVOPTS="-e OAR_MIDASSERVER_CONFIG=/app/midas-config.${configext}" if [ -d "$repodir/docs" ]; then diff --git a/python/nistoar/midas/__init__.py b/python/nistoar/midas/__init__.py index fb2fe5e..c4c5c18 100644 --- a/python/nistoar/midas/__init__.py +++ b/python/nistoar/midas/__init__.py @@ -1,5 +1,14 @@ """ midas: A module providing infrastructure support for MIDAS applications. + +MIDAS, historically, stands for Manag... Invent... Digital Assets .... In its first generation, +the system collectively provided a Data Management Plan (DMP) generation tool, an Enterprise Data +Inventory (EDI) record generation tool, reporting functionality, and the generation of the NIST +Public Data Listing (PDL), the publicly viewable portion of the EDI (which is exported to data.gov). + +This module represents a successor implementation of the first generation system. It notably includes +implementations of a DMP Authoring service and a Digital Asset Publication (DAP) Authoring service +(successor to the EDI tool). 
""" from nistoar.base import OARException, SystemInfoMixin, config diff --git a/python/nistoar/midas/dap/service/mdsx.py b/python/nistoar/midas/dap/service/mdsx.py new file mode 100644 index 0000000..fed431b --- /dev/null +++ b/python/nistoar/midas/dap/service/mdsx.py @@ -0,0 +1,134 @@ +""" +Subapp supporting DAP +""" +from logging import Logger +from collections import OrderedDict +from collections.abc import Mapping, MutableMapping, Sequence, Callable + +from ...dbio import DBClient, DBClientFactory, ProjectRecord +from ...dbio.wsgi.broker import ProjectRecordBroker +from ...dbio.wsgi.project import MIDASProjectApp +from nistoar.base.config import ConfigurationException +from nistoar.nerdm.constants import core_schema_base, schema_versions +from nistoar.pdr import constants as const +from nistoar.pdr.publish.prov import PubAgent + +ASSIGN_DOI_NEVER = 'never' +ASSIGN_DOI_ALWAYS = 'always' +ASSIGN_DOI_REQUEST = 'request' +NERD_PRE = "nrd" +NERDPUB_PRE = "nrdp" +NERDM_SCH_ID_BASE = core_schema_base +NERDMPUB_SCH_ID_BASE = core_schema_base + "pub/" +NERDM_SCH_VER = schema_versions[0] +NERDMPUB_SCH_VER = NERDM_SCH_VER +NERDM_SCH_ID = NERDM_SCH_ID_BASE + NERDM_SCH_VER + "#" +NERDMPUB_SCH_ID = NERDMPUB_SCH_ID_BASE + NERDMPUB_SCH_VER + "#" +NERDPUB_DEF = NERDMPUB_SCH_ID + "/definitions/" +NERDM_CONTEXT = "https://data.nist.gov/od/dm/nerdm-pub-context.jsonld" + +class DAPBroker(ProjectRecordBroker): + """ + a project record request broker class for DAP records. 
+ """ + + def __init__(self, dbclient: DBClient, config: Mapping={}, who: PubAgent=None, + wsgienv: dict=None, log: Logger=None): + """ + create a request handler + :param DBClient dbclient: the DBIO client instance to use to access and save project records + :param dict config: the handler configuration tuned for the current type of project + :param dict wsgienv: the WSGI request context + :param Logger log: the logger to use for log messages + """ + super(DAPBroker, self).__init__(dbclient, config, who, wsgienv, log) + + self.cfg.setdefault('assign_doi', ASSIGN_DOI_REQUEST) + if not self.cfg.get('doi_naan') and self.cfg.get('assign_doi') != ASSIGN_DOI_NEVER: + raise ConfigurationException("Missing configuration: doi_naan") + + def _new_data_for(self, recid, meta=None): + out = OrderedDict([ + ("_schema", NERDM_SCH_ID), + ("@context", NERDM_CONTEXT), + ("_extensionSchemas", [NERDPUB_DEF + "PublicDataResource"]) + ("@id", self._arkid_for(recid)), + ("@type", [":".join([NERDPUB_PRE, "PublicDataResource"]), "dcat:Resource"]), + ]) + + if self.cfg.get('assign_doi') == ASSIGN_DOI_ALWAYS: + out['doi'] = self._doi_for(recid) + + if meta: + if meta.get("resourceType"): + addtypes = [] + if meta['resourceType'].lower() == "software": + addtypes = [":".join([NERDPUB_PRE, "Software"])] + elif meta['resourceType'].lower() == "srd": + addtypes = [":".join([NERDPUB_PRE, "SRD"])] + out["@type"] = addtypes + out["@type"] + + if meta.get("softwareLink"): + swcomp = self._get_sw_desc_for(meta["softwareLink"]) + if not 'components' in out: + out['components'] = [] + out['components'] = [swcomp] + out['components'] + + # contact info + + return out + + def _get_sw_desc_for(self, link): + id = link.rsplit('/', 1)[-1] + id = "%s/repo:%s" % (const.LINKCMP_EXTENSION.lstrip('/'), id) + return OrderedDict([ + ("@id", id), + ("@type", ["nrd:AccessPage", "dcat:Distribution"]), + ("title", "Software Repository in GitHub"), + ("accessURL", link) + ]) + + def _doi_for(self, recid): + naan = 
self.cfg.get('doi_naan') + if not naan: + raise PublishingStateException("DOI NAAN not set in configuration") + return "%s/%s" % (naan, self._aipid_for(recid)) + + def _arkid_for(self, recid): + return "ark:/%s/%s" % (const.ARK_NAAN, self._aipid_for(recid)) + + def _aipid_for(self, recid): + return '-'.join(recid.split(':', 1)) + + def _moderate_metadata(self, mdata: MutableMapping, shoulder=None): + out = super()._moderate_metadata(mdata, shoulder) + if isinstance(out.get('creatorisContact'), str): + out['creatorisContact'] = out['creatorisContact'].lower() == "true" + elif out.get('creatorisContact') is None: + out['creatorisContact'] = true + + allowed = "resourceType creatorisContact contactName willUpload provideLink softwareLink assocPageType".split() + for key in out: + if key not in allowed: + del out[key] + + return out + + def _new_metadata_for(self, shoulder=None): + return OrderedDict([ + ("resourceType", "data"), + ("creatorisContact", True) + ]) + + +class DAPApp(MIDASProjectApp): + """ + A MIDAS SubApp supporting a DAP service + """ + + def __init__(self, typename: str, log: Logger, dbcli_factory: DBClientFactory, config: dict={}): + if not typename: + typename = "dap" + super(DAPApp, self).__init__(typename, log, dbcli_factory, config, DAPBroker) + + diff --git a/python/nistoar/midas/dbio/wsgi/broker.py b/python/nistoar/midas/dbio/wsgi/broker.py index 14167c1..f1e48d1 100644 --- a/python/nistoar/midas/dbio/wsgi/broker.py +++ b/python/nistoar/midas/dbio/wsgi/broker.py @@ -54,12 +54,18 @@ def create_record(self, name, data=None, meta=None): shoulder = self._get_id_shoulder(self.who) prec = self.dbcli.create_record(name, shoulder) - prec.data = self._new_data_for(prec.id) if meta: - self._merge_into(self._moderate_metadata(meta), prec.meta) + meta = self._moderate_metadata(meta, shoulder) + if prec.meta: + self._merge_into(meta, prec.meta) + else: + prec.meta = meta + elif not prec.meta: + prec.meta = self._new_metadata_for(shoulder) + prec.data = 
self._new_data_for(prec.id, prec.meta) if data: self.update_data(prec.id, data, prec=prec) # this will call prec.save() - elif meta: + else: prec.save() return prec @@ -193,7 +199,7 @@ def _merge_into(self, update: Mapping, base: Mapping, depth: int=-1): else: base[prop] = update[prop] - def _new_data_for(self, recid): + def _new_data_for(self, recid, meta=None): """ return an "empty" data object set for a record with the given identifier. The returned dictionary can contain some minimal or default properties (which may or may not include @@ -201,7 +207,7 @@ def _new_data_for(self, recid): """ return OrderedDict() - def _new_metadata_for(self, recid): + def _new_metadata_for(self, shoulder=None): """ return an "empty" metadata object set for a record with the given identifier. The returned dictionary can contain some minimal or default properties (which may or may not include @@ -214,7 +220,7 @@ def _new_metadata_for(self, recid): """ return OrderedDict() - def _moderate_metadata(self, mdata: MutableMapping): + def _moderate_metadata(self, mdata: MutableMapping, shoulder=None): """ massage and validate the given record metadata provided by the user client, returning a valid version of the metadata. The implementation may modify the given dictionary in place. @@ -224,7 +230,9 @@ def _moderate_metadata(self, mdata: MutableMapping): otherwise should not be settable by the client. :raises ValueError: if the mdata is disallowed in a way that should abort the entire request. """ - return OrderedDict() + out = self._new_metadata_for(shoulder) + out.update(mdata) + return out def replace_data(self, id, newdata, part=None, prec=None): """ diff --git a/python/nistoar/midas/dbio/wsgi/wsgiapp.py b/python/nistoar/midas/dbio/wsgi/wsgiapp.py index c99d0df..5122cad 100644 --- a/python/nistoar/midas/dbio/wsgi/wsgiapp.py +++ b/python/nistoar/midas/dbio/wsgi/wsgiapp.py @@ -34,6 +34,7 @@ from ... import system from . 
import project as prj, SubApp, Handler, DBIOHandler +from ...dap.service import mdsx from ..base import DBClientFactory from ..inmem import InMemoryDBClientFactory from ..fsbased import FSBasedDBClientFactory @@ -344,7 +345,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge _MIDASSubApps = { - "dmp/mdm1": prj.MIDASProjectApp + "dmp/mdm1": prj.MIDASProjectApp, + "dap/mdsx": mdsx.DAPApp } class MIDASApp: diff --git a/python/tests/nistoar/midas/dap/service/test_mdsx.py b/python/tests/nistoar/midas/dap/service/test_mdsx.py new file mode 100644 index 0000000..9befc88 --- /dev/null +++ b/python/tests/nistoar/midas/dap/service/test_mdsx.py @@ -0,0 +1,117 @@ +import os, json, pdb, logging, tempfile +import unittest as test + +from nistoar.midas.dbio import inmem, base +from nistoar.midas.dbio.wsgi import broker +from nistoar.midas.dap.service import mdsx +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_broker.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +class TestProjectRecordBroker(test.TestCase): + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mdsx" + }, + "default": { + "default_shoulder": "mdsx" + } + }, + "allowed_project_shoulders": ["mdsx", "spc1"], + "default_shoulder": "mdsx", + "assign_doi": "always", + "doi_naan": "88888" + } + self.fact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdsx": 2 }}) + self.dbcli = self.fact.create_client(base.DMP_PROJECTS, self.cfg, nistr.actor) + self.resp = [] + + 
def create_broker(self, request=None): + self.resp = [] + if not request: + request = {'REQUEST_METHOD': 'GRUB'} + self.broker = mdsx.DAPBroker(self.dbcli, self.cfg, nistr, request, rootlog.getChild("broker")) + return self.broker + + def test_ctor(self): + self.create_broker() + self.assertTrue(self.broker.dbcli) + self.assertEqual(self.broker.cfg, self.cfg) + self.assertEqual(self.broker.who.actor, "nstr1") + self.assertEqual(self.broker.who.group, "midas") + self.assertEqual(self.broker.env, {'REQUEST_METHOD': 'GRUB'}) + self.assertTrue(self.broker.log) + + def test_create_record(self): + self.create_broker() + self.assertTrue(not self.broker.dbcli.name_exists("goob")) + + prec = self.broker.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdsx:0003") + self.assertEqual(prec.meta, {"creatorisContact": True, "resourceType": "data"}) + self.assertEqual(prec.owner, "nstr1") + for key in "_schema @context _extensionSchemas".split(): + self.assertIn(key, prec.data) + self.assertEqual(prec.data['doi'], "88888/mdsx-0003") + self.assertEqual(prec.data['@id'], "ark:/88434/mdsx-0003") + + self.assertTrue(self.broker.dbcli.name_exists("goob")) + prec2 = self.broker.get_record(prec.id) + self.assertEqual(prec2.name, "goob") + self.assertEqual(prec2.id, "mdsx:0003") + self.assertEqual(prec2.data['@id'], "ark:/88434/mdsx-0003") + self.assertEqual(prec2.data['doi'], "88888/mdsx-0003") + self.assertEqual(prec2.meta, {"creatorisContact": True, "resourceType": "data"}) + self.assertEqual(prec2.owner, "nstr1") + + with self.assertRaises(broker.AlreadyExists): + self.broker.create_record("goob") + + def test_create_record_withdata(self): + self.create_broker() + self.assertTrue(not self.broker.dbcli.name_exists("gurn")) + + prec = self.broker.create_record("gurn", {"color": "red"}, + {"temper": "dark", "creatorisContact": "goob"}) + self.assertEqual(prec.name, "gurn") + self.assertEqual(prec.id, "mdsx:0003") + self.assertEqual(prec.meta, 
{"creatorisContact": False, "resourceType": "data"}) + for key in "_schema @context _extensionSchemas".split(): + self.assertIn(key, prec.data) + self.assertEqual(prec.data['color'], "red") + self.assertEqual(prec.data['doi'], "88888/mdsx-0003") + self.assertEqual(prec.data['@id'], "ark:/88434/mdsx-0003") + + + + + +if __name__ == '__main__': + test.main() + + diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py index 57ad3d6..501b0f9 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py @@ -125,7 +125,7 @@ def test_create_record_withdata(self): self.assertEqual(prec.name, "gurn") self.assertEqual(prec.id, "mdm1:0003") self.assertEqual(prec.data, {"color": "red"}) - self.assertEqual(prec.meta, {}) + self.assertEqual(prec.meta, {"temper": "dark"}) def test_get_data(self): self.create_broker() From 4a970b2bb4f7285bf7560ee038c43cc5649cda2d Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 30 Nov 2022 15:14:43 -0500 Subject: [PATCH 026/123] midasserver: switch config files for dap service --- docker/midasserver/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/midasserver/run.sh b/docker/midasserver/run.sh index 9e030f2..aecf5fa 100755 --- a/docker/midasserver/run.sh +++ b/docker/midasserver/run.sh @@ -14,7 +14,7 @@ SED_RE_OPT=r [ "$os" != "Darwin" ] || SED_RE_OPT=E PACKAGE_NAME=oar-pdr-py -DEFAULT_CONFIGFILE=$dockerdir/midasserver/midas-dmp_conf.yml +DEFAULT_CONFIGFILE=$dockerdir/midasserver/midas-dmpdap_conf.yml set -e From 89552a397ee635071e0e40454746dfdd4e5284c4 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 30 Nov 2022 15:38:24 -0500 Subject: [PATCH 027/123] midasserver: dap/mdsx: bug fixes (prop order, doi fmt) --- python/nistoar/midas/dap/service/mdsx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/nistoar/midas/dap/service/mdsx.py 
b/python/nistoar/midas/dap/service/mdsx.py index fed431b..cfb43f7 100644 --- a/python/nistoar/midas/dap/service/mdsx.py +++ b/python/nistoar/midas/dap/service/mdsx.py @@ -51,9 +51,9 @@ def _new_data_for(self, recid, meta=None): out = OrderedDict([ ("_schema", NERDM_SCH_ID), ("@context", NERDM_CONTEXT), - ("_extensionSchemas", [NERDPUB_DEF + "PublicDataResource"]) + ("_extensionSchemas", [NERDPUB_DEF + "PublicDataResource"]), ("@id", self._arkid_for(recid)), - ("@type", [":".join([NERDPUB_PRE, "PublicDataResource"]), "dcat:Resource"]), + ("@type", [":".join([NERDPUB_PRE, "PublicDataResource"]), "dcat:Resource"]) ]) if self.cfg.get('assign_doi') == ASSIGN_DOI_ALWAYS: @@ -92,7 +92,7 @@ def _doi_for(self, recid): naan = self.cfg.get('doi_naan') if not naan: raise PublishingStateException("DOI NAAN not set in configuration") - return "%s/%s" % (naan, self._aipid_for(recid)) + return "doi:%s/%s" % (naan, self._aipid_for(recid)) def _arkid_for(self, recid): return "ark:/%s/%s" % (const.ARK_NAAN, self._aipid_for(recid)) From 459e68b6d7a1a92174b54bf139631d43716d1441 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 1 Dec 2022 11:04:59 -0500 Subject: [PATCH 028/123] midasserver: fix bug when filtering out unrecognized meta values --- python/nistoar/midas/dap/service/mdsx.py | 9 ++++----- python/tests/nistoar/midas/dap/service/test_mdsx.py | 9 +++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/nistoar/midas/dap/service/mdsx.py b/python/nistoar/midas/dap/service/mdsx.py index cfb43f7..45b24ec 100644 --- a/python/nistoar/midas/dap/service/mdsx.py +++ b/python/nistoar/midas/dap/service/mdsx.py @@ -101,17 +101,16 @@ def _aipid_for(self, recid): return '-'.join(recid.split(':', 1)) def _moderate_metadata(self, mdata: MutableMapping, shoulder=None): + # only accept expected keys + allowed = "resourceType creatorisContact contactName willUpload provideLink softwareLink assocPageType".split() + mdata = OrderedDict([p for p in mdata.items() if p[0] in 
allowed]) + out = super()._moderate_metadata(mdata, shoulder) if isinstance(out.get('creatorisContact'), str): out['creatorisContact'] = out['creatorisContact'].lower() == "true" elif out.get('creatorisContact') is None: out['creatorisContact'] = true - allowed = "resourceType creatorisContact contactName willUpload provideLink softwareLink assocPageType".split() - for key in out: - if key not in allowed: - del out[key] - return out def _new_metadata_for(self, shoulder=None): diff --git a/python/tests/nistoar/midas/dap/service/test_mdsx.py b/python/tests/nistoar/midas/dap/service/test_mdsx.py index 9befc88..03ccfe0 100644 --- a/python/tests/nistoar/midas/dap/service/test_mdsx.py +++ b/python/tests/nistoar/midas/dap/service/test_mdsx.py @@ -77,7 +77,7 @@ def test_create_record(self): self.assertEqual(prec.owner, "nstr1") for key in "_schema @context _extensionSchemas".split(): self.assertIn(key, prec.data) - self.assertEqual(prec.data['doi'], "88888/mdsx-0003") + self.assertEqual(prec.data['doi'], "doi:88888/mdsx-0003") self.assertEqual(prec.data['@id'], "ark:/88434/mdsx-0003") self.assertTrue(self.broker.dbcli.name_exists("goob")) @@ -85,7 +85,7 @@ def test_create_record(self): self.assertEqual(prec2.name, "goob") self.assertEqual(prec2.id, "mdsx:0003") self.assertEqual(prec2.data['@id'], "ark:/88434/mdsx-0003") - self.assertEqual(prec2.data['doi'], "88888/mdsx-0003") + self.assertEqual(prec2.data['doi'], "doi:88888/mdsx-0003") self.assertEqual(prec2.meta, {"creatorisContact": True, "resourceType": "data"}) self.assertEqual(prec2.owner, "nstr1") @@ -97,14 +97,15 @@ def test_create_record_withdata(self): self.assertTrue(not self.broker.dbcli.name_exists("gurn")) prec = self.broker.create_record("gurn", {"color": "red"}, - {"temper": "dark", "creatorisContact": "goob"}) + {"temper": "dark", "creatorisContact": "goob", + "softwarelink": "http://..." 
}) # misspelled key self.assertEqual(prec.name, "gurn") self.assertEqual(prec.id, "mdsx:0003") self.assertEqual(prec.meta, {"creatorisContact": False, "resourceType": "data"}) for key in "_schema @context _extensionSchemas".split(): self.assertIn(key, prec.data) self.assertEqual(prec.data['color'], "red") - self.assertEqual(prec.data['doi'], "88888/mdsx-0003") + self.assertEqual(prec.data['doi'], "doi:88888/mdsx-0003") self.assertEqual(prec.data['@id'], "ark:/88434/mdsx-0003") From 2109b988cb435a5952e2eb06c599c999dd68cd86 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 2 Dec 2022 06:44:47 -0500 Subject: [PATCH 029/123] move pdr.draft.nerdstore tests to midas.dap --- .../nistoar/{pdr/draft => midas/dap}/nerdstore/test_fsbased.py | 0 .../nistoar/{pdr/draft => midas/dap}/nerdstore/test_inmem.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename python/tests/nistoar/{pdr/draft => midas/dap}/nerdstore/test_fsbased.py (100%) rename python/tests/nistoar/{pdr/draft => midas/dap}/nerdstore/test_inmem.py (100%) diff --git a/python/tests/nistoar/pdr/draft/nerdstore/test_fsbased.py b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py similarity index 100% rename from python/tests/nistoar/pdr/draft/nerdstore/test_fsbased.py rename to python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py diff --git a/python/tests/nistoar/pdr/draft/nerdstore/test_inmem.py b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py similarity index 100% rename from python/tests/nistoar/pdr/draft/nerdstore/test_inmem.py rename to python/tests/nistoar/midas/dap/nerdstore/test_inmem.py From 4eaa1bb85b77f2040191cd02e0dcf39103242d00 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 2 Dec 2022 07:59:21 -0500 Subject: [PATCH 030/123] complete migration of pdr.draft to midas.dap: fix tests, rm draft pkg --- python/nistoar/midas/dap/nerdstore/inmem.py | 2 +- python/nistoar/pdr/draft/__init__.py | 31 ------------------- .../midas/dap/nerdstore/test_fsbased.py | 24 ++++++++++---- 
.../nistoar/midas/dap/nerdstore/test_inmem.py | 9 ++++-- 4 files changed, 25 insertions(+), 41 deletions(-) delete mode 100644 python/nistoar/pdr/draft/__init__.py diff --git a/python/nistoar/midas/dap/nerdstore/inmem.py b/python/nistoar/midas/dap/nerdstore/inmem.py index bc403e5..b6d7805 100644 --- a/python/nistoar/midas/dap/nerdstore/inmem.py +++ b/python/nistoar/midas/dap/nerdstore/inmem.py @@ -48,7 +48,7 @@ def _load_data(self, items): if not itm.get('@id'): itm = copy.deepcopy(itm) itm['@id'] = self._get_default_id_for(itm) - self._data[itm['@id']] = itm + self._data[itm['@id']] = itm self._order.append(itm['@id']) def _new_id(self): diff --git a/python/nistoar/pdr/draft/__init__.py b/python/nistoar/pdr/draft/__init__.py deleted file mode 100644 index 4d36373..0000000 --- a/python/nistoar/pdr/draft/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Provide services and tools for drafting a submission to a PDR Publishing Service. - -A PDR Drafting Service differs from a Publishing Service (from the :py:mod:`~nistoar.pdr.publish` -package) in that the former allows for more user-driven interactivity to assemble and re-arrange a -submission information package (SIP). It can interact with a file management service where data -files are assembled, and its workflow includes processing the submission via a review service. -Because of its more complicated workflow and interactivity, it manages more state about the -submission. In summary, a Drafting Service is intended to be used by a user-driven (GUI) client, -while the client of the publishing service is expected to be an automated system. When a user -has completed assembling their submission and the submission has completed the review process, the -Drafting Service will submit the SIP to a Publishing Service. - -There can be different flavors of the Drafting service supported in this module to support -different interaction models or conventions or evolutions of the interface (i.e. inteface -versions). 
The default flavor is targeted for the MIDAS 3 client. The different flavors are -implemented within the :py:mod`service` subpackage. - -This package draws on some of the infrastructure the :py:mod:`~nistoar.pdr.publish` package, -including :py:mod:`provenence tracking` and -:py:mod:`README generation`. -""" -from ..publish import prov, readme - -# subpackages: -# nerdstore -# service -# filemgr -# review - - diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py index e119f0d..fd8a8a9 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py @@ -2,11 +2,11 @@ from pathlib import Path import unittest as test -import nistoar.pdr.draft.nerdstore as ns -from nistoar.pdr.draft.nerdstore import fsbased, inmem +import nistoar.midas.dap.nerdstore as ns +from nistoar.midas.dap.nerdstore import fsbased, inmem from nistoar.pdr.utils import read_json, write_json -testdir = Path(__file__).parents[2] / 'preserve' / 'data' / 'simplesip' +testdir = Path(__file__).parents[3] / 'pdr' / 'preserve' / 'data' / 'simplesip' sipnerd = testdir / '_nerdm.json' def load_simple(): @@ -336,6 +336,9 @@ def test_ctor(self): def test_load_references(self): nerd = load_simple() + for r in nerd.get('references', []): + if '@id' in r: + del r['@id'] self.refs.load_references(nerd['references']) self.assertEqual(self.refs._order, "ref_0".split()) @@ -356,15 +359,15 @@ def test_contains(self): nerd = load_simple() self.refs.load_references(nerd['references']) - self.assertIn("ref_0", self.refs) - self.assertNotIn("ref_2", self.refs) + self.assertIn("pdr:ref/doi:10.1364/OE.24.014100", self.refs) + self.assertNotIn("ref_0", self.refs) def test_getsetpop(self): nerd = load_simple() self.refs.load_references(nerd['references']) # test access by id or position - ref = self.refs.get("ref_0") + ref = self.refs.get("pdr:ref/doi:10.1364/OE.24.014100") 
self.assertEqual(ref['refType'], "IsReferencedBy") # add a reference @@ -372,8 +375,17 @@ def test_getsetpop(self): self.refs.append(ref) ref = self.refs.get(-1) self.assertEqual(ref['refType'], "IsSupplementTo") + self.assertEqual(ref['@id'], "ref_0") + self.assertEqual(self.refs.get(0)['refType'], "IsReferencedBy") + + # and another + ref['refType'] = "Documents" + self.refs.append(ref) + ref = self.refs.get(-1) + self.assertEqual(ref['refType'], "Documents") self.assertEqual(ref['@id'], "ref_1") self.assertEqual(self.refs.get(0)['refType'], "IsReferencedBy") + self.assertEqual(self.refs.get(1)['refType'], "IsSupplementTo") class TestFSBasedNonFileComps(test.TestCase): diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py index ea98986..08cf4a5 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py @@ -2,11 +2,11 @@ from pathlib import Path import unittest as test -import nistoar.pdr.draft.nerdstore as ns -from nistoar.pdr.draft.nerdstore import inmem +import nistoar.midas.dap.nerdstore as ns +from nistoar.midas.dap.nerdstore import inmem from nistoar.pdr.utils import read_json -testdir = Path(__file__).parents[2] / 'preserve' / 'data' / 'simplesip' +testdir = Path(__file__).parents[3] / 'pdr' / 'preserve' / 'data' / 'simplesip' sipnerd = testdir / '_nerdm.json' def load_simple(): @@ -722,6 +722,9 @@ class TestInMemoryRefList(test.TestCase): def setUp(self): nerd = load_simple() + for r in nerd.get('references', []): + if '@id' in r: + del r['@id'] self.refs = inmem.InMemoryRefList(nerd, nerd['references']) def test_ctor(self): From b8d90d33a2c0f3a312645830bbef1253c971f640 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sun, 4 Dec 2022 12:49:32 -0500 Subject: [PATCH 031/123] migrate dbio.wsgi.broker to dbio.project; ProjectRecordBroker -> ProjectService --- python/nistoar/midas/dbio/__init__.py | 5 +- 
python/nistoar/midas/dbio/base.py | 13 +- python/nistoar/midas/dbio/inmem.py | 2 +- .../midas/dbio/{wsgi/broker.py => project.py} | 129 ++++++++-- python/nistoar/midas/dbio/wsgi/project.py | 116 ++++----- .../tests/nistoar/midas/dbio/test_project.py | 237 ++++++++++++++++++ .../nistoar/midas/dbio/wsgi/test_broker.py | 198 --------------- .../nistoar/midas/dbio/wsgi/test_project.py | 40 +-- 8 files changed, 426 insertions(+), 314 deletions(-) rename python/nistoar/midas/dbio/{wsgi/broker.py => project.py} (72%) create mode 100644 python/tests/nistoar/midas/dbio/test_project.py delete mode 100644 python/tests/nistoar/midas/dbio/wsgi/test_broker.py diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index a51f3d7..20aca77 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -46,9 +46,9 @@ The database is made up of various *collections* to hold the different types of records. (The nature of the collections depends on the implementation of the database backend; for example, if the backend is an SQL relational database, then a collection would be represented by a table or interlinked tables.) -In particular, each *key* (or *project*) record type (dmp or draft) has its own collection associated +In particular, each *key* (or *project*) record type (dmp or dap) has its own collection associated with it; these collections have logical names (accessible via ``dbio.DMP_PROJECTS`` and -``dbio.DRAFT_PROJECTS``). Other collections are supported as well, including one that tracks +``dbio.DAP_PROJECTS``). Other collections are supported as well, including one that tracks user-defined user groups and another capturing people that can servce as authors or collaborators in a project. 
@@ -203,3 +203,4 @@ MIDASDBClientFactory = MongoDBClientFactory +from .project import ProjectService, ProjectServiceFactory diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index af6c545..4bc5e4c 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -20,10 +20,11 @@ from nistoar.base.config import ConfigurationException from .. import MIDASException -DRAFT_PROJECTS = "draft" -DMP_PROJECTS = "dmp" -GROUPS_COLL = "groups" -PEOPLE_COLL = "people" +DAP_PROJECTS = "dap" +DMP_PROJECTS = "dmp" +GROUPS_COLL = "groups" +PEOPLE_COLL = "people" +DRAFT_PROJECTS = "draft" # this name is deprecated DEF_PEOPLE_SHOULDER = "ppl0" DEF_GROUPS_SHOULDER = "grp0" @@ -32,7 +33,7 @@ ANONYMOUS = PUBLIC_GROUP __all__ = ["DBClient", "DBClientFactory", "ProjectRecord", "DBGroups", "Group", "ACLs", "PUBLIC_GROUP", - "ANONYMOUS", "DRAFT_PROJECTS", "DMP_PROJECTS", "ObjectNotFound", "NotAuthorized", "AlreadyExists"] + "ANONYMOUS", "DAP_PROJECTS", "DMP_PROJECTS", "ObjectNotFound", "NotAuthorized", "AlreadyExists"] Permissions = Union[str, Sequence[str], AbstractSet[str]] @@ -986,7 +987,7 @@ def create_client(self, servicetype: str, config: Mapping={}, foruser: str = ANO # connect to the DMP collection client = dbio.MIDASDBClientFactory(configdata).create_client(dbio.DMP_PROJECTS, config, userid) - :param str servicetype: the service data desired. The value should be one of ``DRAFT_PROJECTS`` + :param str servicetype: the service data desired. The value should be one of ``DAP_PROJECTS`` or ``DMP_PROJECTS`` :param Mapping config: the configuration to pass into the client. This will be merged into and override the configuration provided to the factory at construction time. 
diff --git a/python/nistoar/midas/dbio/inmem.py b/python/nistoar/midas/dbio/inmem.py index a0b1aee..dfd2bd2 100644 --- a/python/nistoar/midas/dbio/inmem.py +++ b/python/nistoar/midas/dbio/inmem.py @@ -88,7 +88,7 @@ def __init__(self, config: Mapping, _dbdata = None): """ super(InMemoryDBClientFactory, self).__init__(config) self._db = { - base.DRAFT_PROJECTS: {}, + base.DAP_PROJECTS: {}, base.DMP_PROJECTS: {}, base.GROUPS_COLL: {}, base.PEOPLE_COLL: {}, diff --git a/python/nistoar/midas/dbio/wsgi/broker.py b/python/nistoar/midas/dbio/project.py similarity index 72% rename from python/nistoar/midas/dbio/wsgi/broker.py rename to python/nistoar/midas/dbio/project.py index f1e48d1..3b3edd8 100644 --- a/python/nistoar/midas/dbio/wsgi/broker.py +++ b/python/nistoar/midas/dbio/project.py @@ -1,47 +1,86 @@ """ -a module providing the :py:class:`ProjectRecordBroker` class, a base for classes that hold the business -logic for creating and updating MIDAS DBIO project records. `ProjectRecordBroker` classes mediate -between a RESTful web interface and the :py:module:`~nistoar.midas.dbio` layer. Broker classes -can be subclassed to provide specialized logic for a particular project record type (e.g. DMP, -EDI draft). +a module providing a service for creating and manipulating MIDAS _projects_ stored in a DBIO +backend. + +A _project_ represents a draft description of a digital asset stored in the MIDAS database; it +is represented by a _project record_ that is compliant with the MIDAS Common Database project +data model. Different project types include DMP and Digital Asset Publication (DAP). This +module provides a base service class for manipulating such records. It is intended to be +subclassed to handle the creation of the different types of projects and conventions, policies, +and interaction models for manipulating them. 
""" -from logging import Logger +from logging import Logger, getLogger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence -from .. import DBClient, ProjectRecord -from ..base import AlreadyExists, NotAuthorized, ObjectNotFound -from ... import MIDASException +from .base import DBClient, DBClientFactory, ProjectRecord, AlreadyExists, NotAuthorized, ObjectNotFound +from .. import MIDASException, MIDASSystem from nistoar.pdr.publish.prov import PubAgent - -class ProjectRecordBroker: +class ProjectService(MIDASSystem): """ - A base class for handling requests to create, access, or update a project record. This generic + A base class for a service to create, access, or update a project. This generic base can be used as is or extended and overridden to specialize the business logic for updating - a particular type of project. + a particular type of project under particular conventions or policies. The service is attached + to a particular user at construction time (as given by a :py:class:`~nistoar.pdr.publish.prov.PubAgent` + instance); thus, requests to this service are subject to internal Authorization checks. + + This base service supports a single parameter, ``clients``, that places restrictions on the + creation of records based on which group the user is part of. The value is an object whose keys + are user group name that are authorized to use this service, and whose values are themselves objects + that restrict the requests by that user group; for example: + + .. code-block:: + + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + } + + The special group name "default" will (if present) be applied to users whose group does not match + any of the other names. If not present the user will not be allowed to create new records. + + This implementation only supports one parameter as part of the group configuration: ``default_shoulder``. 
+ This parameter gives the identifier shoulder that should be used the identifier for a new record + created under the user group. Subclasses of this service class may support other parameters. """ - def __init__(self, dbclient: DBClient, config: Mapping={}, who: PubAgent=None, - wsgienv: dict=None, log: Logger=None): + def __init__(self, project_type: str, dbclient_factory: DBClient, config: Mapping={}, + who: PubAgent=None, log: Logger=None): """ - create a request handler + create the service + :param str project_type: the project data type desired. This name is usually used as the + name of the collection in the backend database. Recognized values + include ``dbio.DAP_PROJECTS`` and ``dbio.DMP_PROJECTS`` :param DBClient dbclient: the DBIO client instance to use to access and save project records :param dict config: the handler configuration tuned for the current type of project - :param dict wsgienv: the WSGI request context + :param who PubAgent: the representation of the user that is requesting access :param Logger log: the logger to use for log messages """ - self.dbcli = dbclient + super(ProjectService, self).__init__("DBIO Project Service", "DBIO") self.cfg = config if not who: - who = PubAgent("unkwn", prov.PubAgent.USER, self.dbcli.user_id or "anonymous") + who = PubAgent("unkwn", prov.PubAgent.USER, "anonymous") self.who = who - if wsgienv is None: - wsgienv = {} - self.env = wsgienv + if not log: + log = getLogger(self.system_abbrev).getChild(self.subsystem_abbrev).getChild(project_type) self.log = log - def create_record(self, name, data=None, meta=None): + user = who.actor if who else None + self.dbcli = dbclient_factory.create_client(project_type, self.cfg.get("dbio", {}), user) + + @property + def user(self) -> PubAgent: + """ + the PubAgent instance representing the user that this service acts on behalf of. 
+ """ + return self.who + + def create_record(self, name, data=None, meta=None) -> ProjectRecord: """ create a new project record with the given name. An ID will be assigned to the new record. :param str name: the mnuemonic name to assign to the record. This name cannot match that @@ -92,7 +131,7 @@ def _get_id_shoulder(self, user: PubAgent): "No default shoulder defined for client group, "+user.group) return out - def get_record(self, id): + def get_record(self, id) -> ProjectRecord: """ fetch the project record having the given identifier :raises ObjectNotFound: if a record with that ID does not exist @@ -317,6 +356,48 @@ def _validate_data(self, data): pass +class ProjectServiceFactory: + """ + a factory object that creates ProjectService instances attached to the backend DB implementation + and which acts on behalf of a specific user. + + As this is a concrete class, it can be instantiated directly to produce generic ProjectService + instances but serving a particular project type. Instances are also attached ot a particular + DB backend by virtue of the DBClientFactory instance that is passed in at factory construction + time. + + The configuration provided to this factory will be passed directly to the service instances + it creates. In addition parameters supported by the :py:class:`ProjectService` + (i.e. ``clients``), the configuration can also include a ``dbio`` parameter. + If provided, its value will be used when creating a DBClient to talk to the DB backend (see + :py:class:`~nistoar.midas.dbio.base.DBClientFactory` for details). Subclasses of this factory + class may support additional parameters. + """ + def __init__(self, project_type: str, dbclient_factory: DBClientFactory, config: Mapping={}, + log: Logger=None): + """ + create a service factory associated with a particulr DB backend. + :param str project_type: the project data type desired. This name is usually used as the + name of the collection in the backend database. 
Recognized values + include ``dbio.DAP_PROJECTS`` and ``dbio.DMP_PROJECTS`` + :param DBClientFactory dbclient_factory: the factory instance to use to create a DBClient to + talk to the DB backend. + :param Mapping config: the configuration for the service (see class-level documentation). + :param Logger log: the Logger to use in the service. + """ + self._dbclifact = dbclient_factory + self._prjtype = project_type + self._cfg = config + self._log = log + + def create_service_for(self, who: PubAgent=None): + """ + create a service that acts on behalf of a specific user. + :param PubAgent who: the user that wants access to a project + """ + return ProjectService(self._prjtype, self._dbclifact, self._cfg, who, self._log) + + class InvalidUpdate(MIDASException): """ an exception indicating that the user-provided data is invalid or otherwise would result in diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 9c7d547..1a6c387 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -19,26 +19,18 @@ from nistoar.pdr.publish.prov import PubAgent from nistoar.pdr.utils.webrecord import WebRecorder from ... 
import dbio -from ...dbio import ProjectRecord, DBClientFactory +from ...dbio import ProjectRecord, ProjectService, ProjectServiceFactory from .base import DBIOHandler -from .broker import ProjectRecordBroker class MIDASProjectApp(SubApp): """ a base web app for an interface handling project record """ - def_project_broker_class = ProjectRecordBroker - def __init__(self, projname, log: Logger, dbcli_factory: DBClientFactory, - config: dict={}, project_broker_cls=None): - super(MIDASProjectApp, self).__init__(projname, log, config) - - ## create dbio client from config - if not project_broker_cls: - project_broker_cls = self.def_project_broker_class - self._prjbrkr_cls = project_broker_cls - self._dbfact = dbcli_factory + def __init__(self, service_factory: ProjectServiceFactory, log: Logger, config: dict={}): + super(MIDASProjectApp, self).__init__(service_factory._prjtype, log, config) + self.svcfact = service_factory def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: """ @@ -51,9 +43,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge :param PubAgent who the authenticated user agent making the request """ - # set up dbio client and the request handler that will mediate with it - dbcli = self._dbfact.create_client(self._name, self.cfg.get('dbio'), who.actor) - pbroker = self._prjbrkr_cls(dbcli, self.cfg.get('broker'), who, env, self.log) + # create a service on attached to the user + service = self.svcfact.create_service_for(who) # now parse the requested path; we have different handlers for different types of paths path = path.strip('/') @@ -61,42 +52,41 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge if len(idattrpart) < 2: if not idattrpart[0]: # path is empty: this is used to list all available projects or create a new one - return ProjectSelectionHandler(pbroker, self, env, start_resp, who) + return ProjectSelectionHandler(service, self, env, 
start_resp, who) else: # path is just an ID: - return ProjectHandler(pbroker, self, env, start_resp, who, idattrpart[0]) + return ProjectHandler(service, self, env, start_resp, who, idattrpart[0]) elif idattrpart[1] == "name": # path=ID/name: get/change the mnumonic name of record ID - return ProjectNameHandler(pbroker, self, env, start_resp, who, idattrpart[0]) + return ProjectNameHandler(service, self, env, start_resp, who, idattrpart[0]) elif idattrpart[1] == "data": # path=ID/data[/...]: get/change the content of record ID if len(idattrpart) == 2: idattrpart.append("") - return ProjectDataHandler(pbroker, self, env, start_resp, who, idattrpart[0], idattrpart[2]) + return ProjectDataHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[2]) elif idattrpart[1] == "acls": # path=ID/acls: get/update the access control on record ID if len(idattrpart) < 3: idattrpart.append("") - return ProjectACLsHandler(pbroker, self, env, start_resp, who, idattrpart[0], idattrpart[2]) + return ProjectACLsHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[2]) # the fallback handler will return some arbitrary part of the record if len(idattrpart) > 2: idattrpart[1] = "/".join(idattrpart[1:]) - return ProjectInfoHandler(pbroker, self, env, start_resp, who, idattrpart[0], idattrpart[1]) + return ProjectInfoHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[1]) class ProjectRecordHandler(DBIOHandler): """ - base handler class for all requests on project records. This base allows requests to be funneled - through a :py:class:`~nistoar.midas.dbio.wsgi.project.ProjectRecordBroker` instance. + base handler class for all requests on project records. 
""" - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, path: str="", config: dict=None, log: Logger=None): """ Initialize this handler with the request particulars. - :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update - the project data through. + :param ProjectService service: the ProjectService instance to use to get and update + the project data. :param SubApp subapp: the web service SubApp receiving the request and calling this constructor :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response @@ -111,23 +101,23 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s logger attached to the SubApp will be used. """ - super(ProjectRecordHandler, self).__init__(subapp, broker.dbcli, wsgienv, start_resp, who, + super(ProjectRecordHandler, self).__init__(subapp, service.dbcli, wsgienv, start_resp, who, path, config, log) - self._pbrkr = broker + self.svc = service class ProjectHandler(ProjectRecordHandler): """ handle access to the whole project record """ - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, id: str, config: dict=None, log: Logger=None): """ Initialize this handler with the request particulars. This constructor is called by the webs service SubApp. - :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update - the project data through. + :param ProjectService service: the ProjectService instance to use to get and update + the project data. 
:param SubApp subapp: the web service SubApp receiving the request and calling this constructor :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response @@ -140,7 +130,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s logger attached to the SubApp will be used. """ - super(ProjectHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, "", config, log) + super(ProjectHandler, self).__init__(service, subapp, wsgienv, start_resp, who, "", config, log) self._id = id if not id: @@ -149,7 +139,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s def do_GET(self, path, ashead=False): try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -164,14 +154,14 @@ class ProjectInfoHandler(ProjectRecordHandler): handle retrieval of simple parts of a project record. Only GET requests are allowed via this handler. """ - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, id: str, attribute: str, config: dict={}, log: Logger=None): """ Initialize this handler with the request particulars. This constructor is called by the webs service SubApp. - :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update - the project data through. + :param ProjectService service: the ProjectService instance to use to get and update + the project data. 
:param SubApp subapp: the web service SubApp receiving the request and calling this constructor :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response @@ -185,7 +175,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s logger attached to the SubApp will be used. """ - super(ProjectInfoHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, attribute, + super(ProjectInfoHandler, self).__init__(service, subapp, wsgienv, start_resp, who, attribute, config, log) self._id = id if not id: @@ -197,7 +187,7 @@ def do_GET(self, path, ashead=False): # programming error raise ValueError("Missing ProjectRecord attribute") try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -221,14 +211,14 @@ class ProjectNameHandler(ProjectRecordHandler): handle retrieval/update of a project records mnumonic name """ - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, id: str, config: dict=None, log: Logger=None): """ Initialize this handler with the request particulars. This constructor is called by the webs service SubApp. - :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update - the project data through. + :param ProjectService service: the ProjectService instance to use to get and update + the project data. 
:param SubApp subapp: the web service SubApp receiving the request and calling this constructor :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response @@ -241,7 +231,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s logger attached to the SubApp will be used. """ - super(ProjectNameHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, "", config, log) + super(ProjectNameHandler, self).__init__(service, subapp, wsgienv, start_resp, who, "", config, log) self._id = id if not id: @@ -250,7 +240,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s def do_GET(self, path, ashead=False): try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -266,7 +256,7 @@ def do_PUT(self, path): return self.send_fatal_error(ex) try: - prec = self._dbcli.get_record_for(self._id) + prec = self.svc.get_record(self._id) prec.name = name if not prec.authorized(dbio.ACLs.ADMIN): raise dbio.NotAuthorized(self._dbcli.user_id, "change record name") @@ -283,14 +273,14 @@ class ProjectDataHandler(ProjectRecordHandler): handle retrieval/update of a project record's data content """ - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, id: str, datapath: str, config: dict=None, log: Logger=None): """ Initialize this data request handler with the request particulars. This constructor is called by the webs service SubApp in charge of the project record interface. - :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update - the project data through. 
+ :param ProjectService service: the ProjectService instance to use to get and update + the project data. :param SubApp subapp: the web service SubApp receiving the request and calling this constructor :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response @@ -306,7 +296,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s :param Logger log: the logger to use within this handler; if not provided (typical), the logger attached to the SubApp will be used. """ - super(ProjectDataHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, datapath, + super(ProjectDataHandler, self).__init__(service, subapp, wsgienv, start_resp, who, datapath, config, log) self._id = id if not id: @@ -322,7 +312,7 @@ def do_GET(self, path, ashead=False): :param bool ashead: if True, the request is actually a HEAD request for the data """ try: - out = self._pbrkr.get_data(self._id, path) + out = self.svc.get_data(self._id, path) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -340,7 +330,7 @@ def do_PUT(self, path): return self.send_fatal_error(ex) try: - data = self._pbrkr.replace_data(self._id, newdata, path) + data = self.svc.replace_data(self._id, newdata, path) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -361,7 +351,7 @@ def do_PATCH(self, path): return self.send_fatal_error(ex) try: - data = self._pbrkr.update_data(self._id, newdata, path) + data = self.svc.update_data(self._id, newdata, path) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -381,7 +371,7 @@ class ProjectSelectionHandler(ProjectRecordHandler): handle collection-level access searching for project records and creating new ones """ - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: 
Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, config: dict=None, log: Logger=None): """ Initialize this record request handler with the request particulars. This constructor is called @@ -397,7 +387,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s :param Logger log: the logger to use within this handler; if not provided (typical), the logger attached to the SubApp will be used. """ - super(ProjectSelectionHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, "", + super(ProjectSelectionHandler, self).__init__(service, subapp, wsgienv, start_resp, who, "", config, log) def do_GET(self, path, ashead=False): @@ -453,7 +443,7 @@ def do_POST(self, path): return self.send_error_resp(400, "Bad POST input", "No mneumonic name provided") try: - prec = self._pbrkr.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) + prec = self.svc.create_record(newdata['name'], newdata.get("data"), newdata.get("meta")) except dbio.NotAuthorized as ex: self.log.debug("Authorization failure: "+str(ex)) return self.send_unauthorized() @@ -468,14 +458,14 @@ class ProjectACLsHandler(ProjectRecordHandler): handle retrieval/update of a project record's data content """ - def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, start_resp: Callable, + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, who: PubAgent, id: str, datapath: str="", config: dict=None, log: Logger=None): """ Initialize this data request handler with the request particulars. This constructor is called by the webs service SubApp in charge of the project record interface. - :param ProjectRecordBroker broker: the ProjectRecordBroker instance to use to get and update - the project data through. + :param ProjectService service: the ProjectService instance to use to get and update + the project data. 
:param SubApp subapp: the web service SubApp receiving the request and calling this constructor :param dict wsgienv: the WSGI request context dictionary :param Callable start_resp: the WSGI start-response function used to send the response @@ -492,7 +482,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s :param Logger log: the logger to use within this handler; if not provided (typical), the logger attached to the SubApp will be used. """ - super(ProjectACLsHandler, self).__init__(broker, subapp, wsgienv, start_resp, who, datapath, + super(ProjectACLsHandler, self).__init__(service, subapp, wsgienv, start_resp, who, datapath, config, log) self._id = id if not id: @@ -502,7 +492,7 @@ def __init__(self, broker: ProjectRecordBroker, subapp: SubApp, wsgienv: dict, s def do_GET(self, path, ashead=False): try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -551,7 +541,7 @@ def do_POST(self, path): # TODO: ensure input value is a bona fide user or group name try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -590,7 +580,7 @@ def do_PUT(self, path): # TODO: ensure input value is a bona fide user or group name try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -636,7 +626,7 @@ def do_PATCH(self, path): # TODO: ensure input value is a bona fide user or group name try: - prec = self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: @@ -672,7 +662,7 @@ def do_DELETE(self, path): # retrieve the record try: - prec = 
self._pbrkr.get_record(self._id) + prec = self.svc.get_record(self._id) except dbio.NotAuthorized as ex: return self.send_unauthorized() except dbio.ObjectNotFound as ex: diff --git a/python/tests/nistoar/midas/dbio/test_project.py b/python/tests/nistoar/midas/dbio/test_project.py new file mode 100644 index 0000000..11674ef --- /dev/null +++ b/python/tests/nistoar/midas/dbio/test_project.py @@ -0,0 +1,237 @@ +import os, json, pdb, logging, tempfile +import unittest as test + +from nistoar.midas.dbio import inmem, base +from nistoar.midas.dbio import project +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_project.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +class TestProjectService(test.TestCase): + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + }, + "dbio": { + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } + } + self.fact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) + + def create_service(self, request=None): + self.project = project.ProjectService(base.DMP_PROJECTS, self.fact, self.cfg, nistr, + rootlog.getChild("project")) + return self.project + + def test_ctor(self): + self.create_service() + self.assertTrue(self.project.dbcli) + self.assertEqual(self.project.cfg, self.cfg) + self.assertEqual(self.project.who.actor, "nstr1") + self.assertEqual(self.project.who.group, "midas") + self.assertTrue(self.project.log) + + def 
test_get_id_shoulder(self): + self.create_service() + self.assertEqual(self.project._get_id_shoulder(nistr), "mdm1") + + usr = prov.PubAgent("malware", prov.PubAgent.USER, "nstr1") + self.assertEqual(self.project._get_id_shoulder(usr), "mdm0") + + del self.cfg['clients']['default']['default_shoulder'] + self.create_service() + with self.assertRaises(project.NotAuthorized): + self.project._get_id_shoulder(usr) + del self.cfg['clients']['default'] + self.create_service() + with self.assertRaises(project.NotAuthorized): + self.project._get_id_shoulder(usr) + + self.assertEqual(self.project._get_id_shoulder(nistr), "mdm1") + + def test_extract_data_part(self): + data = {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A", "vec": [22, 11, 0], "desc": {"a": 1}}} + self.create_service() + self.assertEqual(self.project._extract_data_part(data, "color"), "red") + self.assertEqual(self.project._extract_data_part(data, "pos"), + {"x": 23, "y": 12, "grid": "A", "vec": [22, 11, 0], "desc": {"a": 1}}) + self.assertEqual(self.project._extract_data_part(data, "pos/vec"), [22, 11, 0]) + self.assertEqual(self.project._extract_data_part(data, "pos/y"), 12) + self.assertEqual(self.project._extract_data_part(data, "pos/desc/a"), 1) + with self.assertRaises(project.ObjectNotFound): + self.project._extract_data_part(data, "pos/desc/b") + + + def test_create_record(self): + self.create_service() + self.assertTrue(not self.project.dbcli.name_exists("goob")) + + prec = self.project.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdm1:0003") + self.assertEqual(prec.data, {}) + self.assertEqual(prec.meta, {}) + self.assertEqual(prec.owner, "nstr1") + + self.assertTrue(self.project.dbcli.name_exists("goob")) + prec2 = self.project.get_record(prec.id) + self.assertEqual(prec2.name, "goob") + self.assertEqual(prec2.id, "mdm1:0003") + self.assertEqual(prec2.data, {}) + self.assertEqual(prec2.meta, {}) + self.assertEqual(prec2.owner, "nstr1") + + with 
self.assertRaises(project.AlreadyExists): + self.project.create_record("goob") + + def test_create_record_withdata(self): + self.create_service() + self.assertTrue(not self.project.dbcli.name_exists("gurn")) + + prec = self.project.create_record("gurn", {"color": "red"}, {"temper": "dark"}) + self.assertEqual(prec.name, "gurn") + self.assertEqual(prec.id, "mdm1:0003") + self.assertEqual(prec.data, {"color": "red"}) + self.assertEqual(prec.meta, {"temper": "dark"}) + + def test_get_data(self): + self.create_service() + self.assertTrue(not self.project.dbcli.name_exists("gurn")) + prec = self.project.create_record("gurn", {"color": "red", "pos": {"x": 23, "y": 12, "desc": {"a": 1}}}) + self.assertTrue(self.project.dbcli.name_exists("gurn")) + + self.assertEqual(self.project.get_data(prec.id), + {"color": "red", "pos": {"x": 23, "y": 12, "desc": {"a": 1}}}) + self.assertEqual(self.project.get_data(prec.id, "color"), "red") + self.assertEqual(self.project.get_data(prec.id, "pos"), {"x": 23, "y": 12, "desc": {"a": 1}}) + self.assertEqual(self.project.get_data(prec.id, "pos/desc"), {"a": 1}) + self.assertEqual(self.project.get_data(prec.id, "pos/desc/a"), 1) + + with self.assertRaises(project.ObjectNotFound): + self.project.get_data(prec.id, "pos/desc/b") + with self.assertRaises(project.ObjectNotFound): + self.project.get_data("goober") + + + + def test_update_replace_data(self): + self.create_service() + self.assertTrue(not self.project.dbcli.name_exists("goob")) + + prec = self.project.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdm1:0003") + self.assertEqual(prec.data, {}) + self.assertEqual(prec.meta, {}) + + data = self.project.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + self.assertEqual(data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + + 
data = self.project.update_data(prec.id, {"y": 1, "z": 10, "grid": "B"}, "pos") + self.assertEqual(data, {"x": 23, "y": 1, "z": 10, "grid": "B"}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "B"}}) + + data = self.project.update_data(prec.id, "C", "pos/grid") + self.assertEqual(data, "C") + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "C"}}) + + # replace + data = self.project.replace_data(prec.id, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + self.assertEqual(data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + + # update again + data = self.project.update_data(prec.id, "blue", "color") + self.assertEqual(data, "blue") + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "blue", "pos": {"vec": [15, 22, 1], "grid": "Z"}}) + + with self.assertRaises(project.PartNotAccessible): + self.project.update_data(prec.id, 2, "pos/vec/x") + + +class TestProjectServiceFactory(test.TestCase): + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + }, + "dbio": { + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } + } + + self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) + self.fact = project.ProjectServiceFactory("dmp", self.dbfact, self.cfg) + + def test_ctor(self): + self.assertEqual(self.fact._prjtype, "dmp") + self.assertTrue(self.fact._dbclifact) + self.assertIn("dbio", self.fact._cfg) + self.assertIsNone(self.fact._log) + + def test_create_service_for(self): + svc = self.fact.create_service_for(nistr) + + self.assertEqual(svc.cfg, self.cfg) + self.assertTrue(svc.dbcli) + self.assertEqual(svc.dbcli._cfg, 
self.cfg["dbio"]) + self.assertEqual(svc.who.actor, "nstr1") + self.assertEqual(svc.who.group, "midas") + self.assertTrue(svc.log) + + prec = svc.create_record("goob") + self.assertEqual(prec._coll, "dmp") + + + + + + +if __name__ == '__main__': + test.main() + + + + diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py b/python/tests/nistoar/midas/dbio/wsgi/test_broker.py deleted file mode 100644 index 501b0f9..0000000 --- a/python/tests/nistoar/midas/dbio/wsgi/test_broker.py +++ /dev/null @@ -1,198 +0,0 @@ -import os, json, pdb, logging, tempfile -import unittest as test - -from nistoar.midas.dbio import inmem, base -from nistoar.midas.dbio.wsgi import broker -from nistoar.pdr.publish import prov - -tmpdir = tempfile.TemporaryDirectory(prefix="_test_broker.") -loghdlr = None -rootlog = None -def setUpModule(): - global loghdlr - global rootlog - rootlog = logging.getLogger() - loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) - loghdlr.setLevel(logging.DEBUG) - rootlog.addHandler(loghdlr) - -def tearDownModule(): - global loghdlr - if loghdlr: - if rootlog: - rootlog.removeHandler(loghdlr) - loghdlr.flush() - loghdlr.close() - loghdlr = None - tmpdir.cleanup() - -nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") - -class TestProjectRecordBroker(test.TestCase): - - def setUp(self): - self.cfg = { - "clients": { - "midas": { - "default_shoulder": "mdm1" - }, - "default": { - "default_shoulder": "mdm0" - } - }, - "allowed_project_shoulders": ["mdm1", "spc1"], - "default_shoulder": "mdm0" - } - self.fact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) - self.dbcli = self.fact.create_client(base.DMP_PROJECTS, self.cfg, nistr.actor) - self.resp = [] - - def create_broker(self, request=None): - self.resp = [] - if not request: - request = {'REQUEST_METHOD': 'GRUB'} - self.broker = broker.ProjectRecordBroker(self.dbcli, self.cfg, nistr, request, - rootlog.getChild("broker")) - return self.broker - - def 
test_ctor(self): - self.create_broker() - self.assertTrue(self.broker.dbcli) - self.assertEqual(self.broker.cfg, self.cfg) - self.assertEqual(self.broker.who.actor, "nstr1") - self.assertEqual(self.broker.who.group, "midas") - self.assertEqual(self.broker.env, {'REQUEST_METHOD': 'GRUB'}) - self.assertTrue(self.broker.log) - - def test_get_id_shoulder(self): - self.create_broker() - self.assertEqual(self.broker._get_id_shoulder(nistr), "mdm1") - - usr = prov.PubAgent("malware", prov.PubAgent.USER, "nstr1") - self.assertEqual(self.broker._get_id_shoulder(usr), "mdm0") - - del self.cfg['clients']['default']['default_shoulder'] - self.create_broker() - with self.assertRaises(broker.NotAuthorized): - self.broker._get_id_shoulder(usr) - del self.cfg['clients']['default'] - self.create_broker() - with self.assertRaises(broker.NotAuthorized): - self.broker._get_id_shoulder(usr) - - self.assertEqual(self.broker._get_id_shoulder(nistr), "mdm1") - - def test_extract_data_part(self): - data = {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A", "vec": [22, 11, 0], "desc": {"a": 1}}} - self.create_broker() - self.assertEqual(self.broker._extract_data_part(data, "color"), "red") - self.assertEqual(self.broker._extract_data_part(data, "pos"), - {"x": 23, "y": 12, "grid": "A", "vec": [22, 11, 0], "desc": {"a": 1}}) - self.assertEqual(self.broker._extract_data_part(data, "pos/vec"), [22, 11, 0]) - self.assertEqual(self.broker._extract_data_part(data, "pos/y"), 12) - self.assertEqual(self.broker._extract_data_part(data, "pos/desc/a"), 1) - with self.assertRaises(broker.ObjectNotFound): - self.broker._extract_data_part(data, "pos/desc/b") - - - def test_create_record(self): - self.create_broker() - self.assertTrue(not self.broker.dbcli.name_exists("goob")) - - prec = self.broker.create_record("goob") - self.assertEqual(prec.name, "goob") - self.assertEqual(prec.id, "mdm1:0003") - self.assertEqual(prec.data, {}) - self.assertEqual(prec.meta, {}) - self.assertEqual(prec.owner, 
"nstr1") - - self.assertTrue(self.broker.dbcli.name_exists("goob")) - prec2 = self.broker.get_record(prec.id) - self.assertEqual(prec2.name, "goob") - self.assertEqual(prec2.id, "mdm1:0003") - self.assertEqual(prec2.data, {}) - self.assertEqual(prec2.meta, {}) - self.assertEqual(prec2.owner, "nstr1") - - with self.assertRaises(broker.AlreadyExists): - self.broker.create_record("goob") - - def test_create_record_withdata(self): - self.create_broker() - self.assertTrue(not self.broker.dbcli.name_exists("gurn")) - - prec = self.broker.create_record("gurn", {"color": "red"}, {"temper": "dark"}) - self.assertEqual(prec.name, "gurn") - self.assertEqual(prec.id, "mdm1:0003") - self.assertEqual(prec.data, {"color": "red"}) - self.assertEqual(prec.meta, {"temper": "dark"}) - - def test_get_data(self): - self.create_broker() - self.assertTrue(not self.broker.dbcli.name_exists("gurn")) - prec = self.broker.create_record("gurn", {"color": "red", "pos": {"x": 23, "y": 12, "desc": {"a": 1}}}) - self.assertTrue(self.broker.dbcli.name_exists("gurn")) - - self.assertEqual(self.broker.get_data(prec.id), - {"color": "red", "pos": {"x": 23, "y": 12, "desc": {"a": 1}}}) - self.assertEqual(self.broker.get_data(prec.id, "color"), "red") - self.assertEqual(self.broker.get_data(prec.id, "pos"), {"x": 23, "y": 12, "desc": {"a": 1}}) - self.assertEqual(self.broker.get_data(prec.id, "pos/desc"), {"a": 1}) - self.assertEqual(self.broker.get_data(prec.id, "pos/desc/a"), 1) - - with self.assertRaises(broker.ObjectNotFound): - self.broker.get_data(prec.id, "pos/desc/b") - with self.assertRaises(broker.ObjectNotFound): - self.broker.get_data("goober") - - - - def test_update_replace_data(self): - self.create_broker() - self.assertTrue(not self.broker.dbcli.name_exists("goob")) - - prec = self.broker.create_record("goob") - self.assertEqual(prec.name, "goob") - self.assertEqual(prec.id, "mdm1:0003") - self.assertEqual(prec.data, {}) - self.assertEqual(prec.meta, {}) - - data = 
self.broker.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) - self.assertEqual(data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) - prec = self.broker.get_record(prec.id) - self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) - - data = self.broker.update_data(prec.id, {"y": 1, "z": 10, "grid": "B"}, "pos") - self.assertEqual(data, {"x": 23, "y": 1, "z": 10, "grid": "B"}) - prec = self.broker.get_record(prec.id) - self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "B"}}) - - data = self.broker.update_data(prec.id, "C", "pos/grid") - self.assertEqual(data, "C") - prec = self.broker.get_record(prec.id) - self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "C"}}) - - # replace - data = self.broker.replace_data(prec.id, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) - self.assertEqual(data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) - prec = self.broker.get_record(prec.id) - self.assertEqual(prec.data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) - - # update again - data = self.broker.update_data(prec.id, "blue", "color") - self.assertEqual(data, "blue") - prec = self.broker.get_record(prec.id) - self.assertEqual(prec.data, {"color": "blue", "pos": {"vec": [15, 22, 1], "grid": "Z"}}) - - with self.assertRaises(broker.PartNotAccessible): - self.broker.update_data(prec.id, 2, "pos/vec/x") - - - - -if __name__ == '__main__': - test.main() - - - - diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index f9728e0..2a489ef 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -45,14 +45,12 @@ def tostr(self, resplist): def setUp(self): self.cfg = { - "broker": { - "clients": { - "midas": { - "default_shoulder": "mdm1" - }, - "default": { - "default_shoulder": "mdm0" - } + "clients": { + 
"midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" } }, "dbio": { @@ -62,7 +60,9 @@ def setUp(self): } } self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) - self.app = prj.MIDASProjectApp(base.DMP_PROJECTS, rootlog.getChild("dmpapi"), self.dbfact, self.cfg) + self.svcfact = prj.ProjectServiceFactory(base.DMP_PROJECTS, self.dbfact, self.cfg, + rootlog.getChild("midas.prj")) + self.app = prj.MIDASProjectApp(self.svcfact, rootlog.getChild("dmpapi")) self.resp = [] self.rootpath = "/midas/dmp/" @@ -85,7 +85,7 @@ def test_create_handler_name(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectNameHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0001") @@ -178,7 +178,7 @@ def test_create_handler_full(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0001") @@ -189,7 +189,7 @@ def test_create_handler_full(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0001") @@ -258,7 +258,7 @@ def test_create(self): req['wsgi.input'] = StringIO(json.dumps({"data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("400 ", self.resp[0]) @@ -267,7 +267,7 @@ def test_create(self): req['wsgi.input'] = 
StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("201 ", self.resp[0]) @@ -287,7 +287,7 @@ def test_search(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("200 ", self.resp[0]) @@ -340,7 +340,7 @@ def test_getput_data(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -411,7 +411,7 @@ def test_create_handler_datapart(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "authors") self.assertEqual(hdlr._id, "pdr0:0012") @@ -423,7 +423,7 @@ def test_create_handler_acls(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -460,7 +460,7 @@ def test_getupd_aclsperm(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "read") 
self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -528,7 +528,7 @@ def test_getdel_aclspermmem(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) - self.assertEqual(hdlr.cfg, self.cfg) + self.assertEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "write/hank") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() From c7bbe2f1064e7f848af7d88c9c4f0a5325f81d0d Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 5 Dec 2022 09:17:57 -0500 Subject: [PATCH 032/123] move top-level midas app from midas.dbio.wsgi.wsgiapp to midas.wsgi and fix up: * fix contructor and factory signatures * update documentation --- python/nistoar/midas/__init__.py | 2 +- python/nistoar/midas/dap/__init__.py | 2 +- python/nistoar/midas/dap/service/mdsx.py | 73 +++++++++-- python/nistoar/midas/dbio/project.py | 21 +-- python/nistoar/midas/dbio/wsgi/__init__.py | 102 ++------------ python/nistoar/midas/dbio/wsgi/project.py | 20 +++ .../midas/{dbio/wsgi/wsgiapp.py => wsgi.py} | 124 +++++++++++++++--- .../nistoar/midas/dap/service/test_mdsx.py | 62 ++++----- .../tests/nistoar/midas/dbio/test_groups.py | 2 +- python/tests/nistoar/midas/dbio/test_inmem.py | 2 +- .../wsgi/test_wsgiapp.py => test_wsgi.py} | 39 +++--- 11 files changed, 258 insertions(+), 191 deletions(-) rename python/nistoar/midas/{dbio/wsgi/wsgiapp.py => wsgi.py} (75%) rename python/tests/nistoar/midas/{dbio/wsgi/test_wsgiapp.py => test_wsgi.py} (95%) diff --git a/python/nistoar/midas/__init__.py b/python/nistoar/midas/__init__.py index c4c5c18..292efbb 100644 --- a/python/nistoar/midas/__init__.py +++ b/python/nistoar/midas/__init__.py @@ -1,7 +1,7 @@ """ midas: A module providing infrastructure support for MIDAS applications. -MIDAS, historically, stands for Manag... Invent... Digital Assets .... In its first generation, +MIDAS, historically, stands for Managment Interface for Digital Assets. 
In its first generation, the system collectively provided a Data Management Plan (DMP) generation tool, an Enterprise Data Inventory (EDI) record generation tool, reporting functionality, and the generation of the NIST Public Data Listing (PDL), the publicly viewable portion of the EDI (which is exported to data.gov). diff --git a/python/nistoar/midas/dap/__init__.py b/python/nistoar/midas/dap/__init__.py index 3c92bf3..d5fb9fc 100644 --- a/python/nistoar/midas/dap/__init__.py +++ b/python/nistoar/midas/dap/__init__.py @@ -12,7 +12,7 @@ There can be different flavors of the Authoring service supported in this module to support different interaction models or conventions or evolutions of the interface (i.e. interface versions). The default flavor is targeted for the MIDAS 3 client. The different flavors are -implemented within the :py:mod`service` subpackage. +implemented within the :py:mod:`service` subpackage. This package draws on some of the infrastructure from the :py:mod:`~nistoar.pdr.publish` package, including :py:mod:`provenence tracking` and diff --git a/python/nistoar/midas/dap/service/mdsx.py b/python/nistoar/midas/dap/service/mdsx.py index 45b24ec..3d44ccb 100644 --- a/python/nistoar/midas/dap/service/mdsx.py +++ b/python/nistoar/midas/dap/service/mdsx.py @@ -1,12 +1,16 @@ """ -Subapp supporting DAP +The DAP Authoring Service implemented using the mdsx convention. This convention represents an +implementation provided for development purposes and not intended for production use. + +Support for the web service frontend is provided as +WSGI :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` implementation. 
""" from logging import Logger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence, Callable -from ...dbio import DBClient, DBClientFactory, ProjectRecord -from ...dbio.wsgi.broker import ProjectRecordBroker +from ...dbio import (DBClient, DBClientFactory, ProjectRecord, + ProjectService, ProjectServiceFactory, DAP_PROJECTS) from ...dbio.wsgi.project import MIDASProjectApp from nistoar.base.config import ConfigurationException from nistoar.nerdm.constants import core_schema_base, schema_versions @@ -27,13 +31,27 @@ NERDPUB_DEF = NERDMPUB_SCH_ID + "/definitions/" NERDM_CONTEXT = "https://data.nist.gov/od/dm/nerdm-pub-context.jsonld" -class DAPBroker(ProjectRecordBroker): +class DAPService(ProjectService): """ a project record request broker class for DAP records. + + In addition to the configuration parameters supported by the parent class, this specialization + also supports the following parameters: + + ``assign_doi`` + a label that indicates when a DOI should be assigned to new records. Supported values include: + * ``always`` -- assign a DOI to every newly created record + * ``request`` -- assign a DOI on when requested by the system + * ``never`` -- never assign a DOI to a record. + ``doi_naan`` + the Name Assigning Authority Number to use for the DOI (given as a string) + + Note that the DOI is not yet registered with DataCite; it is only internally reserved and included + in the record NERDm data. 
""" - def __init__(self, dbclient: DBClient, config: Mapping={}, who: PubAgent=None, - wsgienv: dict=None, log: Logger=None): + def __init__(self, dbclient_factory: DBClient, config: Mapping={}, who: PubAgent=None, + log: Logger=None, project_type=DAP_PROJECTS): """ create a request handler :param DBClient dbclient: the DBIO client instance to use to access and save project records @@ -41,7 +59,7 @@ def __init__(self, dbclient: DBClient, config: Mapping={}, who: PubAgent=None, :param dict wsgienv: the WSGI request context :param Logger log: the logger to use for log messages """ - super(DAPBroker, self).__init__(dbclient, config, who, wsgienv, log) + super(DAPService, self).__init__(project_type, dbclient_factory, config, who, log) self.cfg.setdefault('assign_doi', ASSIGN_DOI_REQUEST) if not self.cfg.get('doi_naan') and self.cfg.get('assign_doi') != ASSIGN_DOI_NEVER: @@ -119,15 +137,44 @@ def _new_metadata_for(self, shoulder=None): ("creatorisContact", True) ]) +class DAPServiceFactory(ProjectServiceFactory): + """ + Factory for creating DAPService instances attached to a backend DB implementation and which act + on behalf of a specific user. The configuration parameters that can be provided to this factory + is the union of those supported by the following classes: + * :py:class:`DAPService` (``assign_doi`` and ``doi_naan``) + * :py:class:`~nistoar.midas.dbio.project.ProjectService` (``clients`` and ``dbio``) + """ + + def __init__(self, dbclient_factory: DBClientFactory, config: Mapping={}, log: Logger=None, + project_coll: str=None): + """ + create a service factory associated with a particulr DB backend. + :param DBClientFactory dbclient_factory: the factory instance to use to create a DBClient to + talk to the DB backend. + :param Mapping config: the configuration for the service (see class-level documentation). + :param Logger log: the Logger to use in the service. + :param str project_coll: the project type (i.e. 
the DBIO project collection to access); + default: "dap". + """ + if not project_coll: + project_coll = DAP_PROJECTS + super(DAPServiceFactory, self).__init__(project_coll, dbclient_factory, config, log) + + def create_service_for(self, who: PubAgent=None): + """ + create a service that acts on behalf of a specific user. + :param PubAgent who: the user that wants access to a project + """ + return DAPService(self._dbclifact, self._cfg, who, self._log, self._prjtype) + class DAPApp(MIDASProjectApp): """ A MIDAS SubApp supporting a DAP service """ - - def __init__(self, typename: str, log: Logger, dbcli_factory: DBClientFactory, config: dict={}): - if not typename: - typename = "dap" - super(DAPApp, self).__init__(typename, log, dbcli_factory, config, DAPBroker) - + def __init__(self, dbcli_factory: DBClientFactory, log: Logger, config: dict={}, project_coll: str=None): + service_factory = DAPServiceFactory(dbcli_factory, config, project_coll) + super(DAPApp, self).__init__(service_factory, log.getChild(DAP_PROJECTS), dbcli_factory, config) + diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 3b3edd8..6878035 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -25,9 +25,13 @@ class ProjectService(MIDASSystem): to a particular user at construction time (as given by a :py:class:`~nistoar.pdr.publish.prov.PubAgent` instance); thus, requests to this service are subject to internal Authorization checks. - This base service supports a single parameter, ``clients``, that places restrictions on the - creation of records based on which group the user is part of. The value is an object whose keys - are user group name that are authorized to use this service, and whose values are themselves objects + This base service supports a two parameters, ``dbio`` and ``clients``. 
The optional ``dbio`` + parameter will be passed to the :py:class:`~nistoar.midas.dbio.base.DBClientFactory`'s + ``create_client()`` function to create the :py:class:`~nistoar.midas.dbio.base.DBClient`. + + The ``clients`` parameter is an object that places restrictions on the + creation of records based on which group the user is part of. The keys of the object + are user group names that are authorized to use this service, and whose values are themselves objects that restrict the requests by that user group; for example: .. code-block:: @@ -64,7 +68,7 @@ def __init__(self, project_type: str, dbclient_factory: DBClient, config: Mappin super(ProjectService, self).__init__("DBIO Project Service", "DBIO") self.cfg = config if not who: - who = PubAgent("unkwn", prov.PubAgent.USER, "anonymous") + who = PubAgent("unkwn", PubAgent.USER, "anonymous") self.who = who if not log: log = getLogger(self.system_abbrev).getChild(self.subsystem_abbrev).getChild(project_type) @@ -359,7 +363,7 @@ def _validate_data(self, data): class ProjectServiceFactory: """ a factory object that creates ProjectService instances attached to the backend DB implementation - and which acts on behalf of a specific user. + and which act on behalf of a specific user. As this is a concrete class, it can be instantiated directly to produce generic ProjectService instances but serving a particular project type. Instances are also attached ot a particular @@ -367,11 +371,8 @@ class ProjectServiceFactory: time. The configuration provided to this factory will be passed directly to the service instances - it creates. In addition parameters supported by the :py:class:`ProjectService` - (i.e. ``clients``), the configuration can also include a ``dbio`` parameter. - If provided, its value will be used when creating a DBClient to talk to the DB backend (see - :py:class:`~nistoar.midas.dbio.base.DBClientFactory` for details). Subclasses of this factory - class may support additional parameters. + it creates. 
See the :py:class:`ProjectService` documentation for the configuration + parameters supported by this implementation. """ def __init__(self, project_type: str, dbclient_factory: DBClientFactory, config: Mapping={}, log: Logger=None): diff --git a/python/nistoar/midas/dbio/wsgi/__init__.py b/python/nistoar/midas/dbio/wsgi/__init__.py index d32d042..e84f53b 100644 --- a/python/nistoar/midas/dbio/wsgi/__init__.py +++ b/python/nistoar/midas/dbio/wsgi/__init__.py @@ -1,96 +1,14 @@ """ -The WSGI interface to the DBIO layer. - -The :py:class:`~nistoar.midas.dbio.wsgi.wsgiapp.MIDASApp` WSGI application provides DBIO collections -and data and serves as the API for the suite of available MIDAS services. In particular, when so -configured, this application can provide the following endpoints: - - * ``/dmp/mdm1/`` -- the Data Management Plan (DMP) Authoring API, for creating and editing DMPs - (according to the "mdm1" convention) - * ``/dap/mds3/`` -- the Digital Assets Publication (DAP) Authoring API, for drafting data and - software publications (according to the mds3 convention) to be submitted to the Public Data - Repository (PDR) - * ``/groups/`` -- the API for creating and managing access permission groups for collaborative - authoring. - -These endpoint send and receive data stored in the backend database through the common -:py:module:` DBIO layer `. - -The app configuration determines which endpoints that are actually available. The authoring API -endpoints follow a common pattern: - - /_service_/_convention_/ - -where _service_ is MIDAS service name (like "dmp" or "dap") and _convention_ is name that represents -the version of the service interface. Usually, there is one convention available called "def", which -serves as a synonym for the convention that is considered the default convention. Through the -configuration, it is possible, then, to create additional authoring services or conventions of services. 
- -The configuration that is expected by ``MIDASApp`` is a (JSON) object with the following properties: - -``base_endpoint`` - (str) _optional_. the URL resource path where the base of the base of the service suite is accessed. - The default value is "/midas/". An empty string is equivalent to "/", the root path. -``strict`` - (bool) _optional_. if False and if a service type (see below) given in this configuration is not - recognized, a ``ConfiguraitonException`` will be raised. -``about`` - (object) _optional_. an object of data describing this suite of services that should be returned - when the base path is requested. (See the :py:class:`~nistoar.midas.dbio.wsgi.wsgiapp.About` class - for an example.) There are no requirements on the properties in this object except that it should - _not_ include "services" or "versions". -``services`` - (object) _required_. an object in which each property is a service name (as referred to above in - the API endpoint pattern--e.g., "dmp" or "dap"), and its value is the configuration for that service. -``dbio`` - (object) _recommended_. an object that provides configuration for the DBIO client; typically, this - includes a ``factory`` property whose string value identifies the type of - backend storage to use ("mongo", "fsbased", or "inmem"). The other properties - are the parameters that are specific to the backend storage. - -Most of the properties in a service configuration object will be treated as default configuration -parameters for configuring a particular version, or _convention_, of the service. Convention-level -configuration will be merged with these properties (overriding the defaults) to produce the configuration -that is passed to the service SubApp that handles the service. The properties supported are -service-specific. In addition to the service-specific properties, two special-purpose properties are -supported: - -``about`` - (object) _optional_. 
an object of data describing the service catagory that should be returned - when the service name endpoint is requested. (See the - :py:class:`~nistoar.midas.dbio.wsgi.wsgiapp.About` class for an example.) There are no requirements - on the properties in this object except that it should _not_ include "services" or "versions". -``conventions`` - (object) _optional_. an object in which each property is a convention name supported for the service - (as referred to above in the API endpoint pattern--e.g., "mdm1" for the DMP service), and its value is - the configuration for that convention (i.e. version) of the service. Any properties given here - override properties of the same name given at the service level, as discussed above. The properties - can be service- or convention-specific, apart from the required property, ``type`` (defined below). -``default_convention`` - (str) _optional_. the name of the convention (one of the names specified as a property of the - ``conventions`` field described above) that should be considered the default convention. If a client - requests the special convention name "def", the request will be routed to the version of the service - with that name. -``dbio`` - (object) _recommended_. the configuration parameters for the DBIO client which are specific to the - project service type (see below). In particular, this includes the authorization configurations; - see the :py:module:`dbio module documentation ` for this schema. - -There are two common properties that can appear in either the service or convention level (or both, where -the convention level takes precedence): ``project_name`` and ``type``. These optional properties are -defined as follows: - -``project_name`` - (str) _optional_. a name indicating the type of DBIO project the service manages. This name - corresponds to a DBIO project collection name. It defaults to the value of the name associated with - the configuration under the ``services`` property (described above). 
-``type`` - (str) _optional_. a name that serves as an alias for the Python ``SubApp`` class that implements - the service convention. The default value is the service and convention names combined as - "_service_/_convention_". - +The generic WSGI imfrastructure accessing the MIDAS DBIO layer. + +This module contributes to the MIDAS WSGI implementation in two ways. First, it the provides base +:ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` class, +:ref:class:`~nistoar.midas.dbio.wsgi.project.MIDASProjectApp` which can be specialized (or used as +is) to provide access to the different MIDAS _project_ types--namely, DMP and +:py:module:`DAP `. Second it provides the endpoint implementations for the +non-project collections in the DBIO layer--namely, the groups endpoint (which tracks the user +groups used for access control). """ from .base import SubApp, Handler, DBIOHandler -from .wsgiapp import MIDASApp +from .project import MIDASProjectApp -app = MIDASApp diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 1a6c387..f713f7b 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -76,6 +76,26 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge idattrpart[1] = "/".join(idattrpart[1:]) return ProjectInfoHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[1]) + class _factory: + def __init__(self, project_coll): + self._prjcoll = project_coll + def __call__(self, dbcli_factory: dbio.DBClientFactory, log: Logger, config: dict={}, + prjcoll: str=None): + if not prjcoll: + prjcoll = self._prjcoll + service_factory = ProjectServiceFactory(prjcoll, dbcli_factory, config, log) + return MIDASProjectApp(service_factory, log, config) + + @classmethod + def factory_for(cls, project_coll): + """ + return a factory function that instantiates this class connected to the given DBIO collection. 
+ This is intended for plugging this SubApp into the main WSGI app as is. + :param str project_coll: the name of the DBIO project collection to use for creating and + updating project records. + """ + return cls._factory(project_coll) + class ProjectRecordHandler(DBIOHandler): """ base handler class for all requests on project records. diff --git a/python/nistoar/midas/dbio/wsgi/wsgiapp.py b/python/nistoar/midas/wsgi.py similarity index 75% rename from python/nistoar/midas/dbio/wsgi/wsgiapp.py rename to python/nistoar/midas/wsgi.py index 5122cad..168f57e 100644 --- a/python/nistoar/midas/dbio/wsgi/wsgiapp.py +++ b/python/nistoar/midas/wsgi.py @@ -1,17 +1,106 @@ """ -A module that provides the top-level WSGI App providing access to the MIDAS services via the DBIO layer. +A module that assembles all of the different endpoints of the MIDAS API into one WSGI App. The :ref:class:`MIDASApp` class is an WSGI application class that provides the suite of MIDAS services. Which services are actually made available depends on the configuration provided at construction time. -See the :py:module:`nistoar.midas.dbio.wsgi` module documentation for a description of the -configuraiton schema. +See the :py:mod:`nistoar.midas.dbio.wsgi` module documentation for a description of the +configuraiton schema. In particular, :ref:class:`MIDASApp` application can provide, when so configured, +the following endpoints: + + * ``/dmp/mdm1/`` -- the Data Management Plan (DMP) Authoring API, for creating and editing DMPs + (according to the "mdm1" convention) + * ``/dap/mds3/`` -- the Digital Assets Publication (DAP) Authoring API, for drafting data and + software publications (according to the mds3 convention) to be submitted to the Public Data + Repository (PDR) + * ``/groups/`` -- the API for creating and managing access permission groups for collaborative + authoring. + +These endpoint send and receive data stored in the backend database through the common +:py:mod:` DBIO layer `. 
+ +The app configuration determines which endpoints that are actually available. The authoring API +endpoints follow a common pattern: + + /_service_/_convention_/ + +where _service_ is MIDAS service name (like "dmp" or "dap") and _convention_ is name that represents +the version of the service interface. Usually, there is one convention available called "def", which +serves as a synonym for the convention that is considered the default convention. Through the +configuration, it is possible, then, to create additional authoring services or conventions of services. + +The configuration that is expected by ``MIDASApp`` is a (JSON) object with the following properties: + +``base_endpoint`` + (str) _optional_. the URL resource path where the base of the base of the service suite is accessed. + The default value is "/midas/". An empty string is equivalent to "/", the root path. +``strict`` + (bool) _optional_. if False and if a service type (see below) given in this configuration is not + recognized, a ``ConfiguraitonException`` will be raised. +``about`` + (object) _optional_. an object of data describing this suite of services that should be returned + when the base path is requested. (See the :py:class:`~nistoar.midas.dbio.wsgi.wsgiapp.About` class + for an example.) There are no requirements on the properties in this object except that it should + _not_ include "services" or "versions". +``services`` + (object) _required_. an object in which each property is a service name (as referred to above in + the API endpoint pattern--e.g., "dmp" or "dap"), and its value is the configuration for that service. +``dbio`` + (object) _recommended_. an object that provides configuration for the DBIO client; typically, this + includes a ``factory`` property whose string value identifies the type of + backend storage to use ("mongo", "fsbased", or "inmem"). The other properties + are the parameters that are specific to the backend storage. 
+ +Most of the properties in a service configuration object will be treated as default configuration +parameters for configuring a particular version, or _convention_, of the service. Convention-level +configuration will be merged with these properties (overriding the defaults) to produce the configuration +that is passed to the service SubApp that handles the service. The properties supported are +service-specific. In addition to the service-specific properties, three special-purpose properties are +supported: + +``about`` + (object) _optional_. an object of data describing the service catagory that should be returned + when the service name endpoint is requested. (See the + :py:class:`~nistoar.midas.dbio.wsgi.wsgiapp.About` class for an example.) There are no requirements + on the properties in this object except that it should _not_ include "services" or "versions". +``conventions`` + (object) _optional_. an object in which each property is a convention name supported for the service + (as referred to above in the API endpoint pattern--e.g., "mdm1" for the DMP service), and its value is + the configuration for that convention (i.e. version) of the service. Any properties given here + override properties of the same name given at the service level, as discussed above. The properties + can be service- or convention-specific, apart from the required property, ``type`` (defined below). +``default_convention`` + (str) _optional_. the name of the convention (one of the names specified as a property of the + ``conventions`` field described above) that should be considered the default convention. If a client + requests the special convention name "def", the request will be routed to the version of the service + with that name. + +There are a few common properties that can appear in either the service or convention level (or both, where +the convention level takes precedence): + +``type`` + (str) _optional_. 
a name that serves as an alias for the Python ``SubApp`` class that implements + the service convention. The default value is the service and convention names combined as + "_service_/_convention_". +``project_name`` + (str) _optional_. a name indicating the type of DBIO project the service manages. This name + corresponds to a DBIO project collection name. If provided, it will override the collection used + by default ``SubApp`` specified by the ``type`` parameter. +``clients`` + (object) _required_. the configuration parameters restrict the scope of the clients that connect to + the web service. This is passed to the :py:class:`~nistoar.midas.dbio.project.ProjectService` + configured for the convention. +``dbio`` + (object) _recommended_. the configuration parameters for the DBIO client which are specific to the + project service type (see below). In particular, this includes the authorization configurations; + see the :py:mod:`dbio module documentation ` for this schema. This is passed to + the :py:class:`~nistoar.midas.dbio.project.ProjectService` configured for the convention. In addition to providing the :ref:class:`MIDASApp` class, this module provides a mechanism for plugging -addition _project_ services, particularly new conventions of services. The class constructor takes +in addition _project_ services, particularly new conventions of services. The class constructor takes an optional dictionary parameter that provides in its values the :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` class that implements a particular DBIO project service. The keys labels that correspond to the ``type`` parameter in the -:py:module:`configuration ` and which, by default, have the form +:py:mod:`configuration ` and which, by default, have the form _service_/_convention_ (e.g. ``dmp/mdm1``). If this dictionary is not provided to the constructur, an default defined in this module, ``_MIDASSubApps`` is used. 
Thus, the normal way to add a new service implementation to the suite is to add it to the internal ``_MIDASSubApps`` dictionary. @@ -32,13 +121,13 @@ from collections.abc import Mapping, MutableMapping, Callable from copy import deepcopy -from ... import system -from . import project as prj, SubApp, Handler, DBIOHandler -from ...dap.service import mdsx -from ..base import DBClientFactory -from ..inmem import InMemoryDBClientFactory -from ..fsbased import FSBasedDBClientFactory -from ..mongo import MongoDBClientFactory +from . import system +from .dbio.base import DBClientFactory +from .dbio.wsgi import project as prj, SubApp, Handler, DBIOHandler +from .dap.service import mdsx +from .dbio.inmem import InMemoryDBClientFactory +from .dbio.fsbased import FSBasedDBClientFactory +from .dbio.mongo import MongoDBClientFactory from nistoar.pdr.publish.prov import PubAgent from nistoar.base.config import ConfigurationException, merge_config @@ -96,7 +185,7 @@ def config_for_convention(self, appname: str, convention: str, typename: str = N return it as a complete convention-specific configuration, or None if the convention is not configured. :param str appname: the name of the MIDAS app to be configured. (Examples are "dmp", - "pdr") + "dap") :param str convention: the name of the API convention that is desired in the configuration. A special name "def" refers to the convention that is configured as the default for the app; an empty string and None behaves in the same way. 
@@ -128,7 +217,6 @@ def config_for_convention(self, appname: str, convention: str, typename: str = N appcfg['type'] = typename elif not appcfg.get('type'): appcfg['type'] = "%s/%s" % (appname, convention) - appcfg.setdefault("project_name", appname) return appcfg @@ -152,7 +240,7 @@ def create_subapp(self, log: Logger, dbio_client_factory: DBClientFactory, raise ConfigurationException("Missing configuration parameter: type") factory = self.subapps[typename] - return factory(appconfig.get('project_name', typename), log, dbio_client_factory, appconfig) + return factory(dbio_client_factory, log, appconfig, appconfig.get('project_name')) def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> MutableMapping: """ @@ -345,7 +433,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge _MIDASSubApps = { - "dmp/mdm1": prj.MIDASProjectApp, +# "dmp/mdm1": mdm1.DMPApp, + "dmp/mdm1": prj.MIDASProjectApp.factory_for("dmp"), "dap/mdsx": mdsx.DAPApp } @@ -367,7 +456,7 @@ def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, """ initial the App :param Mapping config: the collected configuration for the App (see the - :py:module:`wsgi module documentation ` + :py:mod:`wsgi module documentation ` for the schema :param DBClientFactory dbio_client_factory: the DBIO client factory to use to create clients used to access the DBIO storage backend. 
If not specified, @@ -463,5 +552,6 @@ def handle_request(self, env, start_resp): def __call__(self, env, start_resp): return self.handle_request(env, start_resp) +app = MIDASApp diff --git a/python/tests/nistoar/midas/dap/service/test_mdsx.py b/python/tests/nistoar/midas/dap/service/test_mdsx.py index 03ccfe0..6de6513 100644 --- a/python/tests/nistoar/midas/dap/service/test_mdsx.py +++ b/python/tests/nistoar/midas/dap/service/test_mdsx.py @@ -1,8 +1,8 @@ import os, json, pdb, logging, tempfile import unittest as test -from nistoar.midas.dbio import inmem, base -from nistoar.midas.dbio.wsgi import broker +from nistoar.midas.dbio import inmem, base, AlreadyExists +from nistoar.midas.dbio import project as prj from nistoar.midas.dap.service import mdsx from nistoar.pdr.publish import prov @@ -29,7 +29,7 @@ def tearDownModule(): nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") -class TestProjectRecordBroker(test.TestCase): +class TestDAPService(test.TestCase): def setUp(self): self.cfg = { @@ -41,36 +41,32 @@ def setUp(self): "default_shoulder": "mdsx" } }, - "allowed_project_shoulders": ["mdsx", "spc1"], - "default_shoulder": "mdsx", + "dbio": { + "allowed_project_shoulders": ["mdsx", "spc1"], + "default_shoulder": "mdsx", + }, "assign_doi": "always", "doi_naan": "88888" } - self.fact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdsx": 2 }}) - self.dbcli = self.fact.create_client(base.DMP_PROJECTS, self.cfg, nistr.actor) - self.resp = [] + self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdsx": 2 }}) - def create_broker(self, request=None): - self.resp = [] - if not request: - request = {'REQUEST_METHOD': 'GRUB'} - self.broker = mdsx.DAPBroker(self.dbcli, self.cfg, nistr, request, rootlog.getChild("broker")) - return self.broker + def create_service(self, request=None): + self.service = mdsx.DAPService(self.dbfact, self.cfg, nistr, rootlog.getChild("broker")) + return self.service def test_ctor(self): - self.create_broker() - 
self.assertTrue(self.broker.dbcli) - self.assertEqual(self.broker.cfg, self.cfg) - self.assertEqual(self.broker.who.actor, "nstr1") - self.assertEqual(self.broker.who.group, "midas") - self.assertEqual(self.broker.env, {'REQUEST_METHOD': 'GRUB'}) - self.assertTrue(self.broker.log) + self.create_service() + self.assertTrue(self.service.dbcli) + self.assertEqual(self.service.cfg, self.cfg) + self.assertEqual(self.service.who.actor, "nstr1") + self.assertEqual(self.service.who.group, "midas") + self.assertTrue(self.service.log) def test_create_record(self): - self.create_broker() - self.assertTrue(not self.broker.dbcli.name_exists("goob")) + self.create_service() + self.assertTrue(not self.service.dbcli.name_exists("goob")) - prec = self.broker.create_record("goob") + prec = self.service.create_record("goob") self.assertEqual(prec.name, "goob") self.assertEqual(prec.id, "mdsx:0003") self.assertEqual(prec.meta, {"creatorisContact": True, "resourceType": "data"}) @@ -80,8 +76,8 @@ def test_create_record(self): self.assertEqual(prec.data['doi'], "doi:88888/mdsx-0003") self.assertEqual(prec.data['@id'], "ark:/88434/mdsx-0003") - self.assertTrue(self.broker.dbcli.name_exists("goob")) - prec2 = self.broker.get_record(prec.id) + self.assertTrue(self.service.dbcli.name_exists("goob")) + prec2 = self.service.get_record(prec.id) self.assertEqual(prec2.name, "goob") self.assertEqual(prec2.id, "mdsx:0003") self.assertEqual(prec2.data['@id'], "ark:/88434/mdsx-0003") @@ -89,16 +85,16 @@ def test_create_record(self): self.assertEqual(prec2.meta, {"creatorisContact": True, "resourceType": "data"}) self.assertEqual(prec2.owner, "nstr1") - with self.assertRaises(broker.AlreadyExists): - self.broker.create_record("goob") + with self.assertRaises(AlreadyExists): + self.service.create_record("goob") def test_create_record_withdata(self): - self.create_broker() - self.assertTrue(not self.broker.dbcli.name_exists("gurn")) + self.create_service() + self.assertTrue(not 
self.service.dbcli.name_exists("gurn")) - prec = self.broker.create_record("gurn", {"color": "red"}, - {"temper": "dark", "creatorisContact": "goob", - "softwarelink": "http://..." }) # misspelled key + prec = self.service.create_record("gurn", {"color": "red"}, + {"temper": "dark", "creatorisContact": "goob", + "softwarelink": "http://..." }) # misspelled key self.assertEqual(prec.name, "gurn") self.assertEqual(prec.id, "mdsx:0003") self.assertEqual(prec.meta, {"creatorisContact": False, "resourceType": "data"}) diff --git a/python/tests/nistoar/midas/dbio/test_groups.py b/python/tests/nistoar/midas/dbio/test_groups.py index df7bcf3..db5b268 100644 --- a/python/tests/nistoar/midas/dbio/test_groups.py +++ b/python/tests/nistoar/midas/dbio/test_groups.py @@ -107,7 +107,7 @@ def test_create_group(self): self.assertTrue(grp.is_member(self.user)) self.assertGreater(grp.created, 0) self.assertLess(grp.created, time.time()) - self.assertEqual(grp.modified, grp.created) + self.assertGreaterEqual(grp.modified, grp.created) self.assertTrue(grp.authorized(base.ACLs.OWN)) diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py b/python/tests/nistoar/midas/dbio/test_inmem.py index 077f1b6..37f96d0 100644 --- a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -13,7 +13,7 @@ def setUp(self): def test_ctor(self): self.assertEqual(self.fact._cfg, self.cfg) self.assertTrue(self.fact._db) - self.assertEqual(self.fact._db.get(base.DRAFT_PROJECTS), {}) + self.assertEqual(self.fact._db.get(base.DAP_PROJECTS), {}) self.assertEqual(self.fact._db.get(base.DMP_PROJECTS), {}) self.assertEqual(self.fact._db.get(base.GROUPS_COLL), {}) self.assertEqual(self.fact._db.get(base.PEOPLE_COLL), {}) diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py b/python/tests/nistoar/midas/test_wsgi.py similarity index 95% rename from python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py rename to python/tests/nistoar/midas/test_wsgi.py index 
f1cabe4..9342342 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_wsgiapp.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -4,7 +4,7 @@ import unittest as test from nistoar.midas.dbio import inmem, base -from nistoar.midas.dbio.wsgi import wsgiapp as app +from nistoar.midas import wsgi as app from nistoar.pdr.publish import prov tmpdir = tempfile.TemporaryDirectory(prefix="_test_wsgiapp.") @@ -208,7 +208,6 @@ def test_config_for_convention(self): self.assertEqual(cfg["default_convention"], "mdm1") self.assertEqual(cfg["about"]["version"], "mdm1") self.assertEqual(cfg["type"], "dmp/mdm1") - self.assertEqual(cfg["project_name"], "dmp") self.assertNotIn("conventions", cfg) cfg = self.fact.config_for_convention("dmp", "def") @@ -219,7 +218,7 @@ def test_config_for_convention(self): self.assertEqual(cfg["default_convention"], "mdm1") self.assertEqual(cfg["about"]["version"], "mdm1") self.assertEqual(cfg["type"], "dmp/mdm1") - self.assertEqual(cfg["project_name"], "dmp") + self.assertIsNone(cfg.get("project_name")) self.assertNotIn("conventions", cfg) cfg = self.fact.config_for_convention("dmp", "") @@ -230,7 +229,7 @@ def test_config_for_convention(self): self.assertEqual(cfg["default_convention"], "mdm1") self.assertEqual(cfg["about"]["version"], "mdm1") self.assertEqual(cfg["type"], "dmp/mdm1") - self.assertEqual(cfg["project_name"], "dmp") + self.assertIsNone(cfg.get("project_name")) self.assertNotIn("conventions", cfg) cfg = self.fact.config_for_convention("dmp", "mdm2") @@ -241,13 +240,13 @@ def test_config_for_convention(self): self.assertEqual(cfg["default_convention"], "mdm1") self.assertEqual(cfg["about"]["version"], "mdm2") self.assertEqual(cfg["type"], "dmp/mdm1") - self.assertEqual(cfg["project_name"], "dmp") + self.assertIsNone(cfg.get("project_name")) self.assertNotIn("conventions", cfg) cfg = self.fact.config_for_convention("dmp", "mdm2", "hank") self.assertEqual(cfg["foo"], "but") - self.assertEqual(cfg["project_name"], "dmp") 
self.assertEqual(cfg["type"], "hank") + self.assertIsNone(cfg.get("project_name")) cfg = self.fact.config_for_convention("dap", None) self.assertEqual(cfg["type"], "dmp/mdm1") @@ -260,7 +259,7 @@ def test_config_for_convention(self): cfg = self.fact.config_for_convention("pyu", "def") self.assertIn("about", cfg) - self.assertEqual(cfg["project_name"], "pyu") + self.assertIsNone(cfg.get("project_name")) self.assertEqual(cfg["type"], "pyu/def") @@ -325,14 +324,12 @@ def setUp(self): "describedBy": "https://midas3.nist.gov/midas/apidocs", "href": "http://midas3.nist.gov/midas/dmp" }, - "broker": { - "clients": { - "midas": { - "default_shoulder": "mdm1" - }, - "default": { - "default_shoulder": "mdm0" - } + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" } }, "dbio": { @@ -370,11 +367,9 @@ def setUp(self): }, "project_name": "drafts", "type": "dmp/mdm1", - "broker": { - "clients": { - "default": { - "default_shoulder": "mds3" - } + "clients": { + "default": { + "default_shoulder": "mds3" } }, "dbio": { @@ -408,10 +403,10 @@ def test_ctor(self): self.assertNotIn("pyu/def", self.app.subapps) self.assertNotIn("pyu", self.app.subapps) - self.assertTrue(self.app.subapps["dmp/mdm1"]._dbfact) + self.assertTrue(self.app.subapps["dmp/mdm1"].svcfact) self.assertEqual(self.data["dmp"], {}) - self.assertEqual(self.data["draft"], {}) + self.assertEqual(self.data["dap"], {}) def test_about_suite(self): req = { From f63f25350ef136f96e4c5a7e36bdf2da56ed378b Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 5 Dec 2022 16:05:09 -0500 Subject: [PATCH 033/123] update/debug midasserver for previous rewiring --- docker/midasserver/midas-dmp_conf.yml | 11 +++++----- docker/midasserver/midas-dmpdap_conf.yml | 26 +++++++++++------------- python/nistoar/midas/dap/service/mdsx.py | 2 +- python/nistoar/midas/wsgi.py | 11 ++++++++-- scripts/midas-uwsgi.py | 3 ++- 5 files changed, 29 insertions(+), 24 deletions(-) diff --git 
a/docker/midasserver/midas-dmp_conf.yml b/docker/midasserver/midas-dmp_conf.yml index 7b16b32..d886ad9 100644 --- a/docker/midasserver/midas-dmp_conf.yml +++ b/docker/midasserver/midas-dmp_conf.yml @@ -14,12 +14,11 @@ services: describedBy: "http://localhost:9091/docs/dmpsvc-elements.html" href: "http://localhost:9091/midas/dmp" - broker: - clients: - midas: - default_shoulder: mdm1 - default: - default_shoulder: mdm1 + clients: + midas: + default_shoulder: mdm1 + default: + default_shoulder: mdm1 dbio: superusers: [ "rlp3" ] diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml index a33c145..a853c98 100644 --- a/docker/midasserver/midas-dmpdap_conf.yml +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -14,14 +14,11 @@ services: describedBy: "http://localhost:9091/docs/dapsvc-elements.html" href: "http://localhost:9091/midas/dap" - broker: - assign_doi: always - doi_naan: "18434" - clients: - midas: - default_shoulder: mdsx - default: - default_shoulder: mdsx + clients: + midas: + default_shoulder: mdsx + default: + default_shoulder: mdsx dbio: superusers: [ "rlp3" ] @@ -36,6 +33,8 @@ services: describedBy: "http://localhost:9091/docs/dapsvc-elements.html" href: "http://localhost:9091/midas/dap/mdsx" version: mdsx + assign_doi: always + doi_naan: "18434" dmp: about: @@ -44,12 +43,11 @@ services: describedBy: "http://localhost:9091/docs/dmpsvc-elements.html" href: "http://localhost:9091/midas/dmp" - broker: - clients: - midas: - default_shoulder: mdm1 - default: - default_shoulder: mdm1 + clients: + midas: + default_shoulder: mdm1 + default: + default_shoulder: mdm1 dbio: superusers: [ "rlp3" ] diff --git a/python/nistoar/midas/dap/service/mdsx.py b/python/nistoar/midas/dap/service/mdsx.py index 3d44ccb..ee4b273 100644 --- a/python/nistoar/midas/dap/service/mdsx.py +++ b/python/nistoar/midas/dap/service/mdsx.py @@ -176,5 +176,5 @@ class DAPApp(MIDASProjectApp): def __init__(self, dbcli_factory: DBClientFactory, log: 
Logger, config: dict={}, project_coll: str=None): service_factory = DAPServiceFactory(dbcli_factory, config, project_coll) - super(DAPApp, self).__init__(service_factory, log.getChild(DAP_PROJECTS), dbcli_factory, config) + super(DAPApp, self).__init__(service_factory, log.getChild(DAP_PROJECTS), config) diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index 168f57e..cd1b9fe 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -285,10 +285,17 @@ def create_suite(self, log: Logger, dbio_client_factory: DBClientFactory) -> Mut aboutapp.add_version(conv, cnvcfg.get("about", {})) # if so configured, set as default + defdesc = None if appcfg.get("default_convention") == conv: - out["%s/def" % appname] = out[path] + defdesc = out[path] elif not appcfg.get("default_convention") and len(appcfg["conventions"]) == 1: - out["%s/def" % appname] = out[path] + defdesc = out[path] + if defdesc: + out["%s/def" % appname] = defdesc + aboutdesc = deepcopy(cnvcfg.get("about", {})) + if aboutdesc.get('href') and isinstance(aboutdesc['href'], str): + aboutdesc['href'] = re.sub(r'/%s/?$' % conv, '/def', aboutdesc['href']) + aboutapp.add_version("def", aboutdesc) else: # No conventions configured for this app name; try to create an app from the defaults diff --git a/scripts/midas-uwsgi.py b/scripts/midas-uwsgi.py index 755d65e..d6cc018 100644 --- a/scripts/midas-uwsgi.py +++ b/scripts/midas-uwsgi.py @@ -43,7 +43,8 @@ import nistoar from nistoar.base import config -from nistoar.midas.dbio import wsgi, MongoDBClientFactory, InMemoryDBClientFactory, FSBasedDBClientFactory +from nistoar.midas.dbio import MongoDBClientFactory, InMemoryDBClientFactory, FSBasedDBClientFactory +from nistoar.midas import wsgi try: import uwsgi From 22e63cdbb1657ad37cc100bbbf8249700250f224 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 7 Dec 2022 08:09:04 -0500 Subject: [PATCH 034/123] add dap.service.validate providing lenient validation for dap --- 
python/nistoar/midas/dap/service/validate.py | 73 +++++++++++++++++++ .../midas/dap/service/test_validate.py | 50 +++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 python/nistoar/midas/dap/service/validate.py create mode 100644 python/tests/nistoar/midas/dap/service/test_validate.py diff --git a/python/nistoar/midas/dap/service/validate.py b/python/nistoar/midas/dap/service/validate.py new file mode 100644 index 0000000..7cdf781 --- /dev/null +++ b/python/nistoar/midas/dap/service/validate.py @@ -0,0 +1,73 @@ +""" +validation utilities specialized for DAP editing +""" +from nistoar.nerdm.validate import * +from nistoar.nerdm.constants import core_schema_base as CORE_SCHEMA_BASE + +PUB_SCHEMA_BASE = CORE_SCHEMA_BASE + "pub/" + +class LenientSchemaLoader(ejs.SchemaLoader): + """ + this modifies the schema definitions on selected schemas to be more lenient for records + intended for use in the DAP Authoring API. + """ + def load_schema(self, uri): + out = super().load_schema(uri) + + if out.get("id"): + if out["id"].startswith(CORE_SCHEMA_BASE+"v"): + # this is the core NERDm schema: drop the "required" property from the + # Resource schema definition + sch = out.get("definitions",{}).get("Resource",{}) + if "required" in sch: + del sch["required"] + + elif out["id"].startswith(CORE_SCHEMA_BASE+"rls/"): + # this is the pub NERDm extension schema: drop the "required" property from the + # PublicDataResource schema definition + sch = out.get("definitions",{}).get("ReleasedResource",{}).get("allOf", [{},{}]) + if len(sch) > 1 and "required" in sch[1]: + del sch[1]["required"] + + elif out["id"].startswith(PUB_SCHEMA_BASE): + # this is the pub NERDm extension schema: drop the "required" property from the + # PublicDataResource schema definition + sch = out.get("definitions",{}).get("PublicDataResource",{}).get("allOf", [{},{}]) + if len(sch) > 1 and "required" in sch[1]: + del sch[1]["required"] + + return out + +def create_lenient_validator(schemadir, 
ejsprefix="_"): + """ + return a validator instance (ejsonschema.ExtValidator) that can validate + NERDm records, but which is slightly more lenient for NERDm schemas. + This is intended for use with the DAP Authoring Service in which + records are permitted to be more incomplete. + + The Validator assumes a particular prefix (usually "_" or "$") for + identifying the so-called "metaproperties" that are used for validation. + This can be set by the forprefix parameter. + + :param str schemadir: the directory where the NERDm schemas are cached + :param forprefix: Either a single character ("_" or "$") or a NERDm + data record used to determine the metaproperty + convention. If the value is a Mapping, it is + assumed to be a NERDm record that contains + metaproperties beginning either with "_" or "$"; + which ever convention this record appears to be + using will be the prefix assumed. + """ + if isinstance(forprefix, Mapping): + forprefix = get_mdval_flavor(forprefix) or "_" + if not isinstance(forprefix, (str, unicode)): + raise TypeError("create_validator: forprefix: not a str or dict") + + loader = LenientSchemaLoader.from_directory(schemadir) + + return ejs.ExtValidator.with_schema_dir(loader, forprefix) + + + + + diff --git a/python/tests/nistoar/midas/dap/service/test_validate.py b/python/tests/nistoar/midas/dap/service/test_validate.py new file mode 100644 index 0000000..0fb2521 --- /dev/null +++ b/python/tests/nistoar/midas/dap/service/test_validate.py @@ -0,0 +1,50 @@ +import os, json, pdb, logging, tempfile +import unittest as test + +import nistoar.midas.dap.service.validate as val +import nistoar.pdr as pdr +import nistoar.nerdm.constants as const + +class TestLenientSchemaLoader(test.TestCase): + + def setUp(self): + self.assertTrue(os.path.isdir(pdr.def_schema_dir)) + self.assertTrue(os.path.isfile(os.path.join(pdr.def_schema_dir, "nerdm-schema.json"))) + self.ldr = val.LenientSchemaLoader.from_directory(pdr.def_schema_dir) + + def 
test_loading_core(self): + sch = self.ldr.load_schema(const.CORE_SCHEMA_URI) + typedef = sch.get("definitions",{}).get("Resource") + self.assertIn("properties", typedef) + self.assertNotIn("required", typedef) + + typedef = sch.get("definitions",{}).get("Topic") + self.assertIn("properties", typedef) + self.assertIn("required", typedef) + + def test_loading_pub(self): + sch = self.ldr.load_schema(const.PUB_SCHEMA_URI) + typedef = sch.get("definitions",{}).get("PublicDataResource") + self.assertIn("allOf", typedef) + typedef = typedef.get("allOf", [{},{}])[1] + self.assertIn("properties", typedef) + self.assertNotIn("required", typedef) + + def test_loading_rls(self): + sch = self.ldr.load_schema(const.core_schema_base+"rls/v0.3") + typedef = sch.get("definitions",{}).get("ReleasedResource") + self.assertIn("allOf", typedef) + typedef = typedef.get("allOf", [{},{}])[1] + self.assertIn("properties", typedef) + self.assertNotIn("required", typedef) + + + + + + + +if __name__ == '__main__': + test.main() + + From c8a86bfaba6f7e91cc77e9dc95be522ca76de0c1 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 7 Dec 2022 17:41:22 -0500 Subject: [PATCH 035/123] dbio: add support for deactivated, add InvalidData exc --- python/nistoar/midas/dbio/base.py | 86 ++++++++++++++++++++++------ python/nistoar/midas/dbio/fsbased.py | 10 +++- python/nistoar/midas/dbio/inmem.py | 8 ++- 3 files changed, 84 insertions(+), 20 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 4bc5e4c..39bd90f 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -179,6 +179,9 @@ def _initialize(self, recdata: MutableMapping) -> MutableMapping: recdata['acls'] = {} if not recdata.get('owner'): recdata['owner'] = self._cli.user_id if self._cli else "" + if 'deactivated' not in recdata: + # Should be None or a date + recdata['deactivated'] = None for perm in ACLs.OWN: if perm not in recdata['acls']: recdata['acls'][perm] = 
[recdata['owner']] if recdata['owner'] else [] @@ -223,6 +226,51 @@ def modified_date(self) -> str: """ return datetime.fromtimestamp(math.floor(self.modified)).isoformat() + @property + def deactivated(self) -> bool: + """ + True if this record has been deactivated. Record that are deactivated are generally + skipped over when being accessed or used. A deactivated record can only be retrieved + via its identifier. + """ + return bool(self._data.get('deactivated')) + + @property + def deactivated_date(self) -> str: + """ + the timestamp when this record was deactivated, formatted as an ISO string. An empty + string is returned if the record is not currently deactivated. + """ + if not self._data.get('deactivated'): + return "" + return datetime.fromtimestamp(math.floor(self._data.get('deactivated'))).isoformat() + + def deactivate(self) -> bool: + """ + mark this record as "deactivated". :py:meth:`deactivated` will now return True. + The :py:meth:`save` method should be called to commit this change. + :return: False if the state was not changed for any reason, including because the record + was already deactivated. + :rtype: True + """ + if self.deactivated: + return False + self._data['deactivated'] = time.time() + return True + + def reactivate(self) -> bool: + """ + reactivate this record; :py:meth:`deactivated` will now return False. + The :py:meth:`save` method should be called to commit this change. + :return: False if the state was not changed for any reason, including because the record + was already activated. + :rtype: True + """ + if not self.deactivated: + return False + self._data['deactivated'] = None + return True + @property def acls(self) -> ACLs: """ @@ -495,8 +543,8 @@ def name_exists(self, name: str, owner: str = None) -> bool: user ID attached to the `DBClient` is assumed. 
""" if not owner: - owner = self.user_id - it = self._cli._select_from_coll(GROUPS_COLL, name=name, owner=owner) + owner = self._cli.user_id + it = self._cli._select_from_coll(GROUPS_COLL, incl_deact=True, name=name, owner=owner) try: return bool(next(it)) except StopIteration: @@ -527,7 +575,7 @@ def get_by_name(self, name: str, owner: str = None) -> Group: """ if not owner: owner = self._cli.user_id - matches = self._cli._select_from_coll(GROUPS_COLL, name=name, owner=owner) + matches = self._cli._select_from_coll(GROUPS_COLL, incl_deact=True, name=name, owner=owner) for m in matches: m = Group(m, self._cli) if m.authorized(ACLs.READ): @@ -538,10 +586,11 @@ def select_ids_for_user(self, id: str) -> MutableSet: """ return all the groups that a user (or a group) is a member of. This implementation will resolve the groups that the user is indirectly a member of--i.e. a user's group itself is a - member of another group. + member of another group. Deactivated groups are not included. """ checked = set() out = set(g['id'] for g in self._cli._select_prop_contains(GROUPS_COLL, 'members', id)) + follow = list(out) while len(follow) > 0: g = follow.pop(0) @@ -594,9 +643,6 @@ def _initialize(self, rec: MutableMapping) -> MutableMapping: rec['meta'] = OrderedDict() if 'curators' not in rec: rec['curators'] = [] - if 'deactivated' not in rec: - # Should be None or a date - rec['deactivated'] = None self._initialize_data(rec) self._initialize_meta(rec) @@ -815,7 +861,7 @@ def name_exists(self, name: str, owner: str = None) -> bool: """ if not owner: owner = self.user_id - it = self._select_from_coll(self._projcoll, name=name, owner=owner) + it = self._select_from_coll(self._projcoll, incl_deact=True, name=name, owner=owner) try: return bool(next(it)) except StopIteration: @@ -828,7 +874,7 @@ def get_record_by_name(self, name: str, owner: str = None) -> Group: """ if not owner: owner = self.user_id - matches = self._select_from_coll(self._projcoll, name=name, owner=owner) + 
matches = self._select_from_coll(self._projcoll, incl_deact=True, name=name, owner=owner) for m in matches: m = ProjectRecord(self._projcoll, m, self) if m.authorized(ACLs.READ): @@ -921,7 +967,7 @@ def _get_from_coll(self, collname, id) -> MutableMapping: raise NotImplementedError() @abstractmethod - def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping]: + def _select_from_coll(self, collname, incl_deact=False, **constraints) -> Iterator[MutableMapping]: """ return an iterator to the records from a specified collection that match the set of given constraints. @@ -935,7 +981,7 @@ def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping] raise NotImplementedError() @abstractmethod - def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMapping]: + def _select_prop_contains(self, collname, prop, target, incl_deact=False) -> Iterator[MutableMapping]: """ return an iterator to the records from a specified collection in which the named list property contains a given target value. @@ -1009,7 +1055,7 @@ class NotAuthorized(DBIOException): an exception indicating that the user attempted an operation that they are not authorized to """ - def __init__(self, who: str=None, op: str=None, message: str=None): + def __init__(self, who: str=None, op: str=None, message: str=None, sys=None): """ create the exception :param str who: the identifier of the user who requested the operation @@ -1041,12 +1087,13 @@ class ObjectNotFound(DBIOException): """ an exception indicating that the requested record, or a requested part of a record, does not exist. """ - def __init__(self, recid, part=None, message=None): + def __init__(self, recid, part=None, message=None, sys=None): """ initialize this exception - :param str recid: the id of the record that was existed - :param str part: the part of the record that was requested. Do not provide this parameter if - the entire record does not exist. 
+ :param str recid: the id of the record that was existed + :param str part: the part of the record that was requested. Do not provide this parameter if + the entire record does not exist. + :param str message: a brief description of the error (what object was not found) """ self.record_id = recid self.record_part = part @@ -1058,3 +1105,10 @@ def __init__(self, recid, part=None, message=None): message = "Requested record with id=%s does not exist" % recid super(ObjectNotFound, self).__init__(message) +class InvalidData(DBIOException): + """ + record create or update request includes invalid input data + """ + pass + + diff --git a/python/nistoar/midas/dbio/fsbased.py b/python/nistoar/midas/dbio/fsbased.py index 260a9ac..b43eb93 100644 --- a/python/nistoar/midas/dbio/fsbased.py +++ b/python/nistoar/midas/dbio/fsbased.py @@ -59,7 +59,7 @@ def _next_recnum(self, shoulder): def _get_from_coll(self, collname, id) -> MutableMapping: return self._read_rec(collname, id) - def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping]: + def _select_from_coll(self, collname, incl_deact=False, **constraints) -> Iterator[MutableMapping]: collpath = self._root / collname if not collpath.is_dir(): return @@ -70,6 +70,9 @@ def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping] except ValueError: # skip over corrupted records continue + + if rec.get('deactivated') and incl_deact: + continue cancel = False for ck, cv in constraints.items(): if rec.get(ck) != cv: @@ -79,7 +82,7 @@ def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping] continue yield rec - def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMapping]: + def _select_prop_contains(self, collname, prop, target, incl_deact=False) -> Iterator[MutableMapping]: collpath = self._root / collname if not collpath.is_dir(): return @@ -93,6 +96,9 @@ def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMappi continue 
except IOError as ex: raise DBIOException(recf+": file locking error: "+str(ex)) + + if rec.get('deactivated') and not incl_deact: + continue if prop in rec and isinstance(rec[prop], (list, tuple)) and target in rec[prop]: yield rec diff --git a/python/nistoar/midas/dbio/inmem.py b/python/nistoar/midas/dbio/inmem.py index dfd2bd2..9f9d527 100644 --- a/python/nistoar/midas/dbio/inmem.py +++ b/python/nistoar/midas/dbio/inmem.py @@ -28,8 +28,10 @@ def _next_recnum(self, shoulder): def _get_from_coll(self, collname, id) -> MutableMapping: return deepcopy(self._db.get(collname, {}).get(id)) - def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping]: + def _select_from_coll(self, collname, incl_deact=False, **constraints) -> Iterator[MutableMapping]: for rec in self._db.get(collname, {}).values(): + if rec.get('deactivated') and not incl_deact: + continue cancel = False for ck, cv in constraints.items(): if rec.get(ck) != cv: @@ -39,8 +41,10 @@ def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping] continue yield deepcopy(rec) - def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMapping]: + def _select_prop_contains(self, collname, prop, target, incl_deact=False) -> Iterator[MutableMapping]: for rec in self._db.get(collname, {}).values(): + if rec.get('deactivated') and not incl_deact: + continue if prop in rec and isinstance(rec[prop], (list, tuple)) and target in rec[prop]: yield deepcopy(rec) From 6700690976d1d79a5e995f4ec175837ed9624a55 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 8 Dec 2022 12:33:07 -0500 Subject: [PATCH 036/123] dbio: add support for deactivated to mongo impl. 
--- python/nistoar/midas/dbio/mongo.py | 13 +++++++-- python/tests/nistoar/midas/dbio/test_mongo.py | 28 +++++++++++++++++-- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/python/nistoar/midas/dbio/mongo.py b/python/nistoar/midas/dbio/mongo.py index 87960e6..cec54bb 100644 --- a/python/nistoar/midas/dbio/mongo.py +++ b/python/nistoar/midas/dbio/mongo.py @@ -119,23 +119,30 @@ def _get_from_coll(self, collname, id) -> MutableMapping: except Exception as ex: raise base.DBIOException("Failed to access record with id=%s: %s" % (id, str(ex))) - def _select_from_coll(self, collname, **constraints) -> Iterator[MutableMapping]: + def _select_from_coll(self, collname, incl_deact=False, **constraints) -> Iterator[MutableMapping]: try: db = self.native coll = db[collname] + if not incl_deact: + constraints['deactivated'] = None + for rec in coll.find(constraints, {'_id': False}): yield rec except Exception as ex: raise base.DBIOException("Failed while selecting records: " + str(ex)) - def _select_prop_contains(self, collname, prop, target) -> Iterator[MutableMapping]: + def _select_prop_contains(self, collname, prop, target, incl_deact=False) -> Iterator[MutableMapping]: try: db = self.native coll = db[collname] - for rec in coll.find({prop: target}, {'_id': False}): + query = { prop: target } + if not incl_deact: + query['deactivated'] = None + + for rec in coll.find(query, {'_id': False}): yield rec except Exception as ex: diff --git a/python/tests/nistoar/midas/dbio/test_mongo.py b/python/tests/nistoar/midas/dbio/test_mongo.py index caee83c..9a6a498 100644 --- a/python/tests/nistoar/midas/dbio/test_mongo.py +++ b/python/tests/nistoar/midas/dbio/test_mongo.py @@ -146,6 +146,18 @@ def test_select_from_coll(self): recs = list(self.cli._select_from_coll(base.GROUPS_COLL, hobby="whittling")) self.assertEqual(len(recs), 2) + # test deactivated filter + self.cli.native[base.GROUPS_COLL].insert_one({"id": "p:gang", + "owner": "p:bob", "deactivated": 1.2 }) + recs = 
list(self.cli._select_from_coll(base.GROUPS_COLL, owner="p:bob")) + self.assertEqual(len(recs), 1) + recs = list(self.cli._select_from_coll(base.GROUPS_COLL, incl_deact=True, owner="p:bob")) + self.assertEqual(len(recs), 2) + self.cli.native[base.GROUPS_COLL].find_one_and_update({"id": "p:gang"}, + { "$set": { "deactivated": None } }) + recs = list(self.cli._select_from_coll(base.GROUPS_COLL, owner="p:bob")) + self.assertEqual(len(recs), 2) + def test_select_prop_contains(self): # test query on unrecognized collection it = self.cli._select_prop_contains("alice", "hobbies", "whittling") @@ -180,6 +192,18 @@ def test_select_prop_contains(self): self.assertEqual(len(recs), 2) self.assertEqual(set([r.get('id') for r in recs]), set("p:bob stars".split())) + # test deactivated filter + self.cli.native[base.GROUPS_COLL].insert_one({"id": "p:gang", + "members": ["p:bob"], "deactivated": 1.2}) + recs = list(self.cli._select_prop_contains(base.GROUPS_COLL, "members", "p:bob")) + self.assertEqual(len(recs), 2) + recs = list(self.cli._select_prop_contains(base.GROUPS_COLL, "members", "p:bob", incl_deact=True)) + self.assertEqual(len(recs), 3) + self.cli.native[base.GROUPS_COLL].find_one_and_update({"id": "p:gang"}, + { "$set": { "deactivated": None } }) + recs = list(self.cli._select_prop_contains(base.GROUPS_COLL, "members", "p:bob")) + self.assertEqual(len(recs), 3) + def test_delete_from(self): # test delete on unrecognized, non-existent collection self.assertFalse(self.cli._delete_from("alice", "p:bob")) @@ -345,7 +369,7 @@ def test_create_group(self): with self.assertRaises(base.NotAuthorized): grp = self.dbg.create_group("friends", "alice") - self.cfg['superusers'] = [self.user] + self.cli._cfg['superusers'] = [self.user] grp = self.dbg.create_group("friends", "alice") self.assertEqual(grp.name, "friends") self.assertEqual(grp.owner, "alice") @@ -397,7 +421,7 @@ def test_get_by_name(self): self.assertIsNone(self.dbg.get_by_name("friends", "alice")) - 
self.cfg['superusers'] = [self.user] + self.cli._cfg['superusers'] = [self.user] grp = self.dbg.create_group("friends", "alice") grp = self.dbg.get_by_name("friends", "alice") self.assertEqual(grp.id, "grp0:alice:friends") From 647918376af944f9428d683eae5b79e187fa61cb Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 8 Dec 2022 16:12:58 -0500 Subject: [PATCH 037/123] dbio: more tests for deactivating --- .../tests/nistoar/midas/dbio/test_groups.py | 16 ++++++++++++++- python/tests/nistoar/midas/dbio/test_inmem.py | 20 +++++++++++++++++++ .../tests/nistoar/midas/dbio/test_record.py | 16 ++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/python/tests/nistoar/midas/dbio/test_groups.py b/python/tests/nistoar/midas/dbio/test_groups.py index db5b268..670d3c2 100644 --- a/python/tests/nistoar/midas/dbio/test_groups.py +++ b/python/tests/nistoar/midas/dbio/test_groups.py @@ -77,6 +77,21 @@ def test_authorized(self): self.assertFalse(self.rec.authorized(base.ACLs.ADMIN, "gary")) self.assertFalse(self.rec.authorized(base.ACLs.DELETE, "gary")) self.assertFalse(self.rec.authorized([base.ACLs.READ, base.ACLs.WRITE], "gary")) + + def test_deactivate(self): + self.assertFalse(self.rec.deactivated) + self.rec.save() + self.assertTrue(self.cli.groups.name_exists("friends")) + self.assertTrue(self.rec.deactivate()) + self.assertFalse(self.rec.deactivate()) + self.rec.save() + self.assertFalse(not self.rec.deactivated) + self.assertTrue(self.cli.groups.name_exists("friends")) + self.assertTrue(self.rec.reactivate()) + self.assertFalse(self.rec.reactivate()) + self.assertFalse(self.rec.deactivated) + self.rec.save() + self.assertTrue(self.cli.groups.name_exists("friends")) class TestDBGroups(test.TestCase): @@ -278,7 +293,6 @@ def test_select_ids_for_user(self): self.assertIn(base.PUBLIC_GROUP, matches) self.assertEqual(len(matches), 4) - if __name__ == '__main__': test.main() diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py 
b/python/tests/nistoar/midas/dbio/test_inmem.py index 37f96d0..4e30cd1 100644 --- a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -96,6 +96,16 @@ def test_select_from_coll(self): recs = list(self.cli._select_from_coll(base.GROUPS_COLL, hobby="whittling")) self.assertEqual(len(recs), 2) + # test deactivated filter + self.cli._db[base.GROUPS_COLL]["p:gang"] = {"id": "p:gang", "owner": "p:bob", "deactivated": 1.2 } + recs = list(self.cli._select_from_coll(base.GROUPS_COLL, owner="p:bob")) + self.assertEqual(len(recs), 1) + recs = list(self.cli._select_from_coll(base.GROUPS_COLL, incl_deact=True, owner="p:bob")) + self.assertEqual(len(recs), 2) + self.cli._db[base.GROUPS_COLL]["p:gang"]["deactivated"] = None + recs = list(self.cli._select_from_coll(base.GROUPS_COLL, owner="p:bob")) + self.assertEqual(len(recs), 2) + def test_select_prop_contains(self): # test query on non-existent collection it = self.cli._select_prop_contains("alice", "hobbies", "whittling") @@ -124,6 +134,16 @@ def test_select_prop_contains(self): self.assertEqual(len(recs), 2) self.assertEqual(set([r.get('id') for r in recs]), set("p:bob stars".split())) + # test deactivated filter + self.cli._db[base.GROUPS_COLL]["p:gang"] = {"id": "p:gang", "members": ["p:bob"], "deactivated": 1.2} + recs = list(self.cli._select_prop_contains(base.GROUPS_COLL, "members", "p:bob")) + self.assertEqual(len(recs), 2) + recs = list(self.cli._select_prop_contains(base.GROUPS_COLL, "members", "p:bob", incl_deact=True)) + self.assertEqual(len(recs), 3) + self.cli._db[base.GROUPS_COLL]["p:gang"]["deactivated"] = None + recs = list(self.cli._select_prop_contains(base.GROUPS_COLL, "members", "p:bob")) + self.assertEqual(len(recs), 3) + def test_delete_from(self): # test query on non-existent collection self.assertFalse(self.cli._delete_from("alice", "p:bob")) diff --git a/python/tests/nistoar/midas/dbio/test_record.py b/python/tests/nistoar/midas/dbio/test_record.py 
index 2ec8d98..382be49 100644 --- a/python/tests/nistoar/midas/dbio/test_record.py +++ b/python/tests/nistoar/midas/dbio/test_record.py @@ -23,6 +23,7 @@ def test_ctor(self): self.assertEqual(self.rec.modified, self.rec.created) self.assertTrue(self.rec.created_date.startswith("20")) self.assertNotIn('.', self.rec.created_date) + self.assertFalse(self.rec.deactivated) self.assertEqual(self.rec.data, {}) self.assertEqual(self.rec.meta, {}) # self.assertEqual(self.rec.curators, []) @@ -77,7 +78,20 @@ def test_authorized(self): self.assertFalse(self.rec.authorized(base.ACLs.DELETE, "gary")) self.assertFalse(self.rec.authorized([base.ACLs.READ, base.ACLs.WRITE], "gary")) - + def test_deactivate(self): + self.assertFalse(self.rec.deactivated) + self.rec.save() + self.assertTrue(self.cli.name_exists("brains")) + self.assertTrue(self.rec.deactivate()) + self.assertFalse(self.rec.deactivate()) + self.rec.save() + self.assertFalse(not self.rec.deactivated) + self.assertTrue(self.cli.name_exists("brains")) + self.assertTrue(self.rec.reactivate()) + self.assertFalse(self.rec.reactivate()) + self.assertFalse(self.rec.deactivated) + self.rec.save() + self.assertTrue(self.cli.name_exists("brains")) if __name__ == '__main__': From eb9642cb1f877d651fe6d7b170d1e088c0e97243 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 8 Dec 2022 16:15:14 -0500 Subject: [PATCH 038/123] dbio: enable deleting project records --- python/nistoar/midas/dbio/base.py | 32 ++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 39bd90f..eb91073 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -7,6 +7,8 @@ * Each *record* in the collection represents a "project" that a user is working on via the service * A record can be expressed as a Python dictionary which can be exported into JSON +See the :py:mod:`DBIO package documentation ` for a fully 
description of the +model and how to interact with the database. """ import time, math from abc import ABC, ABCMeta, abstractmethod, abstractproperty @@ -142,7 +144,11 @@ def __str__(self): class ProtectedRecord(ABC): """ - a base class for records that have ACLs attached to them + a base class for records that have ACLs attached to them. + + This record represents a local copy of the record that exists in the "remote" database. The + client can make changes to this record; however, those changes are not persisted in the + database until the :py:meth:`save` method is called. """ def __init__(self, servicetype: str, recdata: Mapping, dbclient: DBClient=None): @@ -625,6 +631,10 @@ def delete_group(self, gid: str) -> bool: class ProjectRecord(ProtectedRecord): """ a single record from the project collection representing one project created by the user + + This record represents a local copy of the record that exists in the "remote" database. The + client can make changes to this record; however, those changes are not persisted in the + database until the :py:meth:`save` method is called. """ def __init__(self, projcoll: str, recdata: Mapping, dbclient: DBClient=None): @@ -891,6 +901,8 @@ def get_record_for(self, id: str, perm: str=ACLs.READ) -> ProjectRecord: :param str perm: the permission type that the user must be authorized for in order for the record to be returned; if user is not authorized, an exception is raised. Default: `ACLs.READ` + :raises ObjectNotFound: if the identifier does not exist + :raises NotAuthorized: if the user does not have the permission given by ``perm`` """ out = self._get_from_coll(self._projcoll, id) if not out: @@ -1005,6 +1017,24 @@ def _delete_from(self, collname, id): """ raise NotImplementedError() + def delete_record(self, id: str) -> bool: + """ + delete the specified group from the database. 
The user attached to this
+        :py:class:`DBClient` must either be the owner of the record or have `DELETE` permission
+        to carry out this operation.
+        :return:  True if the record was found and successfully deleted; False, otherwise
+        :rtype: bool
+        """
+        try:
+            g = self.get_record_for(id, ACLs.DELETE)
+        except ObjectNotFound:
+            return False
+        if not g:
+            return False
+
+        self._delete_from(self._projcoll, id)
+        return True
+
 
 class DBClientFactory(ABC):
     """
From 36fa06b95bb7f7b2ea70423e5e54dc88aefa14f8 Mon Sep 17 00:00:00 2001
From: RayPlante
Date: Fri, 9 Dec 2022 07:58:08 -0500
Subject: [PATCH 039/123] dbio: add _try_push_recnum() in case of record error

---
 python/nistoar/midas/dbio/base.py             |  9 +++++++++
 python/nistoar/midas/dbio/fsbased.py          | 12 +++++++++++
 python/nistoar/midas/dbio/inmem.py            |  8 ++++++++
 python/nistoar/midas/dbio/mongo.py            | 19 ++++++++++++++++++
 .../tests/nistoar/midas/dbio/test_client.py   | 20 +++++++++++++++++++
 .../tests/nistoar/midas/dbio/test_fsbased.py  | 14 +++++++++++++
 python/tests/nistoar/midas/dbio/test_inmem.py |  9 +++++++++
 python/tests/nistoar/midas/dbio/test_mongo.py | 14 +++++++++++++
 8 files changed, 105 insertions(+)

diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py
index eb91073..65dfd61 100644
--- a/python/nistoar/midas/dbio/base.py
+++ b/python/nistoar/midas/dbio/base.py
@@ -837,6 +837,15 @@ def _mint_id(self, shoulder):
         """
         return "{0}:{1:04}".format(shoulder, self._next_recnum(shoulder))
 
+    def _parse_id(self, id):
+        pair = id.rsplit(':', 1)
+        if len(pair) != 2:
+            return None, None
+        try:
+            return pair[0], int(pair[1])
+        except ValueError:
+            return None, None
+
     @abstractmethod
     def _next_recnum(self, shoulder):
         """
diff --git a/python/nistoar/midas/dbio/fsbased.py b/python/nistoar/midas/dbio/fsbased.py
index b43eb93..b039e00 100644
--- a/python/nistoar/midas/dbio/fsbased.py
+++ b/python/nistoar/midas/dbio/fsbased.py
@@ -56,6 +56,15 @@ def _next_recnum(self, shoulder):
         self._write_rec("nextnum", 
shoulder, num) return num + def _try_push_recnum(self, shoulder, recnum): + recpath = self._root / "nextnum" / (shoulder+".json") + if not recpath.exists(): + return + num = self._read_rec("nextnum", shoulder) + if num >= 0 and num == recnum: + num -= 1 + self._write_rec("nextnum", shoulder, num) + def _get_from_coll(self, collname, id) -> MutableMapping: return self._read_rec(collname, id) @@ -106,6 +115,9 @@ def _delete_from(self, collname, id): recpath = self._root / collname / (id+".json") if recpath.is_file(): recpath.unlink() + shldr, num = self._parse_id(id) + if shldr: + self._try_push_recnum(shldr, num) return True return False diff --git a/python/nistoar/midas/dbio/inmem.py b/python/nistoar/midas/dbio/inmem.py index 9f9d527..0aede78 100644 --- a/python/nistoar/midas/dbio/inmem.py +++ b/python/nistoar/midas/dbio/inmem.py @@ -25,6 +25,11 @@ def _next_recnum(self, shoulder): self._db['nextnum'][shoulder] += 1 return self._db['nextnum'][shoulder] + def _try_push_recnum(self, shoulder, recnum): + n = self._db['nextnum'].get(shoulder, -1) + if n >= 0 and n == recnum: + self._db['nextnum'][shoulder] -= 1 + def _get_from_coll(self, collname, id) -> MutableMapping: return deepcopy(self._db.get(collname, {}).get(id)) @@ -51,6 +56,9 @@ def _select_prop_contains(self, collname, prop, target, incl_deact=False) -> Ite def _delete_from(self, collname, id): if collname in self._db and id in self._db[collname]: del self._db[collname][id] + shldr, num = self._parse_id(id) + if shldr: + self._try_push_recnum(shldr, num) return True return False diff --git a/python/nistoar/midas/dbio/mongo.py b/python/nistoar/midas/dbio/mongo.py index cec54bb..b67d005 100644 --- a/python/nistoar/midas/dbio/mongo.py +++ b/python/nistoar/midas/dbio/mongo.py @@ -107,6 +107,25 @@ def _next_recnum(self, shoulder): except Exception as ex: raise base.DBIOException("Failed to access named sequence, =%s: %s" % (shoulder, str(ex))) + def _try_push_recnum(self, shoulder, recnum): + key = {"slot": 
shoulder} + try: + db = self.native + coll = db["nextnum"] + + with self._mngocli.start_session() as session: + if coll.count_documents(key) == 0: + return + slot = coll.find_one(key) + if slot["next"] == recnum+1: + coll.update_one(key, {"$inc": {"next": -1}}) + + except base.DBIOException as ex: + raise + except Exception as ex: + # ignore database errors + pass + def _get_from_coll(self, collname, id) -> MutableMapping: key = {"id": id} diff --git a/python/tests/nistoar/midas/dbio/test_client.py b/python/tests/nistoar/midas/dbio/test_client.py index 98eca0a..c13ae99 100644 --- a/python/tests/nistoar/midas/dbio/test_client.py +++ b/python/tests/nistoar/midas/dbio/test_client.py @@ -34,7 +34,15 @@ def test_mint_id(self): self.cli._db["nextnum"]["go0"] = 22 self.assertEqual(self.cli._mint_id("go0"), "go0:0023") self.cli._db["nextnum"]["go0"] = 22222 + + # testing unminting + id = self.cli._mint_id("go0") + self.assertEqual(id, "go0:22223") + self.assertEqual(self.cli._parse_id(id), ("go0", 22223)) + self.cli._try_push_recnum("go0", 22223) # unmints 22223 self.assertEqual(self.cli._mint_id("go0"), "go0:22223") + self.cli._try_push_recnum("go0", 22222) # has no effect + self.assertEqual(self.cli._mint_id("go0"), "go0:22224") self.assertEqual(self.cli._mint_id("ncnr5"), "ncnr5:0001") @@ -74,6 +82,18 @@ def test_create_record(self): self.assertTrue(self.cli.exists("mds3:0001")) self.assertTrue(self.cli.name_exists("test", self.user)) + self.assertTrue(self.cli.delete_record("mds3:0001")) + self.assertTrue(not self.cli.exists("mds3:0001")) + rec = self.cli.create_record("test", "mds3") + self.assertEqual(rec.id, "mds3:0001") + rec = self.cli.create_record("test2", "mds3") + self.assertEqual(rec.id, "mds3:0002") + self.assertTrue(self.cli.exists("mds3:0001")) + self.assertTrue(self.cli.delete_record("mds3:0001")) + self.assertTrue(not self.cli.exists("mds3:0001")) + rec = self.cli.create_record("test", "mds3") + self.assertEqual(rec.id, "mds3:0003") + def 
test_get_record(self): with self.assertRaises(base.ObjectNotFound): self.cli.get_record_for("pdr0:0001") diff --git a/python/tests/nistoar/midas/dbio/test_fsbased.py b/python/tests/nistoar/midas/dbio/test_fsbased.py index 67d2760..bc9f5fa 100644 --- a/python/tests/nistoar/midas/dbio/test_fsbased.py +++ b/python/tests/nistoar/midas/dbio/test_fsbased.py @@ -80,6 +80,20 @@ def test_next_recnum(self): self.assertEqual(self.cli._next_recnum("goober"), 1) self.assertEqual(self.cli._next_recnum("gary"), 2) + recpath = self.cli._root / "nextnum" / ("goob.json") + self.assertTrue(recpath.is_file()) + self.assertEqual(self.cli._read_rec("nextnum", "goob"), 3) + self.cli._try_push_recnum("goob", 2) + self.assertEqual(self.cli._read_rec("nextnum", "goob"), 3) + + recpath = self.cli._root / "nextnum" / ("hank.json") + self.assertTrue(not recpath.exists()) + self.cli._try_push_recnum("hank", 2) + self.assertTrue(not recpath.exists()) + + self.cli._try_push_recnum("goob", 3) + self.assertEqual(self.cli._read_rec("nextnum", "goob"), 2) + def test_get_from_coll(self): # test query on non-existent collection self.assertIsNone(self.cli._get_from_coll("alice", "p:bob")) diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py b/python/tests/nistoar/midas/dbio/test_inmem.py index 4e30cd1..b8b227c 100644 --- a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -45,6 +45,15 @@ def test_next_recnum(self): self.assertEqual(self.cli._next_recnum("goober"), 1) self.assertEqual(self.cli._next_recnum("gary"), 2) + self.assertEqual(self.cli._db["nextnum"]["goob"], 3) + self.cli._try_push_recnum("goob", 2) + self.assertEqual(self.cli._db["nextnum"]["goob"], 3) + self.assertNotIn("hank", self.cli._db["nextnum"]) + self.cli._try_push_recnum("hank", 2) + self.assertNotIn("hank", self.cli._db["nextnum"]) + self.cli._try_push_recnum("goob", 3) + self.assertEqual(self.cli._db["nextnum"]["goob"], 2) + def test_get_from_coll(self): # test query on 
non-existent collection self.assertIsNone(self.cli._get_from_coll("alice", "p:bob")) diff --git a/python/tests/nistoar/midas/dbio/test_mongo.py b/python/tests/nistoar/midas/dbio/test_mongo.py index 9a6a498..6601a4c 100644 --- a/python/tests/nistoar/midas/dbio/test_mongo.py +++ b/python/tests/nistoar/midas/dbio/test_mongo.py @@ -94,6 +94,20 @@ def test_next_recnum(self): self.assertEqual(self.cli._next_recnum("goober"), 1) self.assertEqual(self.cli._next_recnum("gary"), 2) + slot = self.cli.native.nextnum.find_one({"slot": "goob"}) + self.assertEqual(slot["next"], 4) + self.cli._try_push_recnum("goob", 2) + slot = self.cli.native.nextnum.find_one({"slot": "goob"}) + self.assertEqual(slot["next"], 4) + + self.assertEqual(self.cli.native.nextnum.count_documents({"slot": "hank"}), 0) + self.cli._try_push_recnum("hank", 2) + self.assertEqual(self.cli.native.nextnum.count_documents({"slot": "hank"}), 0) + + self.cli._try_push_recnum("goob", 3) + slot = self.cli.native.nextnum.find_one({"slot": "goob"}) + self.assertEqual(slot["next"], 3) + def test_get_from_coll(self): # test query on unrecognized collection self.assertIsNone(self.cli._get_from_coll("alice", "p:bob")) From d104e3ff7f8372855148d4afa1a34e66ac12a177 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 19 Dec 2022 06:50:46 -0500 Subject: [PATCH 040/123] nerdstore: provide factory, enable inst. 
from config, add more tests --- .../nistoar/midas/dap/nerdstore/__init__.py | 35 +++++++++ python/nistoar/midas/dap/nerdstore/base.py | 11 +++ python/nistoar/midas/dap/nerdstore/fsbased.py | 22 ++++++ python/nistoar/midas/dap/nerdstore/inmem.py | 30 ++++++-- .../midas/dap/nerdstore/test_fsbased.py | 11 +++ .../nistoar/midas/dap/nerdstore/test_inmem.py | 77 +++++++++++++++++++ 6 files changed, 179 insertions(+), 7 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/__init__.py b/python/nistoar/midas/dap/nerdstore/__init__.py index bd2fe9d..304d20e 100644 --- a/python/nistoar/midas/dap/nerdstore/__init__.py +++ b/python/nistoar/midas/dap/nerdstore/__init__.py @@ -6,4 +6,39 @@ the metadata from storage and into memory, enabling new metadata to be merge in, and storing the result. """ +from collections.abc import Mapping +from logging import Logger + from .base import * +from nistoar.pdr.exceptions import ConfigurationException, StateException + +from . import inmem +from . import fsbased + +_def_store_map = { + "inmem": inmem.InMemoryResourceStorage, + "fsbased": fsbased.FSBasedResourceStorage +} + +class NERDResourceStorageFactory: + """ + a factory class for creating :py:class:`~nistoar.midas.dap.nerdstore.NERDResourceStorage` instances. 
+ + """ + + def __init__(self, storemap: Mapping=None): + if not storemap: + storemap = _def_store_map + self._byname = storemap + + def open_storage(self, config: Mapping, logger: Logger, implname: str=None) -> NERDResourceStorage: + if not implname: + implname = config.get("type") + if not implname: + raise ConfigurationException("Missing required configuration parameter: type") + + if implname not in self._byname: + raise StateException("Unrecognized nerdstore implementation type: "+implname) + + return self._byname[implname].from_config(config, logger) + diff --git a/python/nistoar/midas/dap/nerdstore/base.py b/python/nistoar/midas/dap/nerdstore/base.py index de8c075..7fece13 100644 --- a/python/nistoar/midas/dap/nerdstore/base.py +++ b/python/nistoar/midas/dap/nerdstore/base.py @@ -5,6 +5,7 @@ from abc import ABC, ABCMeta, abstractproperty, abstractmethod from collections.abc import MutableMapping, Mapping, MutableSequence from typing import Iterable, Iterator, NewType +from logging import Logger import nistoar.nerdm.utils as nerdmutils from nistoar.pdr.preserve.bagit.builder import (DATAFILE_TYPE, SUBCOLL_TYPE, DOWNLOADABLEFILE_TYPE) @@ -812,6 +813,16 @@ class NERDResourceStorage(ABC): """ a factory function that creates or opens existing stored NERDm Resource records """ + + @classmethod + def from_config(cls, config: Mapping, logger: Logger): + """ + an abstract class method for creatng NERDResourceStorage instances + :param dict config: the configuraiton for the specific type of storage + :param Logger logger: the logger to use to capture messages + """ + raise NotImplementedError() + @abstractmethod def open(self, id: str=None) -> NERDResource: """ diff --git a/python/nistoar/midas/dap/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py index dee614c..a585d9e 100644 --- a/python/nistoar/midas/dap/nerdstore/fsbased.py +++ b/python/nistoar/midas/dap/nerdstore/fsbased.py @@ -936,6 +936,28 @@ class FSBasedResourceStorage(NERDResourceStorage): 
_seqfile = "_seq.json" _idre = re.compile(r'^\w+\d*:0*(\d+)$') + @classmethod + def from_config(cls, config: Mapping, logger: Logger): + """ + an class method for creatng an FSBasedResourceStorage instance from configuration data. + + Recognized configuration paramters include: + + ``store_dir`` + (str) _required_. The root directory under which all resource data will be stored. + ``default_shoulder`` + (str) _optional_. The shoulder that new identifiers are minted under. This is not + normally used as direct clients of this class typically choose the shoulder on a + per-call basis. The default is "nrd". + + :param dict config: the configuraiton for the specific type of storage + :param Logger logger: the logger to use to capture messages + """ + if not config.get('store_dir'): + raise ConfigurationException("Missing required configuration parameter: store_dir") + + return cls(config['store_dir'], config.get("default_shoulder", "nrd"), logger) + def __init__(self, storeroot: str, newidprefix: str="nrd", logger: Logger=None): """ initialize a factory with with the resource data storage rooted at a given directory diff --git a/python/nistoar/midas/dap/nerdstore/inmem.py b/python/nistoar/midas/dap/nerdstore/inmem.py index b6d7805..ae287ca 100644 --- a/python/nistoar/midas/dap/nerdstore/inmem.py +++ b/python/nistoar/midas/dap/nerdstore/inmem.py @@ -11,7 +11,7 @@ from collections import OrderedDict from collections.abc import Mapping from logging import Logger -from typing import Iterable, Iterator +from typing import Iterable, Iterator, List from .base import * from .base import _NERDOrderedObjectList, DATAFILE_TYPE, SUBCOLL_TYPE, DOWNLOADABLEFILE_TYPE @@ -691,12 +691,28 @@ class InMemoryResourceStorage(NERDResourceStorage): a factory for opening records stored in memory """ - def __init__(self, newidprefix: str="nrd", existing: [Mapping]=[], logger: Logger=None): + @classmethod + def from_config(cls, config: Mapping, logger: Logger): + """ + an class method for 
creatng an FSBasedResourceStorage instance from configuration data. + + Recognized configuration paramters include: + ``default_shoulder`` + (str) _optional_. The shoulder that new identifiers are minted under. This is not + normally used as direct clients of this class typically choose the shoulder on a + per-call basis. The default is "nrd". + + :param dict config: the configuraiton for the specific type of storage + :param Logger logger: the logger to use to capture messages + """ + return cls(config.get("default_shoulder", "nrd"), logger=logger) + + def __init__(self, newidprefix: str="nrd", existing: List[Mapping]=[], logger: Logger=None): """ initialize a factory with some existing in-memory NERDm records :param str newidprefix: a prefix to use when minting new identifiers - :param Mapping existing: a list of NERDm records that should be made available via - :py:method:`open`. + :param [Mapping] existing: a list of NERDm records that should be made available via + :py:method:`open`. 
""" self.log = logger self._pfx = newidprefix @@ -716,14 +732,14 @@ def load_from(self, rec: Mapping, id: str=None): """ if not id: id = rec.get('@id') - m = _idre.search(id) + m = _idre.search(id) if id else None if m: n = int(m.group(1)) if n >= self._ididx: self._ididx = n + 1 if not id: id = self._new_id() - if id in self._res: + if id in self._recs: self._recs[id].replace_all_data(rec) else: self._recs[id] = InMemoryResource(id, rec, self.log) @@ -744,7 +760,7 @@ def open(self, id: str=None) -> NERDResource: if not id: id = self._new_id() if id not in self._recs: - self._recs[id] = InMemoryResource(id, None, self.log) + self._recs[id] = InMemoryResource(id, OrderedDict(), self.log) return self._recs[id] def exists(self, id: str) -> bool: diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py index fd8a8a9..695b137 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py @@ -931,6 +931,17 @@ def test_load_from(self): self.assertEqual(res.nonfiles.count, 1) self.assertEqual(res.files.count, 4) + def test_delete(self): + self.assertTrue(not self.fact.exists("pdr02p1s")) + nerd = load_simple() + self.fact.load_from(nerd) + self.assertTrue(self.fact.exists("pdr02p1s")) + + self.assertFalse(self.fact.delete("nobody")) + self.assertTrue(self.fact.exists("pdr02p1s")) + self.assertTrue(self.fact.delete("pdr02p1s")) + self.assertTrue(not self.fact.exists("pdr02p1s")) + diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py index 08cf4a5..de8dc96 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py @@ -791,6 +791,83 @@ def test_getsetpop(self): self.assertEqual(self.cmps.get(0)['mediaType'], "application/zip") +class TestInMemoryResourceStorage(test.TestCase): + + def setUp(self): 
+ self.recs = [ + {"@id": "pdr:001", "title": "Gurns I've known"}, + {"title": "Goobers!", "references": [ {"title": "All About Goobers"}]} + ] + self.store = inmem.InMemoryResourceStorage() + + def test_ctor(self): + self.assertEqual(self.store._recs, {}) + + def test_load_from(self): + self.store.load_from(self.recs[0]) + self.assertEqual(len(self.store._recs), 1) + self.assertIn("pdr:001", self.store._recs) + + self.store.load_from(self.recs[1]) + self.assertEqual(len(self.store._recs), 2) + self.assertIn("pdr:001", self.store._recs) + self.assertIn("nrd_0", self.store._recs) + + self.assertEqual(self.store._recs["pdr:001"].get_res_data(), + {"@id": "pdr:001", "title": "Gurns I've known"}) + self.assertEqual(self.store._recs["nrd_0"].get_res_data(), + {"@id": "nrd_0", "title": "Goobers!"}) + + def test_ctor_existing(self): + self.store = inmem.InMemoryResourceStorage(existing=self.recs) + + self.assertEqual(len(self.store._recs), 2) + self.assertIn("pdr:001", self.store._recs) + self.assertIn("nrd_0", self.store._recs) + + self.assertEqual(self.store._recs["pdr:001"].get_res_data(), + {"@id": "pdr:001", "title": "Gurns I've known"}) + self.assertEqual(self.store._recs["nrd_0"].get_res_data(), + {"@id": "nrd_0", "title": "Goobers!"}) + + def test_open(self): + self.store = inmem.InMemoryResourceStorage(existing=self.recs) + self.assertEqual(len(self.store._recs), 2) + self.assertTrue(self.store.exists("nrd_0")) + nerd = self.store.open("nrd_0") + self.assertEqual(nerd.get_res_data(), {"@id": "nrd_0", "title": "Goobers!"}) + self.assertEqual(nerd.references.count, 1) + self.assertTrue(self.store.exists("nrd_0")) + + self.assertTrue(not self.store.exists("gary7")) + nerd = self.store.open("gary7") + self.assertEqual(len(self.store._recs), 3) + self.assertTrue(self.store.exists("gary7")) + self.assertEqual(nerd.get_res_data(), {"@id": "gary7"}) + + nerd = self.store.open() + self.assertEqual(nerd.id, "nrd_1") + self.assertEqual(len(self.store._recs), 4) + 
self.assertTrue(self.store.exists("nrd_1")) + + def test_delete(self): + self.store.load_from(self.recs[1]) + self.assertEqual(len(self.store._recs), 1) + nerd = self.store.open("gary7") + self.assertEqual(len(self.store._recs), 2) + + self.assertFalse(self.store.delete("nobody")) + self.assertEqual(len(self.store._recs), 2) + self.assertTrue(self.store.delete("nrd_0")) + self.assertEqual(len(self.store._recs), 1) + self.assertTrue(self.store.delete("gary7")) + self.assertEqual(len(self.store._recs), 0) + + def test_from_config(self): + self.store = inmem.InMemoryResourceStorage.from_config({"default_shoulder": "gary", "foo": "bar"}, + None) + nerd = self.store.open() + self.assertEqual(nerd.id, "gary_0") if __name__ == '__main__': test.main() From 257fb69bf655be4ac5923bcaded0e1c44a83f289 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 19 Dec 2022 06:55:50 -0500 Subject: [PATCH 041/123] dbio: tweak error handling, InvalidData -> InvalidUpdate, fix auth --- python/nistoar/midas/dbio/__init__.py | 2 +- python/nistoar/midas/dbio/base.py | 5 - python/nistoar/midas/dbio/project.py | 128 ++++++++++++++++++++++---- 3 files changed, 113 insertions(+), 22 deletions(-) diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index 20aca77..7982098 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -203,4 +203,4 @@ MIDASDBClientFactory = MongoDBClientFactory -from .project import ProjectService, ProjectServiceFactory +from .project import ProjectService, ProjectServiceFactory, InvalidUpdate diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 65dfd61..f92e3c9 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -1144,10 +1144,5 @@ def __init__(self, recid, part=None, message=None, sys=None): message = "Requested record with id=%s does not exist" % recid super(ObjectNotFound, self).__init__(message) -class 
InvalidData(DBIOException): - """ - record create or update request includes invalid input data - """ - pass diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 6878035..6a421bd 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -12,8 +12,10 @@ from logging import Logger, getLogger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence +from typing import List -from .base import DBClient, DBClientFactory, ProjectRecord, AlreadyExists, NotAuthorized, ObjectNotFound +from .base import (DBClient, DBClientFactory, ProjectRecord, ACLs, + AlreadyExists, NotAuthorized, ObjectNotFound, DBIOException) from .. import MIDASException, MIDASSystem from nistoar.pdr.publish.prov import PubAgent @@ -54,7 +56,7 @@ class ProjectService(MIDASSystem): """ def __init__(self, project_type: str, dbclient_factory: DBClient, config: Mapping={}, - who: PubAgent=None, log: Logger=None): + who: PubAgent=None, log: Logger=None, _subsys=None, _subsysabbrev=None): """ create the service :param str project_type: the project data type desired. This name is usually used as the @@ -65,7 +67,11 @@ def __init__(self, project_type: str, dbclient_factory: DBClient, config: Mappin :param who PubAgent: the representation of the user that is requesting access :param Logger log: the logger to use for log messages """ - super(ProjectService, self).__init__("DBIO Project Service", "DBIO") + if not _subsys: + _subsys = "DBIO Project Service" + if not _subsysabbrev: + _subsysabbrev = "DBIO" + super(ProjectService, self).__init__(_subsys, _subsysabbrev) self.cfg = config if not who: who = PubAgent("unkwn", PubAgent.USER, "anonymous") @@ -193,14 +199,14 @@ def update_data(self, id, newdata, part=None, prec=None): would otherwise result in invalid data content. 
""" if not prec: - prec = self.dbcli.get_record_for(id) # may raise ObjectNotFound/NotAuthorized + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if not part: - # this is a complete replacement; merge it with a starter record + # updating data as a whole: merge given data into previously saved data self._merge_into(newdata, prec.data) else: - # replacing just a part of the data + # updating just a part of the data steps = part.split('/') data = prec.data while steps: @@ -297,7 +303,7 @@ def replace_data(self, id, newdata, part=None, prec=None): would otherwise result in invalid data content. """ if not prec: - prec = self.dbcli.get_record_for(id) # may raise ObjectNotFound/NotAuthorized + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if not part: # this is a complete replacement; merge it with a starter record @@ -359,6 +365,48 @@ def _save_data(self, indata: Mapping, prec: ProjectRecord = None) -> Mapping: def _validate_data(self, data): pass + def clear_data(self, id, part=None, prec=None): + """ + remove the stored data content of the record and reset it to its defaults. + :param str id: the identifier for the record whose data should be cleared. + :param stt part: the slash-delimited pointer to an internal data property. If provided, + only that property will be cleared (either removed or set to an initial + default). + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. + If this is not provided, the record will by fetched anew based on the `id`. + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if clearing of the part of the data specified by `part` is not allowed. 
+ """ + if not prec: + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + + initdata = self._new_data_for(prec.id, prec.meta) + if not part: + # clearing everything: return record to its initial defaults + prec.data = initdata + + else: + # clearing only part of the data + steps = part.split('/') + data = prec.data + while steps: + prop = steps.pop(0) + if prop in initdata: + if not steps: + data[prop] = initdata[prop] + elif prop not in data: + data[prop] = {} + elif prop not in data: + break + elif not steps: + del data[prop] + break + data = data[prop] + initdata = initdata.get(prop, {}) + class ProjectServiceFactory: """ @@ -399,28 +447,76 @@ def create_service_for(self, who: PubAgent=None): return ProjectService(self._prjtype, self._dbclifact, self._cfg, who, self._log) -class InvalidUpdate(MIDASException): +class InvalidUpdate(DBIOException): """ an exception indicating that the user-provided data is invalid or otherwise would result in invalid data content for a record. + + The determination of invalid data may result from detailed data validation which may uncover + multiple errors. The ``errors`` property will contain a list of messages, each describing a + validation error encounted. The :py:meth:`format_errors` will format all these messages into + a single string for a (text-based) display. """ - def __init__(self, message, recid=None, part=None): + def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=None, sys=None): """ initialize the exception - :param str recid: the id of the record that was existed - :param str part: the part of the record that was requested. Do not provide this parameter if - the entire record does not exist. - """ + :param str message: a brief description of the problem with the user input + :param str recid: the id of the record that was existed + :param str part: the part of the record that was requested. 
Do not provide this parameter if + the entire record does not exist. + :param [str] errors: a listing of the individual errors uncovered in the data + """ + if errors: + if not message: + if len(errors) == 1: + message = "Validation Error: " + errors[0] + elif len(errors) == 0: + message = "Unknown validation errors encountered" + else: + message = "Encountered %d validation errors, including: %s" % (len(errors), errors[0]) + elif message: + errors = [message] + else: + message = "Unknown validation errors encountered while updating data" + errors = [] + super(InvalidUpdate, self).__init__(message) self.record_id = recid self.record_part = part + + def __str__(self): + out = "" + if self.record_id: + out += "%s: " % self.record_id + if self.record_part: + out += "%s: " % self.record_part + return out + super().__str__() + + def format_errors(self): + """ + format into a string the listing of the validation errors encountered that resulted in + this exception. The returned string will have embedded newline characters for multi-line + text-based display. + """ + if not self.errors: + return str(self) + + out = "" + if self.record_id: + out += "%s: " % self.record_id + out += "Validation errors encountered" + if self.record_part: + out += " in data submitted to update %s" % self.record_part + out += ":\n * " + out += "\n * ".join(self.errors) + return out -class PartNotAccessible(MIDASException): +class PartNotAccessible(DBIOException): """ an exception indicating that the user-provided data is invalid or otherwise would result in invalid data content for a record. 
""" - def __init__(self, recid, part, message=None): + def __init__(self, recid, part, message=None, sys=None): """ initialize the exception :param str recid: the id of the record that was existed @@ -432,7 +528,7 @@ def __init__(self, recid, part, message=None): if not message: message = "%s: data property, %s, is not in an updateable state" % (recid, part) - super(PartNotAccessible, self).__init__(message) + super(PartNotAccessible, self).__init__(message, sys=sys) From 73148c6a61dc445b34855ff0db4f0b4788b59c81 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 19 Dec 2022 11:33:59 -0500 Subject: [PATCH 042/123] allow res md to have different id internally from one used to retrieve it --- python/nistoar/midas/dap/nerdstore/inmem.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/inmem.py b/python/nistoar/midas/dap/nerdstore/inmem.py index ae287ca..fa64e5b 100644 --- a/python/nistoar/midas/dap/nerdstore/inmem.py +++ b/python/nistoar/midas/dap/nerdstore/inmem.py @@ -597,7 +597,7 @@ class InMemoryResource(NERDResource): """ an in-memory implementation of the NERDResource interface """ - _subprops = "authors references components @id".split() + _subprops = "authors references components".split() def __init__(self, id: str, rec: Mapping={}, parentlog: Logger=None): super(InMemoryResource, self).__init__(id, parentlog) @@ -666,7 +666,8 @@ def get_res_data(self): if self._data is None: return None out = copy.deepcopy(self._data) - out['@id'] = self.id + if '@id' not in out: + out['@id'] = self.id return out def get_data(self, inclfiles=True) -> Mapping: From 5aa83fdcf0a2b8c3d38d5f53746824d70f842081 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 3 Feb 2023 12:03:20 -0500 Subject: [PATCH 043/123] dap: add mds3.py (unit tests incomplete) --- python/nistoar/midas/dap/service/mds3.py | 1143 +++++++++++++++++ python/nistoar/midas/dap/service/validate.py | 17 +- python/nistoar/midas/dbio/project.py | 5 +- 
.../nistoar/midas/dap/service/test_mds3.py | 486 +++++++ 4 files changed, 1647 insertions(+), 4 deletions(-) create mode 100644 python/nistoar/midas/dap/service/mds3.py create mode 100644 python/tests/nistoar/midas/dap/service/test_mds3.py diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py new file mode 100644 index 0000000..2c0b38a --- /dev/null +++ b/python/nistoar/midas/dap/service/mds3.py @@ -0,0 +1,1143 @@ +""" +The DAP Authoring Service implemented using the mds3 convention. This convention represents the +first DAP convention powered by the DBIO APIs. + +Support for the web service frontend is provided as a +WSGI :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` implementation. +""" +import os +from logging import Logger +from collections import OrderedDict +from collections.abc import Mapping, MutableMapping, Sequence, Callable +from typing import List + +from ...dbio import (DBClient, DBClientFactory, ProjectRecord, AlreadyExists, NotAuthorized, ACLs, + InvalidUpdate, ProjectService, ProjectServiceFactory, DAP_PROJECTS) +from ...dbio.wsgi.project import MIDASProjectApp +from nistoar.base.config import ConfigurationException, merge_config +from nistoar.nerdm import constants as nerdconst, utils as nerdutils +from nistoar.pdr import def_schema_dir, constants as const +from nistoar.pdr.utils import build_mime_type_map +from nistoar.pdr.publish.prov import PubAgent + +from . 
import validate +from ..nerdstore import NERDResource, NERDResourceStorage, NERDResourceStorageFactory + +ASSIGN_DOI_NEVER = 'never' +ASSIGN_DOI_ALWAYS = 'always' +ASSIGN_DOI_REQUEST = 'request' + +NERDM_PRE = "nrd" +NERDM_SCH_ID_BASE = nerdconst.core_schema_base +NERDM_SCH_VER = nerdconst.schema_versions[0] +NERDM_SCH_ID = NERDM_SCH_ID_BASE + NERDM_SCH_VER + "#" +NERDM_DEF = NERDM_SCH_ID + "/definitions/" +NERDM_CONTEXT = "https://data.nist.gov/od/dm/nerdm-pub-context.jsonld" + +NERDMPUB_PRE = "nrdp" +NERDMPUB_SCH_ID_BASE = nerdconst.core_schema_base + "pub/" +NERDMPUB_SCH_VER = NERDM_SCH_VER +NERDMPUB_SCH_ID = NERDMPUB_SCH_ID_BASE + NERDMPUB_SCH_VER + "#" +NERDMPUB_DEF = NERDMPUB_SCH_ID + "/definitions/" + +NERDMAGG_PRE = "nrda" +NERDMAGG_SCH_ID_BASE = nerdconst.core_schema_base + "agg/" +NERDMAGG_SCH_VER = nerdconst.agg_ver +NERDMAGG_SCH_ID = NERDMAGG_SCH_ID_BASE + NERDMAGG_SCH_VER + "#" +NERDMAGG_DEF = NERDMAGG_SCH_ID + "/definitions/" + +NERDMEXP_PRE = "nrde" +NERDMEXP_SCH_ID_BASE = nerdconst.core_schema_base + "exp/" +NERDMEXP_SCH_VER = nerdconst.exp_ver +NERDMEXP_SCH_ID = NERDMEXP_SCH_ID_BASE + NERDMEXP_SCH_VER + "#" +NERDMEXP_DEF = NERDMEXP_SCH_ID + "/definitions/" + +NERDMSW_PRE = "nrdw" +NERDMSW_SCH_ID_BASE = nerdconst.core_schema_base + "sw/" +NERDMSW_SCH_VER = NERDM_SCH_VER +NERDMSW_SCH_ID = NERDMSW_SCH_ID_BASE + NERDMSW_SCH_VER + "#" +NERDMSW_DEF = NERDMSW_SCH_ID + "/definitions/" + +NERDMBIB_PRE = "nrdw" +NERDMBIB_SCH_ID_BASE = nerdconst.core_schema_base + "bib/" +NERDMBIB_SCH_VER = nerdconst.bib_ver +NERDMBIB_SCH_ID = NERDMBIB_SCH_ID_BASE + NERDMBIB_SCH_VER + "#" +NERDMBIB_DEF = NERDMBIB_SCH_ID + "/definitions/" + +NIST_NAME = "National Institute of Standards and Technology" +NIST_ABBREV = "NIST" +NIST_ROR = "ror:05xpvk416" + +EXTSCHPROP = "_extensionSchemas" + +class DAPService(ProjectService): + """ + a project record request broker class for DAP records. 
+ + In addition to the configuration parameters supported by the parent class, this specialization + also supports the following parameters: + + ``assign_doi`` + a label that indicates when a DOI should be assigned to new records. Supported values include: + * ``always`` -- assign a DOI to every newly created record + * ``request`` -- assign a DOI on when requested by the system + * ``never`` -- never assign a DOI to a record. + ``doi_naan`` + the Name Assigning Authority Number to use for the DOI (given as a string) + ``validate_nerdm`` + if True (default), validate the updates to NERDm metadata. Incomplete NERDm records are + permitted. + + Note that the DOI is not yet registered with DataCite; it is only internally reserved and included + in the record NERDm data. + """ + + def __init__(self, dbclient_factory: DBClient, config: Mapping={}, who: PubAgent=None, + log: Logger=None, nerdstore: NERDResourceStorage=None, project_type=DAP_PROJECTS, + minnerdmver=(0, 6)): + """ + create a request handler + :param DBClient dbclient: the DBIO client instance to use to access and save project records + :param dict config: the handler configuration tuned for the current type of project + :param dict wsgienv: the WSGI request context + :param Logger log: the logger to use for log messages + """ + super(DAPService, self).__init__(project_type, dbclient_factory, config, who, log, + _subsys="Digital Asset Publication Authoring System", + _subsysabbrev="DAP") + + if not nerdstore: + nerdstore = NERDResourceStorageFactory().open_storage(config.get("nerdstorage", {}), log) + self._store = nerdstore + + self.cfg.setdefault('assign_doi', ASSIGN_DOI_REQUEST) + if not self.cfg.get('doi_naan') and self.cfg.get('assign_doi') != ASSIGN_DOI_NEVER: + raise ConfigurationException("Missing configuration: doi_naan") + + self._schemadir = self.cfg.get('nerdm_schema_dir', def_schema_dir) + self._valid8r = None + if self.cfg.get('validate_nerdm', True): + if not self._schemadir: + raise 
ConfigurationException("'validate_nerdm' is set but cannot find schema dir") + self._valid8r = validate.create_lenient_validator(self._schemadir, "_") + + self._mediatypes = { + "csv": "text/csv", "txt": "text/plain", "html": "text/html", "htm": "text/html", + "sha256": "text/plain", "md5": "text/plain" + } + mimefiles = self.cfg.get('mimetype_files', []) + if not isinstance(mimefiles, list): + mimefiles = [mimefiles] + if mimefiles: + self._mediatypes = build_mime_type_map(mimefiles) + + self._minnerdmver = minnerdmver + + def _guess_format(self, file_ext, mimetype=None): + if not mimetype: + mimetype = self._mediatypes.get(file_ext) + return None + + def create_record(self, name, data=None, meta=None) -> ProjectRecord: + """ + create a new project record with the given name. An ID will be assigned to the new record. + :param str name: the mnuemonic name to assign to the record. This name cannot match that + of any other record owned by the user. + :param dict data: the initial data content to assign to the new record. + :param dict meta: the initial metadata to assign to the new record. + :raises NotAuthorized: if the authenticated user is not authorized to create a record + :raises AlreadyExists: if a record owned by the user already exists with the given name + :raises InvalidUpdate: if the data given in either the ``data`` or ``meta`` parameters are + invalid (i.e. is not compliant with schemas and restrictions asociated + with this project type). 
+ """ + shoulder = self._get_id_shoulder(self.who) + prec = self.dbcli.create_record(name, shoulder) + nerd = None + + try: + if meta: + meta = self._moderate_metadata(meta, shoulder) + if prec.meta: + self._merge_into(meta, prec.meta) + else: + prec.meta = meta + elif not prec.meta: + prec.meta = self._new_metadata_for(shoulder) + + # establish the version of NERDm we're using + schemaid = None + if data and data.get("_schema"): + schemaid = data["_schema"] + m = re.search(r'/v(\d+\.\d+(\.\d+)*)#?$', schemaid) + if schemaid.startswith(NERDM_SCH_ID_BASE) and m: + ver = m.group(1).split('.') + for v in range(len(ver)): + if i >= len(self._minnerdmver): + break; + if ver[i] < self._minnerdmver[1]: + raise InvalidUpdate("Requested NERDm schema version, " + m.group(1) + + " does not meet minimum requirement of " + + ".".join(self._minnerdmver), sys=self) + else: + raise InvalidUpdate("Unsupported schema for NERDm schema requested: " + schemaid, + sys=self) + + # create a record in the metadata store + if self._store.exists(prec.id): + self.log.warning("NERDm data for id=%s unexpectedly found in metadata store", prec.id) + self._store.load_from(self._new_data_for(prec.id, prec.meta, schemaid), prec.id) + nerd = self._store.open(prec.id) + prec.data = self._summarize(nerd) + + if data: + self.update_data(prec.id, data, prec=prec, nerd=nerd) # this will call prec.save() + else: + prec.save() + + except Exception as ex: + if nerd: + try: + nerd.delete() + except Exception as ex: + self.log.error("Error while cleaning up NERDm data after create failure: %s", str(ex)) + try: + prec.delete() + except Exception as ex: + self.log.error("Error while cleaning up DAP record after create failure: %s", str(ex)) + raise + + return prec + + def _new_data_for(self, recid, meta=None, schemaid=None): + if not schemaid: + schemaid = NERDM_SCH_ID + out = OrderedDict([ + ("_schema", schemaid), + ("@context", NERDM_CONTEXT), + (EXTSCHPROP, [NERDMPUB_DEF + "PublicDataResource"]), + ("@id", 
self._arkid_for(recid)), + ("@type", [":".join([NERDMPUB_PRE, "PublicDataResource"]), "dcat:Resource"]) + ]) + + if self.cfg.get('assign_doi') == ASSIGN_DOI_ALWAYS: + out['doi'] = self._doi_for(recid) + + if meta: + if meta.get("resourceType"): + addtypes = [] + if meta['resourceType'].lower() == "software": + addtypes = [":".join([NERDPUB_PRE, "Software"])] + elif meta['resourceType'].lower() == "srd": + addtypes = [":".join([NERDPUB_PRE, "SRD"])] + out["@type"] = addtypes + out["@type"] + + if meta.get("softwareLink"): + swcomp = self._get_sw_desc_for(meta["softwareLink"]) + if not 'components' in out: + out['components'] = [] + out['components'] = [swcomp] + out['components'] + + # contact info + + return out + + def _get_sw_desc_for(self, link): + id = link.rsplit('/', 1)[-1] + id = "%s/repo:%s" % (const.LINKCMP_EXTENSION.lstrip('/'), id) + out = OrderedDict([ + ("@id", id), + ("@type", ["nrd:AccessPage", "dcat:Distribution"]), + ("title", "Software Repository"), + ("accessURL", link) + ]) + if link.startswith("https://github.com/"): + out['title'] += " in GitHub" + return out + + def _doi_for(self, recid): + naan = self.cfg.get('doi_naan') + if not naan: + raise PublishingStateException("DOI NAAN not set in configuration") + return "doi:%s/%s" % (naan, self._aipid_for(recid)) + + def _arkid_for(self, recid): + return "ark:/%s/%s" % (const.ARK_NAAN, self._aipid_for(recid)) + + def _aipid_for(self, recid): + return '-'.join(recid.split(':', 1)) + + def _moderate_metadata(self, mdata: MutableMapping, shoulder=None): + # only accept expected keys + allowed = "resourceType creatorisContact contactName willUpload provideLink softwareLink assocPageType".split() + mdata = OrderedDict([p for p in mdata.items() if p[0] in allowed]) + + out = super()._moderate_metadata(mdata, shoulder) + if isinstance(out.get('creatorisContact'), str): + out['creatorisContact'] = out['creatorisContact'].lower() == "true" + elif out.get('creatorisContact') is None: + 
out['creatorisContact'] = true + + return out + + def _new_metadata_for(self, shoulder=None): + return OrderedDict([ + ("resourceType", "data"), + ("creatorisContact", True) + ]) + + def replace_data(self, id, newdata, part=None, prec=None, nerd=None): + """ + Replace the currently stored data content of a record with the given data. It is expected that + the new data will be filtered/cleansed via an internal call to :py:method:`dress_data`. + :param str id: the identifier for the record whose data should be updated. + :param str newdata: the data to save as the new content. + :param stt part: the slash-delimited pointer to an internal data property. If provided, + the given `newdata` is a value that should be set to the property pointed + to by `part`. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. + If this is not provided, the record will by fetched anew based on the `id`. + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. + :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + would otherwise result in invalid data content. + """ + return self._update_data(id, ndwdata, part, prec, nerd, True) + + def update_data(self, id, newdata, part=None, prec=None, nerd=None): + """ + merge the given data into the currently save data content for the record with the given identifier. + :param str id: the identifier for the record whose data should be updated. + :param str newdata: the data to save as the new content. + :param stt part: the slash-delimited pointer to an internal data property. 
If provided, + the given `newdata` is a value that should be set to the property pointed + to by `part`. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. + If this is not provided, the record will by fetched anew based on the `id`. + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. + :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + would otherwise result in invalid data content. + """ + return self._update_data(id, newdata, part, prec, nerd, False) + + def clear_data(self, id, part=None, prec=None): + """ + remove the stored data content of the record and reset it to its defaults. + :param str id: the identifier for the record whose data should be cleared. + :param stt part: the slash-delimited pointer to an internal data property. If provided, + only that property will be cleared (either removed or set to an initial + default). + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. + If this is not provided, the record will by fetched anew based on the `id`. + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises PartNotAccessible: if clearing of the part of the data specified by `part` is not allowed. 
+ """ + if not prec: + prec = self.dbcli.get_record_for(id, ACLs.WROTE) # may raise ObjectNotFound/NotAuthorized + + if not self._store.exists(id): + self.log.warning("NERDm data for id=%s not found in metadata store", prec.id) + nerd = self._new_data_for(prec.id, prec.meta) + self._store.load_from(nerd) + nerd = self._store.open(id) + + if part: + if part == "authors": + nerd.authors.empty() + elif part == "references": + nerd.references.empty() + elif part == "components": + nerd.files.empty() + nerd.nonfiles.empty() + elif part in "title rights disclaimer description".split(): + resmd = nerd.get_res_data() + del resmd[part] + nerd.replace_res_data(resmd) + else: + raise PartNotAccessible(prec.id, path, "Clearing %s not allowed" % path) + + else: + nerd.authors.empty() + nerd.references.empty() + nerd.files.empty() + nerd.nonfiles.empty() + nerd.replace_res_data(self._new_data_for(prec.id, prec.meta)) + + + def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=False): + if not prec: + prec = self.dbcli.get_record_for(id, ACLs.WROTE) # may raise ObjectNotFound/NotAuthorized + + if not nerd: + if not self._store.exists(id): + self.log.warning("NERDm data for id=%s not found in metadata store", prec.id) + nerd = self._new_data_for(prec.id, prec.meta) + if prec.data.get("title"): + nerd["title"] = prec.data.get("title") + self._store.load_from(nerd) + + nerd = self._store.open(id) + + if not part: + # this is a complete replacement; save updated NERDm data to the metadata store + try: + data = self._update_all_nerd(prec, nerd, newdata, replace) + except InvalidUpdate as ex: + ex.record_id = prec.id + raise + + else: + # replacing just a part of the data + try: + data = self._update_part_nerd(prec, nerd, part, newdata, replace) + except InvalidUpdate as ex: + ex.record_id = prec.id + ex.record_part = part + raise + + prec.data = self._summarize(nerd) + prec.save() + + return data + + def _summarize(self, nerd: NERDResource): + resmd = 
nerd.get_res_data() + out = OrderedDict() + out["@id"] = resmd.get("@id") + out["title"] = resmd.get("title","") + out["_schema"] = resmd.get("_schema", NERDM_SCH_ID) + out["@type"] = resmd.get("@type", ["nrd:Resource"]) + if 'doi' in resmd: + out["doi"] = resmd["doi"] + out["author_count"] = nerd.authors.count + out["file_count"] = nerd.files.count + out["nonfile_count"] = nerd.nonfiles.count + out["reference_count"] = nerd.references.count + return out + + _handsoff = ("@id @context publisher issued firstIssued revised annotated " + \ + "bureauCode programCode systemOfRecords primaryITInvestmentUII " + \ + "doi ediid releaseHistory status theme").split() + + def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, data: Mapping, replace=False): + # filter out properties that the user is not allow to update + newdata = OrderedDict() + for prop in data: + if not prop.startswith("_") and prop not in self._handsoff: + newdata[prop] = data[prop] + + errors = [] + authors = newdata.get('authors') + if authors: + del newdata['authors'] + authors = self._moderate_authors(authors, nerd, replace) + refs = newdata.get('references') + if refs: + del newdata['references'] + refs = self._moderate_references(refs, nerd, replace) + + comps = newdata.get('components') + files = [] + nonfiles = [] + if comps: + del newdata['components'] + for cmp in comps: + if 'filepath' in cmp: + files.append(self._moderate_file(cmp)) + else: + nonfiles.append(self._moderate_nonfile(cmp)) + comps = nonfiles + files + + # handle resource-level data: merge the new data into the old and validate the result + if replace: + oldresdata = self._new_data_for(prec.id, prec.meta, newdata.get("_schema")) + else: + oldresdata = nerd.get_data(False) + + # merge and validate the resource-level data + newdata = self._moderate_res_data(newdata, oldresdata, nerd, replace) # may raise InvalidUpdate + + # all data is merged and validated; now commit + nerd.replace_res_data(newdata) + if authors: + 
self._update_part_nerd("authors", prec, nerd, authors, replace, doval=False) + if refs: + self._update_part_nerd("references", prec, nerd, refs, replace, doval=False) + if comps: + self._update_part_nerd("components", prec, nerd, comps, replace, doval=False) + + return nerd.get_data(True) + + +################# + + + def validate_json(self, json, schemauri=None): + """ + validate the given JSON data record against the give schema, raising an exception if it + is not valid. + + :param dict json: the (parsed) JSON data to validate + :param str schemauri: the JSONSchema URI to validate the input against. + :raises InvalidUpdate: if the data is found to be invalid against the schema; the exception's + ``errors`` property will list all the errors found. + """ + errors = [] + if self._valid8r: + if not schemauri: + schemauri = json.get("_schema") + if not schemauri: + raise ValueError("validate_json(): No schema URI specified for input data") + errors = self._valid8r.validate(json, schemauri=schemauri, strict=True, raiseex=False) + else: + self.log.warning("Unable to validate submitted NERDm data") + + if len(errors) > 0: + raise InvalidUpdate("NERDm Schema validation errors found", errors=errors, sys=self) + + + def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data: Mapping, + replace=False, doval=True): + schemabase = prec.data.get("_schema") or NERDMPUB_SCH_ID + + m = re.search(r'^([a-z]+s)\[([\w\d]+)\]$', path) + if m: + # path is of the form xxx[k] and refers to an item in a list + key = m.group(3) + try: + key = int(key) + except ValueError: + pass + + if m.group(1) == "authors": + self._update_author(prec, nerd, data, replace, doval=doval) + elif m.group(1) == "references": + data["_schema"] = schemabase+"/definitions/BibliographicReference" + self._update_reference(prec, nerd, data, replace, doval=doval) + elif m.group(1) == "components": + data["_schema"] = schemabase+"/definitions/Component" + self._update_component(prec, nerd, 
data, replace, doval=doval) + else: + raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + + elif path == "authors": + if replace: + self._replace_authors(prec, nerd, data, doval=doval) + else: + self._update_authors(prec, nerd, data, doval=doval) + elif path == "references": + if replace: + self._replace_references(prec, nerd, data, doval=doval) + else: + self._update_references(prec, nerd, data, doval=doval) + elif path == "components": + if replace: + self._replace_components(prec, nerd, data, doval=doval) + else: + self._update_components(prec, nerd, data, doval=doval) + + elif path == "contactPoint": + if not isinstance(data, Mapping): + raise InvalidUpdate("contactPoint data is not an object", sys=self) + res = nerd.get_res_data() + res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) + # may raise InvalidUpdate + nerd.replace_res_data(res) + + elif path == "@type": + if not isinstance(data, (list, str)): + raise InvalidUpdate("@type data is not a list of strings", sys=self) + res = nerd.get_res_data() + res = self._moderate_restype(data, res, nerd, replace=replace, doval=doval) + nerd.replace_res_data(res) + + elif path == "description": + if not isinstance(data, (list, str)): + raise InvalidUpdate("description data is not a list of strings", sys=self) + res = nerd.get_res_data() + res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate + nerd.replace_res_data(res) + + elif path in "title rights disclaimer".split(): + if not isinstance(data, str): + raise InvalidUpdate("%s value is not a string" % path, sys=self) + res = nerd.get_res_data() + res[path] = self._moderate_text(data, res, doval=doval) # may raise InvalidUpdate + nerd.replace_res_data(res) + + else: + raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + + def _moderate_text(self, val, resmd=None, doval=True): + # make sure input value is the right type, is properly encoded, and + # does 
not contain any illegal bits + if doval and not isinstance(val, str): + raise InvalidUpdate("Text value is not a string", sys=self) + return val + + def _moderate_description(self, val, resmd=None, doval=True): + if not isinstance(val, list): + val = [val] + return [self._moderate_text(t, resmd, doval=doval) for t in val if t != ""] + + _pfx_for_type = OrderedDict([ + ("ScienceTheme", NERDMAGG_PRE), + ("ExperimentalData", NERDMEXP_PRE), + ("DataPublication", NERDMPUB_PRE), + ("SoftwarePublication", NERDMSW_PRE), + ("Aggregation", NERDMAGG_PRE), + ("PublicDataResource", NERDMPUB_PRE), + ("Resource", NERDM_PRE) + ]) + _schema_for_pfx = { + NERDM_PRE: NERDM_SCH_ID, + NERDMPUB_PRE: NERDMPUB_SCH_ID, + NERDMAGG_PRE: NERDMAGG_SCH_ID, + NERDMSW_PRE: NERDMSW_SCH_ID + } + + def _moderate_restype(self, types, resmd, nerd=None, replace=True, doval=True): + if not isinstance(types, list): + types = [types] + if any([not isinstance(t, str) for t in types]): + raise InvalidUpdate("@type data is not a list of strings", sys=self) + + # separate NERDm Resource types and allowed non-NERDm types; throw away others + if not replace: + types = resmd.get("@type",[]) + types + exttypes = [] + nrdtypes = set() + for tp in types: + parts = tp.split(':', 1) + if parts[-1] in self._pfx_for_type and (len(parts) == 1 or parts[0].startswith("nrd")): + nrdtypes.add(parts[-1]) + elif len(parts) == 2 and parts[0] in ["schema", "dcat"] and tp not in exttypes: + exttypes.append(tp) + + # set some default types based on the presence of other metadata + if nerd and nerd.authors.count > 0 and \ + "SoftwarePublication" not in nrdtypes and "DataPublication" not in nrdtypes: + nrdtypes.add("DataPublication") + + if "ExperimentalData" not in nrdtypes: + if self._has_exp_prop(resmd): + nrdtypes.add("ExperimentalData") + + if not nrdtypes: + nrdtypes.add("PublicDataResource") + + extschemas = [] + if "DataPublication" in nrdtypes: + extschemas.append(NERDMPUB_DEF + "DataPublication") + elif 
"PublicDataResource" in nrdtypes: + extschemas.append(NERDMPUB_DEF + "PublicDataResource") + if "SoftwarePublication" in nrdtypes: + extschemas.append(NERDMSW_DEF + "SoftwarePublication") + + if "ScienceTheme" in nrdtypes: + extschemas.append(NERDMAGG_DEF + "ScienceTheme") + elif "Aggregation" in nrdtypes: + extschemas.append(NERDMAGG_DEF + "Aggregation") + + if "ExperimentalData" in nrdtypes: + extschemas.append(NERDMEXP_DEF + "ExperimentalData") + + # ensure proper prefixes and conventional order for NERDm types + types = [] + for tp in self._pfx_for_type: + if tp in nrdtypes: + types.append("%s:%s" % (self._pfx_for_type[tp], tp)) + types += exttypes + + resmd["@type"] = types + if extschemas: + resmd[EXTSCHPROP] = extschemas + + if doval: + self.validate_json(resmd) + return resmd + + def _has_exp_prop(self, md): + for prop in ("instrumentsUsed isPartOfProjects acquisitionStartTime hasAcquisitionStart "+ + "acquisitionEndTime hasAcquisitionEnd").split(): + if prop in md: + return True + return False + + _contact_props = set("fn hasEmail postalAddress phoneNumber timezone proxyFor".split()) + def _moderate_contact(self, info, resmd=None, replace=False, doval=True): + if not isinstance(info, Mapping): + raise InvalidUpdate("contactPoint data is not an object", sys=self) + info = OrderedDict([(k,v) for k,v in info.items() if k in self._contact_props]) + + if not replace and resmd and resmd.get('contactInfo'): + info = self._merge_into(info, resmd['contactInfo']) + info['@type'] = "vcard:Contact" + + if doval: + schemauri = NERDM_SCH_ID + "/definitions/ContactInfo" + if resmd and resmd.get("_schema"): + schemauri = resmd["_schema"] + "/definitions/ContactInfo" + self.validate_json(info, schemauri) + + return info + + def _replace_authors(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): + if not isinstance(data, list): + raise InvalidUpdate("authors data is not a list", sys=self) + self._replace_listitems(nerd.authors, self._moderate_author, data) 
+ + def _update_author(self, nerd: NERDResource, data: Mapping, pos: int=None, replace=False): + if not isinstance(data, Mapping): + raise InvalidUpdate("author data is not an object", sys=self) + self._update_listitem(nerd.authors, self._moderate_author, data, pos, replace) + + def _update_authors(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): + if not isinstance(data, list): + raise InvalidUpdate("authors data is not a list", sys=self) + self._update_objlist(nerd.authors, self._moderate_author, data) + + def _replace_references(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): + if not isinstance(data, list): + raise InvalidUpdate("references data is not a list", sys=self) + self._replace_listitems(nerd.references, self._moderate_reference, data) + + def _update_reference(self, nerd: NERDResource, data: Mapping, pos: int=None, replace=False): + if not isinstance(data, Mapping): + raise InvalidUpdate("reference data is not an object", sys=self) + self._update_listitem(nerd.references, self._moderate_reference, data, pos, replace) + + def _update_references(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): + if not isinstance(data, list): + raise InvalidUpdate("references data is not a list", sys=self) + self._update_objlist(nerd.references, self._moderate_reference, data) + + + def _replace_listitems(self, objlist, moderate_func, data: List[Mapping]): + data = [ moderate_func(a) for a in data ] # may raise InvalidUpdate + objlist.empty() + for item in data: + objlist.append(auth) + + def _update_listitem(self, objlist, moderate_func, data: Mapping, pos: int=None, replace=False): + key = pos + if key is None: + key = data.get("@id") + olditem = None + if key: + try: + olditem = objlist.get(key) + if not replace: + data = self._merge_into(data, olditem) + except (KeyError, IndexError) as ex: + pass + + data = moderate_func(data) # may raise InvalidUpdate + + if olditem is None: + objlist.append(data) + 
else: + objlist.set(key, data) + + def _update_objlist(self, objlist, moderate_func, data: List[Mapping]): + # merge and validate all items before committing them + for i, a in enumerate(data): + olditem = None + if a.get('@id'): + try: + olditem = objlist.get(a['@id']) + data[i] = self._merge_into(a, olditem) + except KeyError as ex: + pass + data[i] = moderate_func(data[i]) # may raise InvalidUpdate + + # now commit + for a in data: + if a.get('@id'): + objlist.set(a['@id'], a) + else: + objlist.append(a) + + def _replace_components(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): + if not isinstance(data, list): + raise InvalidUpdate("authors data is not a list", sys=self) + data = [ self._moderate_comp(a) for a in data ] # may raise InvalidUpdate + nerd.nonfiles.empty() + nerd.files.empty() + for cmp in data: + if 'filepath' in cmp: + nerd.files.set_file_at(cmp, cmp['filepath']) + else: + nerd.nonfiles.append(cmp) + + def _update_component(self, nerd: NERDResource, data: Mapping, pos: int=None, replace=False): + if not isinstance(data, Mapping): + raise InvalidUpdate("component data is not an object", sys=self) + if 'filepath' in data: + self.update_listitem(nerd.files, self._moderate_file, pos, replace) + else: + self.update_listitem(nerd.nonfiles, self._moderate_nonfile, pos, replace) + + def _update_components(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): + if not isinstance(data, list): + raise InvalidUpdate("references data is not a list", sys=self) + + # merge and validate all items before committing them + for i, cmp in enumerate(data): + oldcmp = None + if cmp.get('@id'): + try: + oldcmp = objlist.get(a['@id']) + data[i] = self._merge_into(cmp, oldcmp) + except KeyError as ex: + pass + if 'filepath' in cmp: + data[i] = self._moderate_file(data[i]) # may raise InvalidUpdate + else: + data[i] = self._moderate_nonfile(data[i]) # may raise InvalidUpdate + + # now commit + for a in data: + objlist = nerd.files if 
'filepath' in cmp else nerd.nonfiles + if a.get('@id'): + objlist.set(a['@id'], a) + else: + objlist.append(a) + + def _filter_props(self, obj, props): + delprops = [k for k in obj if k not in props or (not obj.get(k) and obj.get(k) is not False)] + for k in delprops: + del obj[k] + + _authprops = set("_schema fn familyName givenName middleName orcid affiliation proxyFor".split()) + _affilprops = set("@id title abbrev proxyFor location label description subunits".split()) + + def _moderate_author(self, auth, doval=True): + # we are assuming that merging has already occured + + self._filter_props(auth, self._authprops) + auth["@type"] = "foaf:Person" +# Set fn at finalization +# if not auth.get('fn') and auth.get('familyName') and auth.get('givenName'): +# auth['fn'] = auth['familyName'] +# if auth.get('givenName'): +# auth['fn'] += ", %s" % auth['givenName'] +# if auth.get('middleName'): +# auth['fn'] += " %s" % auth['middleName'] + + if isinstance(auth.get('affiliation',[]), str): + auth['affiliation'] = [OrderedDict([('title', auth['affiliation'])])] + elif not isinstance(auth.get('affiliation', []), list): + del auth['affiliation'] + if auth.get('affiliation'): + affils = auth['affiliation'] + for affil in affils: + self._filter_props(affil, self._affilprops) + affil["@type"] = "org:Organization" + if affil.get("title") == "NIST": + affil["title"] = NIST_NAME + if affil.get("title") == NIST_NAME: + affil["@id"] = NIST_ROR + if not affil.get("abbrev"): + affil["abbrev"] = [ NIST_ABBREV ] + else: + if not isinstance(affil["abbrev"], list): + raise InvalidUpdate("Affiliate abbrev property is not a list: "+ + str(affil["abbrev"])) + affil["abbrev"].append(NIST_ABBREV) + + # Finally, validate (if requested) + schemauri = NERDMPUB_SCH_ID + "/definitions/Person" + if auth.get("_schema"): + if not auth['_schema'].startswith(NERDMPUB_SCH_ID_BASE): + raise InvalidUpdate("Unsupported author schema: "+auth['_schema'], sys=self) + schemauri = auth['_schema'] + del 
auth['_schema'] + if doval: + self.validate_json(auth, schemauri) + + return auth + + _refprops = set(("@id _schema _extensionSchemas title abbrev proxyFor location label "+ + "description citation refType doi inprep").split()) + _reftypes = set(("IsDocumentedBy IsSupplementTo IsSupplementedBy IsCitedBy Cites IsReviewedBy "+ + "IsReferencedBy References IsSourceOf IsDerivedFrom "+ + "IsNewVersionOf IsPreviousVersionOf").split()) + def _moderate_reference(self, ref, doval=True): + # QUESTION/TODO: new properties? doi?, inprep? + # we are assuming that merging has already occured + self._filter_props(ref, self._refprops) + if not ref.get("refType"): + ref["refType"] = "References" + if not ref.get(EXTSCHPROP) and ref["refType"] in self._reftypes: + ref.setdefault(EXTSCHPROP, []) + try: + # upgrade the version of the BIB extension + if any(s.startswith(NERDMBIB_SCH_ID_BASE) and s != NERDMBIB_SCH_ID + for s in ref[EXTSCHPROP]): + ref[EXTSCHPROP] = [NERDMBIB_SCH_ID if s.startswith(NERDMBIB_SCH_ID_BASE) + else s for s in ref[EXTSCHPROP]] + except AttributeError as ex: + raise InvalidUpdate("_extensionSchemas: value is not a list of strings", sys=self) from ex + if NERDMBIB_SCH_ID not in ref[EXTSCHPROP]: + ref[EXTSCHPROP].append(NERDMBIB_SCH_ID) + + if not ref.get("@type"): + ref["@type"] = ["deo:BibliographicReference"] + + try: + if not ref.get("location") and ref.get("proxyFor"): + if ref["proxyFor"].startswith("doi:"): + ref["location"] = "https://doi.org/" + ref["proxyFor"][4:] + elif ref["proxyFor"].startswith("https://doi.org/"): + ref["location"] = ref["proxyFor"] + ref["proxyFor"] = "doi:" + ref["proxyFor"][len("https://doi.org/"):] + elif not ref.get("proxyFor") and ref.get("location","").startswith("https://doi.org/"): + ref["proxyFor"] = "doi:" + ref["location"][len("https://doi.org/"):] + + except AttributeError as ex: + raise InvalidUpdate("location or proxyFor: value is not a string", sys=self) from ex + + # Finally, validate (if requested) + schemauri = 
NERDM_SCH_ID + "/definitions/BibliographicReference" + if ref.get("_schema"): + if not ref['_schema'].startswith(NERDM_SCH_ID_BASE): + raise InvalidUpdate("Unsupported schema for a reference: "+ref['_schema'], sys=self) + schemauri = ref['_schema'] + del ref['_schema'] + if doval: + self.validate_json(ref, schemauri) + + return ref + + def _moderate_file(self, cmp, doval=True): + # Note private assumptions: cmp contains filepath property + if '_extensionSchemas' not in cmp: + cmp['_extensionSchemas'] = [] + if not isinstance(cmp.get('_extensionSchemas',[]), list) or \ + not all(isinstance(s, str) for s in cmp.get('_extensionSchemas',[])): + msg = "Component " + if cmp.get("filepath") or cmp.get("@id"): + msg += "%s " % (cmp.get("filepath") or cmp.get("@id")) + msg += "_extensionSchemas: not a list of strings" + raise InvalidUpdate(msg, sys=self) + + # ensure @type is set to something recognizable + if cmp.get('downloadURL'): + if not nerdutils.is_type(cmp, "DownloadableFile"): + nerdutils.insert_type(cmp, "nrdp:DownloadableFile", "dcat:Distribution") + if not nerdutils.is_any_type(cmp, ["DataFile", "ChecksumFile"]): + nerdutils.insert_type(cmp, "nrdp:DataFile", "nrdp:DownloadableFile", "dcat:Distribution") + else: + if not nerdutils.is_type(cmp, "Subcollection"): + nerdutils.insert_type(cmp, "nrdp:Subcollection") + + if self._has_exp_prop(cmp): + # contains experimental data + nerdutils.insert_type(cmp, "nrde:AcquisitionActivity", "dcat:Distribution") + + # set the mediaType and format if needed: + if nerdutils.is_type(cmp, "DownloadableFile"): + filext = os.path.splitext(cmp.get("filepath",""))[-1].lstrip('.') + if not cmp.get("mediaType"): + cmp["mediaType"] = self._mediatypes.get(filext, "application/octet-stream") + + if not cmp.get("format"): + fmt = self._guess_format(filext, cmp["mediaType"]) + if fmt: + cmp["format"] = fmt + + # make sure the _extensionSchemas list is filled out + cmp.setdefault(EXTSCHPROP, []) + if nerdutils.is_type(cmp, "DataFile") and \ 
+ not any(s.endswith("#/definitions/DataFile") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"DataFile") + elif nerdutils.is_type(cmp, "ChecksumFile") and \ + not any(s.endswith("#/definitions/ChecksumFile") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"ChecksumFile") + elif nerdutils.is_type(cmp, "DownloadableFile") and \ + not any(s.endswith("#/definitions/DownloadableFile") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"DownloadableFile") + + if nerdutils.is_type(cmp, "Subcollection") and \ + not any(s.endswith("#/definitions/Subcollection") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"Subcollection") + if nerdutils.is_type(cmp, "AcquisitionActivity") and \ + not any(s.endswith("#/definitions/AcquisitionActivity") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMEXP_DEF+"AcquisitionActivity") + + # Finally, validate (if requested) + schemauri = NERDM_SCH_ID + "/definitions/Component" + if cmp.get("_schema"): + if not cmp['_schema'].startswith(NERDM_SCH_ID_BASE): + raise InvalidUpdate("Unsupported component schema: "+cmp['_schema'], sys=self) + schemauri = cmp['_schema'] + del cmp['_schema'] + if doval: + self.validate_json(cmp, schemauri) + + return cmp + + def _moderate_nonfile(self, cmp, doval=True): + if 'filepath' in cmp and not cmp.get('filepath'): + del cmp['filepath'] + if not cmp: + raise InvalidUpdate("Empty compomponent included: "+str(cmp)) + if cmp.get('filepath') or nerdutils.is_any_type(cmp, ["Subcollection", "DownloadableFile", + "DataFile", "ChecksumFile"]): + msg = cmp.get("@id","") + if msg: + msg += ": " + msg += "Non-file component includes some file component content" + raise InvalidUpdate(msg, sys=self) + + # we make sure a specific @type is set. First filter out in consequential ones. 
+ cmp.setdefault("@type", []) + types = [t for t in cmp["@type"] + if not any(t.endswith(":"+p) for p in ["Component", "Distribution", "Document"])] + + # If a type is set, we'll make no assumptions as to the meaning of non-Component properties + # (and we'll let validation detect issues). Otherwise, guess the type based on properties. + if not types: + extschs = cmp.get(EXTSCHPROP, []) + if cmp.get("accessURL"): + # it's an access page of some kind + cmp["@type"].insert(0, "nrdp:AccessPage") + + elif cmp.get("searchURL"): + # it's a DynamicResourceSet + cmp["@type"].insert(0, "nrdg:DynamicResourceSet") + + elif cmp.get("resourceType") or cmp.get("proxyFor"): + # it's an included resource + cmp["@type"].insert(0, "nrd:IncludedResource") + + if self._has_exp_prop(cmp) and not nerdutils.is_type(cmp, "AcquisitionActivity"): + # points to experimental data + nerdutils.insert_type(cmp, "nrde:AcquisitionActivity", "dcat:Distribution") + + cmp.setdefault(EXTSCHPROP, []) + if nerdutils.is_type(cmp, "AccessPage") and \ + not any(s.endswith("#/definitions/AccessPage") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"AccessPage") + if nerdutils.is_type(cmp, "SearchPage") and \ + not any(s.endswith("#/definitions/SearchPage") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"SearchPage") + if nerdutils.is_type(cmp, "API") and \ + not any(s.endswith("#/definitions/API") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMSW_DEF+"API") + if nerdutils.is_type(cmp, "DynamicResourceSet") and \ + not any(s.endswith("#/definitions/DynamicResourceSet") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMAGG_DEF+"DynamicResourceSet") + if nerdutils.is_type(cmp, "IncludedResource") and \ + not any(s.endswith("#/definitions/IncludedResource") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDM_DEF+"IncludedResource") + if nerdutils.is_type(cmp, "AcquisitionActivity") and \ + not any(s.endswith("#/definitions/AcquisitionActivity") for s in 
cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMEXP_DEF+"AcquisitionActivity") + + # Finally, validate (if requested) + schemauri = NERDM_SCH_ID + "/definitions/Component" + if cmp.get("_schema"): + if not cmp['_schema'].startswith(NERDM_SCH_ID_BASE): + raise InvalidUpdate("Unsupported component schema: "+cmp['_schema'], sys=self) + schemauri = cmp['_schema'] + del cmp['_schema'] + if doval: + self.validate_json(cmp, schemauri) + + return cmp + + def _moderate_res_data(self, resmd, basemd, nerd, replace=False, doval=True): + restypes = resmd.get("@type", []) + if not replace: + restypes += basemd.get("@type", []) + resmd = self._merge_into(resmd, basemd) + resmd["@type"] = restypes + + errors = [] + if 'contactPoint' in resmd: + if not resmd.get("contactPoint"): + del resmd["contactPoint"] + else: + try: + resmd["contactPoint"] = self._moderate_contact(resmd["contactPoint"], resmd, + replace=True, doval=False) + except InvalidUpdate as ex: + errors.extend(ex.errors) + + if 'description' in resmd: + if not resmd.get("description"): + del resmd["description"] + else: + try: + resmd["description"] = self._moderate_description(resmd["description"], resmd, + doval=False) + except InvalidUpdate as ex: + errors.extend(ex.errors) + + resmd.setdefault("@type", []) + try: + resmd = self._moderate_restype(resmd["@type"], resmd, nerd, replace=True, doval=False) + except InvalidUpdate as ex: + errors.extend(ex.errors) + + if errors: + raise InvalidUpdate(errors=errors, sys=self) + + if doval: + self.validate_json(resmd) + return resmd + + +class DAPServiceFactory(ProjectServiceFactory): + """ + Factory for creating DAPService instances attached to a backend DB implementation and which act + on behalf of a specific user. 
The configuration parameters that can be provided to this factory + is the union of those supported by the following classes: + * :py:class:`DAPService` (``assign_doi`` and ``doi_naan``) + * :py:class:`~nistoar.midas.dbio.project.ProjectService` (``clients`` and ``dbio``) + """ + + def __init__(self, dbclient_factory: DBClientFactory, config: Mapping={}, log: Logger=None, + project_coll: str=None): + """ + create a service factory associated with a particular DB backend. + :param DBClientFactory dbclient_factory: the factory instance to use to create a DBClient to + talk to the DB backend. + :param Mapping config: the configuration for the service (see class-level documentation). + :param Logger log: the Logger to use in the service. + :param str project_coll: the project type (i.e. the DBIO project collection to access); + default: "dap". + """ + if not project_coll: + project_coll = DAP_PROJECTS + super(DAPServiceFactory, self).__init__(project_coll, dbclient_factory, config, log) + + def create_service_for(self, who: PubAgent=None): + """ + create a service that acts on behalf of a specific user. 
+ :param PubAgent who: the user that wants access to a project + """ + return DAPService(self._dbclifact, self._cfg, who, self._log, self._prjtype) + + +class DAPApp(MIDASProjectApp): + """ + A MIDAS SubApp supporting a DAP service + """ + + def __init__(self, dbcli_factory: DBClientFactory, log: Logger, config: dict={}, project_coll: str=None): + service_factory = DAPServiceFactory(dbcli_factory, config, project_coll) + super(DAPApp, self).__init__(service_factory, log.getChild(DAP_PROJECTS), config) + diff --git a/python/nistoar/midas/dap/service/validate.py b/python/nistoar/midas/dap/service/validate.py index 7cdf781..ce15a9a 100644 --- a/python/nistoar/midas/dap/service/validate.py +++ b/python/nistoar/midas/dap/service/validate.py @@ -22,6 +22,12 @@ def load_schema(self, uri): if "required" in sch: del sch["required"] + sch = out.get("definitions",{}).get("BibliographicReference",{}).get("allOf", [{},{}]) + if len(sch) > 1 and "required" in sch[1] and "@id" in sch[1]["required"]: + sch[1]["required"] = [p for p in sch[1]["required"] if p != "@id"] + if not sch[1]["required"]: + del sch[1]["required"] + elif out["id"].startswith(CORE_SCHEMA_BASE+"rls/"): # this is the pub NERDm extension schema: drop the "required" property from the # PublicDataResource schema definition @@ -36,9 +42,14 @@ def load_schema(self, uri): if len(sch) > 1 and "required" in sch[1]: del sch[1]["required"] + # and from Person + sch = out.get("definitions",{}).get("Person",{}) + if "required" in sch: + del sch["required"] + return out -def create_lenient_validator(schemadir, ejsprefix="_"): +def create_lenient_validator(schemadir, forprefix="_"): """ return a validator instance (ejsonschema.ExtValidator) that can validate NERDm records, but which is slightly more lenient for NERDm schemas. 
@@ -60,12 +71,12 @@ def create_lenient_validator(schemadir, ejsprefix="_"): """ if isinstance(forprefix, Mapping): forprefix = get_mdval_flavor(forprefix) or "_" - if not isinstance(forprefix, (str, unicode)): + if not isinstance(forprefix, str): raise TypeError("create_validator: forprefix: not a str or dict") loader = LenientSchemaLoader.from_directory(schemadir) - return ejs.ExtValidator.with_schema_dir(loader, forprefix) + return ejs.ExtValidator(loader, forprefix) diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 6a421bd..f2bdd60 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -248,6 +248,8 @@ def _merge_into(self, update: Mapping, base: Mapping, depth: int=-1): else: base[prop] = update[prop] + return base + def _new_data_for(self, recid, meta=None): """ return an "empty" data object set for a record with the given identifier. The returned @@ -483,6 +485,7 @@ def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=N super(InvalidUpdate, self).__init__(message) self.record_id = recid self.record_part = part + self.errors = errors def __str__(self): out = "" @@ -508,7 +511,7 @@ def format_errors(self): if self.record_part: out += " in data submitted to update %s" % self.record_part out += ":\n * " - out += "\n * ".join(self.errors) + out += "\n * ".join([str(e) for e in self.errors]) return out class PartNotAccessible(DBIOException): diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py new file mode 100644 index 0000000..e3a0ba4 --- /dev/null +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -0,0 +1,486 @@ +import os, json, pdb, logging, tempfile +import unittest as test + +from nistoar.midas.dbio import inmem, base, AlreadyExists, InvalidUpdate +from nistoar.midas.dbio import project as prj +from nistoar.midas.dap.service import mds3 +from nistoar.pdr.publish import prov 
+from nistoar.nerdm.constants import CORE_SCHEMA_URI + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_mds3.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_mds3.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + + +class TestMDS3DAPService(test.TestCase): + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mdsy" + }, + "default": { + "default_shoulder": "mdsy" + } + }, + "dbio": { + "allowed_project_shoulders": ["mdsy", "spc1"], + "default_shoulder": "mdsy", + }, + "assign_doi": "always", + "doi_naan": "88888", + "nerdstorage": { +# "type": "fsbased", +# "store_dir": os.path.join(tmpdir.name) + "type": "inmem", + } + } + self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdsy": 2 }}) + + def create_service(self): + self.svc = mds3.DAPService(self.dbfact, self.cfg, nistr, rootlog.getChild("mds3")) + self.nerds = self.svc._store + return self.svc + + def test_ctor(self): + self.create_service() + self.assertTrue(self.svc.dbcli) + self.assertEqual(self.svc.cfg, self.cfg) + self.assertEqual(self.svc.who.actor, "nstr1") + self.assertEqual(self.svc.who.group, "midas") + self.assertTrue(self.svc.log) + self.assertTrue(self.svc._store) + self.assertTrue(self.svc._valid8r) + self.assertEqual(self.svc._minnerdmver, (0, 6)) + + def test_create_record(self): + self.create_service() + self.assertTrue(not self.svc.dbcli.name_exists("goob")) + + prec = self.svc.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdsy:0003") + self.assertEqual(prec.meta, {"creatorisContact": True, "resourceType": "data"}) 
+ self.assertEqual(prec.owner, "nstr1") + self.assertIn("_schema", prec.data) + self.assertNotIn("_extensionSchemas", prec.data) # contains only data summary + self.assertEqual(prec.data['doi'], "doi:88888/mdsy-0003") + self.assertEqual(prec.data['@id'], "ark:/88434/mdsy-0003") + + self.assertTrue(self.svc.dbcli.name_exists("goob")) + prec2 = self.svc.get_record(prec.id) + self.assertEqual(prec2.name, "goob") + self.assertEqual(prec2.id, "mdsy:0003") + self.assertEqual(prec2.data['@id'], "ark:/88434/mdsy-0003") + self.assertEqual(prec2.data['doi'], "doi:88888/mdsy-0003") + self.assertEqual(prec2.meta, {"creatorisContact": True, "resourceType": "data"}) + self.assertEqual(prec2.owner, "nstr1") + + with self.assertRaises(AlreadyExists): + self.svc.create_record("goob") + + def hold_test_create_record_withdata(self): + self.create_service() + self.assertTrue(not self.svc.dbcli.name_exists("gurn")) + + prec = self.svc.create_record("gurn", {"color": "red"}, + {"temper": "dark", "creatorisContact": "goob", + "softwarelink": "http://..." 
}) # misspelled key + self.assertEqual(prec.name, "gurn") + self.assertEqual(prec.id, "mdsx:0003") + self.assertEqual(prec.meta, {"creatorisContact": False, "resourceType": "data"}) + for key in "_schema @context _extensionSchemas".split(): + self.assertIn(key, prec.data) + self.assertEqual(prec.data['color'], "red") + self.assertEqual(prec.data['doi'], "doi:88888/mdsx-0003") + self.assertEqual(prec.data['@id'], "ark:/88434/mdsx-0003") + + def test_moderate_restype(self): + self.create_service() + + try: + resmd = self.svc._moderate_restype([], {"@id": "nrd0:goob", "_schema": CORE_SCHEMA_URI }, + self.svc._store.open("nrd0:goob"), True) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(resmd['@type'], ["nrdp:PublicDataResource"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/PublicDataResource")]), 1) + self.assertEqual(len(resmd), 4) + + with self.assertRaises(mds3.InvalidUpdate): + resmd = self.svc._moderate_restype([], {"@id": "goob", "_schema": CORE_SCHEMA_URI }, + self.svc._store.open("goob"), True, doval=True) + try: + resmd = self.svc._moderate_restype([], {"@id": "goob", "_schema": CORE_SCHEMA_URI }, + self.svc._store.open("goob"), True, doval=False) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(resmd['@type'], ["nrdp:PublicDataResource"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/PublicDataResource")]), 1) + self.assertEqual(len(resmd), 4) + + try: + resmd = self.svc._moderate_restype("nrdp:PublicDataResource", + {"@id": "nrd0:goob", "_schema": CORE_SCHEMA_URI }, + self.svc._store.open("nrd0:goob"), True) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(resmd['@type'], ["nrdp:PublicDataResource"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/PublicDataResource")]), 1) + 
self.assertEqual(len(resmd['_extensionSchemas']), 1) + self.assertEqual(len(resmd), 4) + + basemd = {"@id": "nrd0:goob", "_schema": CORE_SCHEMA_URI } + nerd = self.svc._store.open("nrd0:goob") + nerd.replace_res_data(basemd) + nerd.authors.append({"fn": "Enya"}) + try: + resmd = self.svc._moderate_restype("PublicDataResource", basemd, nerd, True) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(resmd['@type'], ["nrdp:DataPublication", "nrdp:PublicDataResource"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/PublicDataResource")]), 0) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/DataPublication")]), 1) + self.assertEqual(len(resmd['_extensionSchemas']), 1) + self.assertEqual(len(resmd), 4) + + basemd = {"@id": "nrd0:goob", "_schema": CORE_SCHEMA_URI } + nerd = self.svc._store.open("nrd0:goob") + nerd.replace_res_data(basemd) + nerd.authors.append({"fn": "Enya"}) + try: + resmd = self.svc._moderate_restype("nrdx:SoftwarePublication", basemd, nerd, True, False) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + + self.assertEqual(resmd['@type'], ["nrdw:SoftwarePublication"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/PublicDataResource")]), 0) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/SoftwarePublication")]), 1) + self.assertEqual(len(resmd['_extensionSchemas']), 1) + self.assertEqual(len(resmd), 4) + + basemd = {"@id": "nrd0:goob", "_schema": CORE_SCHEMA_URI, "instrumentsUsed": [] } + nerd = self.svc._store.open("nrd0:goob") + nerd.replace_res_data(basemd) + nerd.authors.append({"fn": "Enya"}) + try: + resmd = self.svc._moderate_restype("nrd:PublicDataResource", basemd, nerd, True) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(resmd['@type'], ["nrde:ExperimentalData", 
"nrdp:DataPublication", + "nrdp:PublicDataResource"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/ExperimentalData")]), 1) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/DataPublication")]), 1) + self.assertEqual(len(resmd['_extensionSchemas']), 2) + self.assertEqual(len(resmd), 5) + + basemd = {"@id": "nrd0:goob", "_schema": CORE_SCHEMA_URI, "instrumentsUsed": [] } + nerd = self.svc._store.open("nrd0:goob") + nerd.replace_res_data(basemd) + nerd.authors.append({"fn": "Enya"}) + try: + resmd = self.svc._moderate_restype(["ScienceTheme", "dcat:Collection"], basemd, nerd, True) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(resmd['@type'], ["nrda:ScienceTheme", "nrde:ExperimentalData", + "nrdp:DataPublication", "dcat:Collection"]) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/ExperimentalData")]), 1) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/DataPublication")]), 1) + self.assertEqual(len([t for t in resmd['_extensionSchemas'] + if t.endswith("/ScienceTheme")]), 1) + self.assertEqual(len(resmd['_extensionSchemas']), 3) + self.assertEqual(len(resmd), 5) + + def test_moderate_text(self): + self.create_service() + self.assertEqual(self.svc._moderate_text("goober"), "goober") + self.assertEqual(self.svc._moderate_text("goober", {}, False), "goober") + + with self.assertRaises(mds3.InvalidUpdate): + self.svc._moderate_text(5) + self.assertEqual(self.svc._moderate_text(5, doval=False), 5) + + def test_moderate_description(self): + self.create_service() + self.assertEqual(self.svc._moderate_description("goober"), ["goober"]) + self.assertEqual(self.svc._moderate_description(["goober", "Gurn"], {}, False), ["goober", "Gurn"]) + + with self.assertRaises(mds3.InvalidUpdate): + self.svc._moderate_description(["goober", 5]) + self.assertEqual(self.svc._moderate_description(["goober", 5], 
doval=False), ["goober", 5]) + self.assertEqual(self.svc._moderate_description(["goober", "", "gurn"]), ["goober", "gurn"]) + + def test_moderate_contact(self): + self.create_service() + + try: + contact = self.svc._moderate_contact({"fn": "Gurn Cranston", "hasEmail": "gurn.cranston@gmail.com", + "foo": "bar", "phoneNumber": "Penn6-5000"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(contact['fn'], "Gurn Cranston") + self.assertEqual(contact['hasEmail'], "gurn.cranston@gmail.com") + self.assertEqual(contact['phoneNumber'], "Penn6-5000") + self.assertNotIn("foo", contact) + self.assertEqual(contact["@type"], "vcard:Contact") + self.assertEqual(len(contact), 4) + + try: + contact = self.svc._moderate_contact({"fn": "Gurn J. Cranston", "goob": "gurn"}, + {"contactInfo": contact}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(contact['fn'], "Gurn J. Cranston") + self.assertEqual(contact['hasEmail'], "gurn.cranston@gmail.com") + self.assertEqual(contact['phoneNumber'], "Penn6-5000") + self.assertNotIn("foo", contact) + self.assertEqual(contact["@type"], "vcard:Contact") + self.assertEqual(len(contact), 4) + +# with self.assertRaises(mds3.InvalidUpdate): +# contact = self.svc._moderate_contact({"fn": "Gurn J. 
Cranston", "goob": "gurn"}, +# {"contactInfo": contact}, True) + + + try: + contact = self.svc._moderate_contact({"fn": "Gurn Cranston", "goob": "gurn"}, + {"contactInfo": contact}, True) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(contact['fn'], "Gurn Cranston") +# self.assertEqual(contact['hasEmail'], "gurn.cranston@gmail.com") + self.assertNotIn("hasEmail", contact) + self.assertNotIn("phoneNumber", contact) + self.assertNotIn("foo", contact) + self.assertEqual(contact["@type"], "vcard:Contact") + self.assertEqual(len(contact), 2) + + def test_moderate_author(self): + self.create_service() + + try: + auth = self.svc._moderate_author({"familyName": "Cranston", "firstName": "Gurn", + "middleName": "J."}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + + self.assertEqual(auth['familyName'], "Cranston") + self.assertEqual(auth['middleName'], "J.") + self.assertNotIn("firstName", auth) + self.assertNotIn("fn", auth) + self.assertEqual(auth['@type'], "foaf:Person") + + auth['affiliation'] = "NIST" + try: + auth = self.svc._moderate_author(auth) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(auth['familyName'], "Cranston") + self.assertEqual(auth['middleName'], "J.") + self.assertEqual(auth['@type'], "foaf:Person") + self.assertEqual(len(auth['affiliation']), 1) + self.assertEqual(auth['affiliation'][0]['title'], "National Institute of Standards and Technology") + self.assertEqual(auth['affiliation'][0]['abbrev'], ["NIST"]) + self.assertTrue(auth['affiliation'][0]['@id'].startswith("ror:")) + + def test_moderate_reference(self): + self.create_service() + + try: + ref = self.svc._moderate_reference({"location": "https://doi.org/10.18434/example", + "goob": "gurn"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(ref['location'], "https://doi.org/10.18434/example") + 
self.assertNotIn("goob", ref) + self.assertEqual(ref['refType'], "References") + self.assertIn('_extensionSchemas', ref) + self.assertEqual(len(ref['_extensionSchemas']), 1) + self.assertEqual(ref['_extensionSchemas'][0], mds3.NERDMBIB_SCH_ID) + self.assertEqual(ref["proxyFor"], "doi:10.18434/example") + self.assertEqual(len(ref), 5) + + try: + ref = self.svc._moderate_reference({"proxyFor": "doi:10.18434/example", + "goob": "gurn"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(ref['location'], "https://doi.org/10.18434/example") + self.assertNotIn("goob", ref) + self.assertEqual(ref['refType'], "References") + self.assertIn('_extensionSchemas', ref) + self.assertEqual(len(ref['_extensionSchemas']), 1) + self.assertEqual(ref['_extensionSchemas'][0], mds3.NERDMBIB_SCH_ID) + self.assertEqual(ref["proxyFor"], "doi:10.18434/example") + self.assertEqual(len(ref), 5) + + + try: + ref = self.svc._moderate_reference({"location": "doi:10.18434/example", "refType": "myown", + "title": "A Resource", "@id": "#doi:ex", + "abbrev": ["SRB-400"], "citation": "C", + "label": "drink me", "inprep": False, + "goob": "gurn"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(ref, {"location": "doi:10.18434/example", "refType": "myown", + "title": "A Resource", "@id": "#doi:ex", + "@type": ['deo:BibliographicReference'], + "abbrev": ["SRB-400"], "citation": "C", + "label": "drink me", "inprep": False}) + + def test_moderate_file(self): + self.create_service() + + with self.assertRaises(InvalidUpdate): + self.svc._moderate_file({"_extensionSchemas": ["s", None]}) + + try: + cmp = self.svc._moderate_file({"filepath": "top", "goob": "gurn"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"filepath": "top", "goob": "gurn", "@type": ["nrdp:Subcollection"], + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"Subcollection" ]}) + + 
try: + cmp = self.svc._moderate_file({"filepath": "data.zip", "downloadURL": "pdr:file"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"filepath": "data.zip", "downloadURL": "pdr:file", + "@type": ["nrdp:DataFile", "nrdp:DownloadableFile"], + "mediaType": "application/octet-stream", + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) + + try: + cmp = self.svc._moderate_file({"filepath": "data.zip", "downloadURL": "pdr:file", + "@type": ["dcat:Distribution"]}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"filepath": "data.zip", "downloadURL": "pdr:file", + "@type": ["nrdp:DataFile", "nrdp:DownloadableFile", "dcat:Distribution"], + "mediaType": "application/octet-stream", + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) + + try: + cmp = self.svc._moderate_file({"filepath": "data.zip.md5", "downloadURL": "pdr:file", + "@type": ["nrdp:ChecksumFile"]}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"filepath": "data.zip.md5", "downloadURL": "pdr:file", + "mediaType": "text/plain", + "@type": ["nrdp:ChecksumFile", "nrdp:DownloadableFile"], + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"ChecksumFile" ]}) + + try: + cmp = self.svc._moderate_file({"filepath": "data.zip.md5", "downloadURL": "pdr:file", + "@type": ["MagicFile"]}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"filepath": "data.zip.md5", "downloadURL": "pdr:file", + "@type": ["MagicFile", "nrdp:DataFile", "nrdp:DownloadableFile"], + "mediaType": "text/plain", + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) + + try: + cmp = self.svc._moderate_file({"filepath": "data.zip.md5", "downloadURL": "pdr:file", + "@type": ["MagicFile"], + "instrumentsUsed": [{"title": "flugalhorn"}, + {"title": "knife"}]}) + except InvalidUpdate as ex: + 
self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp["@type"], + ["MagicFile", "nrdp:DataFile", "nrdp:DownloadableFile", "nrde:AcquisitionActivity"]) + self.assertEqual(cmp["_extensionSchemas"], + [ mds3.NERDMPUB_DEF+"DataFile", mds3.NERDMEXP_DEF+"AcquisitionActivity" ]) + self.assertEqual(cmp["mediaType"], "text/plain") + + try: + cmp = self.svc._moderate_file({"filepath": "data", "@type": ["MagicFile"], + "instrumentsUsed": [{"title": "flugalhorn"}, + {"title": "knife"}]}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp["@type"], + ["MagicFile", "nrdp:Subcollection", "nrde:AcquisitionActivity"]) + self.assertEqual(cmp["_extensionSchemas"], + [ mds3.NERDMPUB_DEF+"Subcollection", mds3.NERDMEXP_DEF+"AcquisitionActivity" ]) + + def test_moderate_nonfile(self): + self.create_service() + + with self.assertRaises(InvalidUpdate): + self.svc._moderate_nonfile({"filepath": "foo/bar"}) + with self.assertRaises(InvalidUpdate): + self.svc._moderate_nonfile({}) + with self.assertRaises(InvalidUpdate): + self.svc._moderate_nonfile({"@type": ["nrdp:Subcollection"], "filepath": ""}) + + try: + cmp = self.svc._moderate_nonfile({"accessURL": "https://is.up/", "filepath": None}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"accessURL": "https://is.up/", "@type": ["nrdp:AccessPage"], + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"AccessPage" ]}) + try: + cmp = self.svc._moderate_nonfile({"accessURL": "https://is.up/", "@type": ["nrdp:SearchPage"]}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"accessURL": "https://is.up/", "@type": ["nrdp:SearchPage"], + "_extensionSchemas": [ mds3.NERDMPUB_DEF+"SearchPage" ]}) + + try: + cmp = self.svc._moderate_nonfile({"searchURL": "https://is.up/", "filepath": []}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + 
self.assertEqual(cmp, {"searchURL": "https://is.up/", "@type": ["nrdg:DynamicResourceSet"], + "_extensionSchemas": [ mds3.NERDMAGG_DEF+"DynamicResourceSet" ]}) + + try: + cmp = self.svc._moderate_nonfile({"resourceType": ["nrdp:DataPublication"], "description": "wow", + "proxyFor": "ark:/88434/bob", "title": "Bob the Blob"}) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(cmp, {"@type": ["nrd:IncludedResource"], "resourceType": ['nrdp:DataPublication'], + "proxyFor": "ark:/88434/bob", "title": "Bob the Blob", + "description": "wow", "_extensionSchemas": [mds3.NERDM_DEF+"IncludedResource"]}) + + + + + + +if __name__ == '__main__': + test.main() + + From 56e338c34d936c975458aad1eeb2f3fa868cd9ea Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 3 Feb 2023 14:06:04 -0500 Subject: [PATCH 044/123] added etc/fext2format.json mapping file extensions to a format description --- etc/fext2format.json | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 etc/fext2format.json diff --git a/etc/fext2format.json b/etc/fext2format.json new file mode 100644 index 0000000..d60232c --- /dev/null +++ b/etc/fext2format.json @@ -0,0 +1,83 @@ +{ + "": "data", + "avi": "video", + "mp4": "video", + "wmv": "video", + "png": "image", + "pcd": "image", + "bmp": "image", + "gif": "image", + "jpg": "image", + "tif": "image", + "tiff": "image", + "mp3": "audio", + "ogg": "audio", + "flac": "audio", + "xlsx": "Excel spreadsheet", + "xls": "Excel spreadsheet", + "doc": "Word document", + "docx": "Word document", + "ppt": "Powerpoint slides", + "pptx": "Powerpoint slides", + "ods": "OpenDocument spreadsheet", + "odt": "OpenDocument text document", + "odp": "OpenDocument slides", + "csv": "data table", + "tsv": "data table (TAB-delimited)", + "tex": "TeX/LaTeX document", + "bib": "LaTeX Bibliographic data", + "ps": "PostScript document", + "eps": "Encapsulated PostScript document", + "fig": "Figure 
data", + "dream3d": "DREAM.3D workflow data", + "dwg": "AutoCAD data", + "prj": "project data", + "txt": "plain text", + "log": "log file", + "md": "markdown documentation", + "ink": "OpenFOAM file", + "obj": "OpenFOAM object file", + "stl": "Stereolithography file", + "cif": "Crystallographic data", + "vtk": "Paraview/VTK file", + "vtu": "Paraview/VTK data file", + "vtm": "Paraview/VTK data file", + "vtp": "Paraview/VTK data file", + "series": "Paraview/VTK data file", + "mat": "Matlab data", + "hdf": "HDF data", + "hdf5": "HDF data", + "json": "JSON data", + "jsonld": "JSON Linked-data", + "yml": "YAML metadata", + "yaml": "YAML metadata", + "xml": "XML data", + "xsd": "XML Schema file", + "xsl": "XML stylesheet", + "rdf": "RDF metadata", + "conf": "configuration data", + "config": "configuration data", + "kml": "Map data", + "parquet": "Parquet query data", + "sh": "shell script", + "csh": "C-shell script", + "py": "Python script", + "ipynb": "Python notebook", + "jinja2": "Jinja template", + "r": "R script", + "R": "R script", + "pm": "Perl module", + "pdf": "PDF document", + "pl": "Perl script", + "c": "C source code", + "exe": "Windows executable program", + "md5": "MD5 hash", + "sha256": "SHA-256 hash", + "sha512": "SHA-512 hash", + "swp": "VIM backup file", + "zip": "compressed file archive", + "7z": "compressed file archive", + "tgz": "compressed file archive", + "gz": "compressed data", + "tar": "file archive" +} From 63f759abdf4f0b87b3fb27c3baf3bdbc48264ac4 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 3 Feb 2023 14:46:38 -0500 Subject: [PATCH 045/123] mds3: add support for format desc based on file extension --- python/nistoar/midas/dap/service/mds3.py | 24 +++++++++++++++++-- .../nistoar/midas/dap/service/test_mds3.py | 6 ++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 2c0b38a..0c189d2 100644 --- 
a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -16,8 +16,8 @@ from ...dbio.wsgi.project import MIDASProjectApp from nistoar.base.config import ConfigurationException, merge_config from nistoar.nerdm import constants as nerdconst, utils as nerdutils -from nistoar.pdr import def_schema_dir, constants as const -from nistoar.pdr.utils import build_mime_type_map +from nistoar.pdr import def_schema_dir, def_etc_dir, constants as const +from nistoar.pdr.utils import build_mime_type_map, read_json from nistoar.pdr.publish.prov import PubAgent from . import validate @@ -130,12 +130,32 @@ def __init__(self, dbclient_factory: DBClient, config: Mapping={}, who: PubAgent mimefiles = [mimefiles] if mimefiles: self._mediatypes = build_mime_type_map(mimefiles) + + self._formatbyext = {} + if 'file_format_maps' in self.cfg: + mimefiles = self.cfg.get('file_format_maps', []) + else: + mimefiles = os.path.join(def_etc_dir, "fext2format.json") + if not isinstance(mimefiles, list): + mimefiles = [mimefiles] + for ffile in mimefiles: + try: + fmp = read_json(ffile) + if not isinstance(fmp, Mapping): + raise ValueError("wrong format for format-map file: contains "+type(fmp)) + if fmp: + self._formatbyext.update(fmp) + except Exception as ex: + self.log.warning("Unable to read format-map file, %s: %s", ffile, str(ex)) self._minnerdmver = minnerdmver def _guess_format(self, file_ext, mimetype=None): if not mimetype: mimetype = self._mediatypes.get(file_ext) + fmtd = self._formatbyext.get(file_ext) + if fmtd: + return { "description": fmtd } return None def create_record(self, name, data=None, meta=None) -> ProjectRecord: diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index e3a0ba4..38a7419 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -380,6 +380,7 @@ def test_moderate_file(self): 
self.assertEqual(cmp, {"filepath": "data.zip", "downloadURL": "pdr:file", "@type": ["nrdp:DataFile", "nrdp:DownloadableFile"], "mediaType": "application/octet-stream", + "format": {"description": "compressed file archive"}, "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) try: @@ -390,6 +391,7 @@ def test_moderate_file(self): self.assertEqual(cmp, {"filepath": "data.zip", "downloadURL": "pdr:file", "@type": ["nrdp:DataFile", "nrdp:DownloadableFile", "dcat:Distribution"], "mediaType": "application/octet-stream", + "format": {"description": "compressed file archive"}, "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) try: @@ -400,6 +402,7 @@ def test_moderate_file(self): self.assertEqual(cmp, {"filepath": "data.zip.md5", "downloadURL": "pdr:file", "mediaType": "text/plain", "@type": ["nrdp:ChecksumFile", "nrdp:DownloadableFile"], + "format": {"description": "MD5 hash"}, "_extensionSchemas": [ mds3.NERDMPUB_DEF+"ChecksumFile" ]}) try: @@ -409,7 +412,8 @@ def test_moderate_file(self): self.fail("Validation Error: "+ex.format_errors()) self.assertEqual(cmp, {"filepath": "data.zip.md5", "downloadURL": "pdr:file", "@type": ["MagicFile", "nrdp:DataFile", "nrdp:DownloadableFile"], - "mediaType": "text/plain", + "mediaType": "text/plain", + "format": {"description": "MD5 hash"}, "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) try: From 384094ed70ff384824fa22d44050c2b91d14a33b Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 4 Feb 2023 11:05:45 -0500 Subject: [PATCH 046/123] nerdstore: fix export bug (and enable test discovery) --- python/nistoar/midas/dap/nerdstore/fsbased.py | 52 +++++++++---------- python/nistoar/midas/dap/nerdstore/inmem.py | 52 +++++++++---------- python/tests/nistoar/midas/dap/__init__.py | 0 .../nistoar/midas/dap/nerdstore/__init__.py | 0 .../midas/dap/nerdstore/test_fsbased.py | 4 +- .../nistoar/midas/dap/nerdstore/test_inmem.py | 22 ++++---- 6 files changed, 67 insertions(+), 63 deletions(-) create mode 100644 
python/tests/nistoar/midas/dap/__init__.py create mode 100644 python/tests/nistoar/midas/dap/nerdstore/__init__.py diff --git a/python/nistoar/midas/dap/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py index a585d9e..6a1680b 100644 --- a/python/nistoar/midas/dap/nerdstore/fsbased.py +++ b/python/nistoar/midas/dap/nerdstore/fsbased.py @@ -427,10 +427,10 @@ def _read_file_md(self, mdfile: Path) -> Mapping: % (str(mdf), str(ex))) def _export_file(self, fmd): - out = OrderedDict([m for m in fmd.items() if not m[0].startswith("_")]) + out = OrderedDict([m for m in fmd.items() if not m[0].startswith("__")]) if self.is_collection(out): out['has_member'] = [OrderedDict([('@id', m[1]), ('name', m[0])]) - for m in fmd.get("_children",{}).items()] + for m in fmd.get("__children",{}).items()] return out def get_file_by_path(self, path: str) -> Mapping: @@ -457,7 +457,7 @@ def _find_fmd_id_by_relpath(self, children: Mapping, steps: [str], origpath): raise ObjectNotFound(origpath) fmd = self._read_file_md(mdf) - return self._find_fmd_id_by_relpath(fmd.get("_children", {}), steps, origpath) + return self._find_fmd_id_by_relpath(fmd.get("__children", {}), steps, origpath) def path_exists(self, filepath) -> bool: try: @@ -482,7 +482,7 @@ def get_ids_in_subcoll(self, collpath: str) -> [str]: except ObjectNotFound: return [] else: - children = coll.get('_children', []) + children = coll.get('__children', []) return list(children.values()) @@ -511,8 +511,8 @@ def __next__(self): mdf = self._fs._find_fmd_file(desc) if mdf and self._fs._is_coll_mdfile(mdf): descmd = self._fs._read_file_md(mdf) - if descmd.get('_children'): - self.descendents.extend(descmd.get('_children', {}).values()) + if descmd.get('__children'): + self.descendents.extend(descmd.get('__children', {}).values()) return desc raise StopIteration() @@ -556,11 +556,11 @@ def _import_file(self, fmd: Mapping, filepath: str=None, id: str=None, astype=No out['@type'] = [DATAFILE_TYPE, DOWNLOADABLEFILE_TYPE] 
# if self.is_collection(fmd) and 'has_member' in fmd: - # # convert 'has_member' to '_children' - # out['_children'] = OrderedDict() + # # convert 'has_member' to '__children' + # out['__children'] = OrderedDict() # for child in fmd['has_member']: # if '@id' in child and 'filepath' in child: - # out['_children'][self._basename(child['filepath'])] = child['@id'] + # out['__children'][self._basename(child['filepath'])] = child['@id'] return out def set_file_at(self, md, filepath: str=None, id=None, as_coll: bool=None) -> str: @@ -618,7 +618,7 @@ def set_file_at(self, md, filepath: str=None, id=None, as_coll: bool=None) -> st deldestfile = False if destfile and \ (destfile['@id'] != md['@id'] or self.is_collection(destfile) != self.is_collection(md)): - if destfile.get('_children'): + if destfile.get('__children'): # destination is a non-empty collection: won't clobber it raise CollectionRemovalDissallowed(destfile['filepath'], "collection is not empty") deldestfile = True @@ -626,9 +626,9 @@ def set_file_at(self, md, filepath: str=None, id=None, as_coll: bool=None) -> st if oldfile: if self.is_collection(oldfile) and self.is_collection(md): # updating a collection; preserve its contents - md['_children'] = oldfile.get('_children') - if md['_children'] is None: - md['_children'] = OrderedDict() + md['__children'] = oldfile.get('__children') + if md['__children'] is None: + md['__children'] = OrderedDict() if filepath != oldfile.get('filepath'): # this is a file move; deregister it from its old parent @@ -654,9 +654,9 @@ def _register_with_parent(self, filepath, id): name = self._basename(filepath) if not self.is_collection(parent): raise ObjectNotFound(parent, message=self._dirname(filepath)+": Not a subcollection") - if '_children' not in parent: - parent['_children'] = OrderedDict() - parent['_children'][name] = id + if '__children' not in parent: + parent['__children'] = OrderedDict() + parent['__children'][name] = id self._cache_file_md(parent) else: @@ -668,8 
+668,8 @@ def _deregister_from_parent(self, filepath): try: parent = self._get_file_by_path(self._dirname(filepath)) name = self._basename(filepath) - if name in parent.get('_children',{}): - del parent['_children'][name] + if name in parent.get('__children',{}): + del parent['__children'][name] self._cache_file_md(parent) except ObjectNotFound: pass @@ -721,8 +721,8 @@ def load_file_components(self, cmps): # Go through a last time to set the subcollection content info into each subcollection component for cmp in subcolls: if cmp.get('filepath') in children: - if '_children' not in cmp: - cmp['_children'] = OrderedDict() + if '__children' not in cmp: + cmp['__children'] = OrderedDict() # base subcollection contents first on 'has_member' list as this captures order info if cmp.get('has_member'): @@ -730,12 +730,12 @@ def load_file_components(self, cmps): cmp['has_member'] = [cmp['has_member']] for child in cmp['has_member']: if child.get('@id') in saved and child.get('name'): - cmp['_children'][child['name']] = child.get('@id') + cmp['__children'][child['name']] = child.get('@id') # capture any that got missed by 'has_member' for child in children[cmp['filepath']]: - if child[0] not in cmp['_children']: - cmp['_children'][child[0]] = child[1] + if child[0] not in cmp['__children']: + cmp['__children'][child[0]] = child[1] self.set_file_at(cmp) @@ -778,9 +778,9 @@ def set_order_in_subcoll(self, collpath: str, ids: Iterable[str]) -> Iterable[st coll = self._get_file_by_path(collpath) if not self.is_collection(coll): raise ObjectNotFound(collpath, message=collpath+": not a subcollection component") - if '_children' not in coll: - coll['_children'] = OrderedDict() - children = coll['_children'] + if '__children' not in coll: + coll['__children'] = OrderedDict() + children = coll['__children'] # create an inverted child map byid = OrderedDict( [(itm[1], itm[0]) for itm in children.items()] ) diff --git a/python/nistoar/midas/dap/nerdstore/inmem.py 
b/python/nistoar/midas/dap/nerdstore/inmem.py index fa64e5b..1d2790e 100644 --- a/python/nistoar/midas/dap/nerdstore/inmem.py +++ b/python/nistoar/midas/dap/nerdstore/inmem.py @@ -273,7 +273,7 @@ def _load_from(self, cmps: [Mapping]): for cmp in subcolls: if cmp.get('filepath') in children: if '_children' not in cmp: - cmp['_children'] = OrderedDict() + cmp['__children'] = OrderedDict() # base subcollection contents first on 'has_member' list as this captures order info if cmp.get('has_member'): @@ -281,12 +281,12 @@ def _load_from(self, cmps: [Mapping]): cmp['has_member'] = [cmp['has_member']] for child in cmp['has_member']: if child.get('@id') in self._files and child.get('name'): - cmp['_children'][child['name']] = child.get('@id') + cmp['__children'][child['name']] = child.get('@id') # capture any that got missed by 'has_member' for child in children[cmp['filepath']]: - if child[0] not in cmp['_children']: - cmp['_children'][child[0]] = child[1] + if child[0] not in cmp['__children']: + cmp['__children'][child[0]] = child[1] def get_file_by_id(self, id: str) -> Mapping: @@ -300,14 +300,14 @@ def _get_file_by_id(self, id: str) -> Mapping: def get_file_by_path(self, path: str) -> Mapping: if not path: - raise ValueError("get_file__path(): No path specified") + raise ValueError("get_file_path(): No path specified") return self._export_file(self._get_file_by_path(path)) def _export_file(self, fmd): - out = OrderedDict([copy.deepcopy(m) for m in fmd.items() if not m[0].startswith("_")]) + out = OrderedDict([copy.deepcopy(m) for m in fmd.items() if not m[0].startswith("__")]) if self.is_collection(out): out['has_member'] = [OrderedDict([('@id', m[1]), ('name', m[0])]) - for m in fmd.get("_children",{}).items()] + for m in fmd.get("__children",{}).items()] return out def _get_file_by_path(self, path: str) -> Mapping: @@ -323,7 +323,7 @@ def _get_file_by_relpath(self, children: Mapping, steps: [str], origpath): if not self.is_collection(child): raise 
ObjectNotFound(origpath) - return self._get_file_by_relpath(child.get('_children',{}), steps, origpath) + return self._get_file_by_relpath(child.get('__children',{}), steps, origpath) @property def ids(self): @@ -350,8 +350,8 @@ def __iter__(self): def __next__(self): if self.descendents: desc = self._fs._get_file_by_id(self.descendents.pop(0)) - if desc.get('_children'): - self.descendents.extend(desc.get('_children', {}).values()) + if desc.get('__children'): + self.descendents.extend(desc.get('__children', {}).values()) return desc raise StopIteration() @@ -363,7 +363,7 @@ def get_ids_in_subcoll(self, collpath: str) -> [str]: except ObjectNotFound: return [] else: - children = coll.get('_children', []) + children = coll.get('__children', []) return list(children.values()) @@ -385,9 +385,9 @@ def set_order_in_subcoll(self, collpath: str, ids: Iterable[str]) -> Iterable[st coll = self._get_file_by_path(collpath) if not self.is_collection(coll): raise ObjectNotFound(collpath, message=collpath+": not a subcollection component") - if '_children' not in coll: - coll['_children'] = OrderedDict() - children = coll['_children'] + if '__children' not in coll: + coll['__children'] = OrderedDict() + children = coll['__children'] # create an inverted child map byid = OrderedDict( [(itm[1], itm[0]) for itm in children.items()] ) @@ -423,8 +423,8 @@ def _deregister_from_parent(self, filepath): try: parent = self._get_file_by_path(self._dirname(filepath)) name = self._basename(filepath) - if name in parent.get('_children',{}): - del parent['_children'][name] + if name in parent.get('__children',{}): + del parent['__children'][name] except ObjectNotFound: pass else: @@ -439,9 +439,9 @@ def _register_with_parent(self, filepath, id): name = self._basename(filepath) if not self.is_collection(parent): raise ObjectNotFound(parent, message=self._dirname(filepath)+": Not a subcollection") - if '_children' not in parent: - parent['_children'] = OrderedDict() - children = 
parent['_children'] + if '__children' not in parent: + parent['__children'] = OrderedDict() + children = parent['__children'] children[name] = id @@ -510,7 +510,7 @@ def set_file_at(self, md, filepath: str=None, id=None, as_coll: bool=None) -> st deldestfile = False if destfile and self.is_collection(destfile) and \ (destfile['@id'] != md['@id'] or not self.is_collection(md)): - if destfile.get('_children'): + if destfile.get('__children'): # destination is a non-empty collection: don't clobber collections raise CollectionRemovalDissallowed(destfile['filepath'], "collection is not empty") deldestfile = True @@ -518,9 +518,9 @@ def set_file_at(self, md, filepath: str=None, id=None, as_coll: bool=None) -> st if oldfile: if self.is_collection(oldfile) and self.is_collection(md): # updating a collection; preserve its contents - md['_children'] = oldfile.get('_children') - if md['_children'] is None: - md['_children'] = OrderedDict() + md['__children'] = oldfile.get('__children') + if md['__children'] is None: + md['__children'] = OrderedDict() if filepath != oldfile.get('filepath'): # this is a file move; deregister it from its old parent @@ -586,11 +586,11 @@ def _import_file(self, fmd: Mapping, filepath: str=None, id: str=None, astype=No out['@type'] = [DATAFILE_TYPE, DOWNLOADABLEFILE_TYPE] # if self.is_collection(fmd) and 'has_member' in fmd: - # # convert 'has_member' to '_children' - # out['_children'] = OrderedDict() + # # convert 'has_member' to '__children' + # out['__children'] = OrderedDict() # for child in fmd['has_member']: # if '@id' in child and 'filepath' in child: - # out['_children'][self._basename(child['filepath'])] = child['@id'] + # out['__children'][self._basename(child['filepath'])] = child['@id'] return out class InMemoryResource(NERDResource): diff --git a/python/tests/nistoar/midas/dap/__init__.py b/python/tests/nistoar/midas/dap/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/python/tests/nistoar/midas/dap/nerdstore/__init__.py b/python/tests/nistoar/midas/dap/nerdstore/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py index 695b137..3282814 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py @@ -569,7 +569,7 @@ def test_get_file(self): self.assertEqual(f['@id'], "file_2") self.assertEqual(f['filepath'], "trial3") self.assertNotIn("downloadURL", f) - self.assertNotIn("_children", f) + self.assertNotIn("__children", f) self.assertTrue(isinstance(f['has_member'], list)) self.assertTrue(len(f['has_member']), 1) self.assertEqual(f['has_member'][0], {"@id": "file_3", "name": "trial3a.json"}) @@ -586,7 +586,7 @@ def test_get_file(self): self.assertEqual(f['@id'], "file_2") self.assertEqual(f['filepath'], "trial3") self.assertNotIn("downloadURL", f) - self.assertNotIn("_children", f) + self.assertNotIn("__children", f) self.assertTrue(isinstance(f['has_member'], list)) self.assertTrue(len(f['has_member']), 1) self.assertEqual(f['has_member'][0], {"@id": "file_3", "name": "trial3a.json"}) diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py index de8dc96..d3bf0fb 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_inmem.py @@ -57,7 +57,7 @@ def test_load_data(self): res = inmem.InMemoryResource("pdr0:0001", nerd) self.assertEqual(res.id, "pdr0:0001") data = res.get_res_data() - self.assertEqual(data.get('@id'), "pdr0:0001") +# self.assertEqual(data.get('@id'), "pdr0:0001") # does not need to be the same self.assertEqual(data.get('title'), nerd['title']) self.assertEqual(data.get('contactPoint'), nerd['contactPoint']) @@ -101,7 +101,8 @@ def test_replace_res_data(self): res.replace_res_data(md) 
data = res.get_data() self.assertEqual(res.id, "pdr0:0001") - self.assertEqual(data.get('@id'), "pdr0:0001") +# self.assertEqual(data.get('@id'), "pdr0:0001") # does not need to be the same + self.assertEqual(data.get('@id'), "Whahoo!") self.assertEqual(data.get('title'), "The Replacements") self.assertEqual(data.get('contactPoint'), [{"comment": "this is not real contact info"}]) self.assertEqual(data.get('color'), "green") @@ -113,7 +114,8 @@ def test_replace_res_data(self): res.replace_res_data(nerd) data = res.get_data() self.assertEqual(res.id, "pdr0:0001") - self.assertEqual(data.get('@id'), "pdr0:0001") +# self.assertEqual(data.get('@id'), "pdr0:0001") # does not need to be the same + self.assertEqual(data.get('@id'), "ark:/88434/pdr02p1s") self.assertTrue(data.get('title').startswith('OptSortSph: ')) self.assertEqual(data.get('contactPoint').get("fn"), "Zachary Levine") self.assertEqual(data.get('doi'), "doi:10.18434/T4SW26") @@ -126,7 +128,8 @@ def test_replace_res_data(self): res.replace_res_data(md) data = res.get_data() self.assertEqual(res.id, "pdr0:0001") - self.assertEqual(data.get('@id'), "pdr0:0001") +# self.assertEqual(data.get('@id'), "pdr0:0001") + self.assertEqual(data.get('@id'), "Whahoo!") self.assertEqual(data.get('title'), "The Replacements") self.assertEqual(data.get('contactPoint'), [{"comment": "this is not real contact info"}]) self.assertEqual(data.get('color'), "green") @@ -139,7 +142,8 @@ def test_replace_res_data(self): res.replace_res_data(md) # data = res.get_data() self.assertEqual(res.id, "pdr0:0001") - self.assertEqual(data.get('@id'), "pdr0:0001") +# self.assertEqual(data.get('@id'), "pdr0:0001") + self.assertEqual(data.get('@id'), "Whahoo!") self.assertEqual(data.get('title'), "The Replacements") self.assertEqual(data.get('contactPoint'), [{"comment": "this is not real contact info"}]) self.assertEqual(data.get('color'), "green") @@ -167,8 +171,8 @@ def test_load_from(self): coll = 
files._files[list(files._children.values())[-1]] self.assertTrue(coll.get('@id').startswith("coll_")) - self.assertIn('_children', coll) - self.assertEqual(list(coll['_children'].keys()), ["trial3a.json"]) + self.assertIn('__children', coll) + self.assertEqual(list(coll['__children'].keys()), ["trial3a.json"]) self.assertEqual(files._ididx, 4) files.empty() @@ -215,7 +219,7 @@ def test_get_file(self): self.assertEqual(f['@id'], "coll_2") self.assertEqual(f['filepath'], "trial3") self.assertNotIn("downloadURL", f) - self.assertNotIn("_children", f) + self.assertNotIn("__children", f) self.assertTrue(isinstance(f['has_member'], list)) self.assertTrue(len(f['has_member']), 1) self.assertEqual(f['has_member'][0], {"@id": "file_3", "name": "trial3a.json"}) @@ -232,7 +236,7 @@ def test_get_file(self): self.assertEqual(f['@id'], "coll_2") self.assertEqual(f['filepath'], "trial3") self.assertNotIn("downloadURL", f) - self.assertNotIn("_children", f) + self.assertNotIn("__children", f) self.assertTrue(isinstance(f['has_member'], list)) self.assertTrue(len(f['has_member']), 1) self.assertEqual(f['has_member'][0], {"@id": "file_3", "name": "trial3a.json"}) From 7e947d03ff59025ae804629568fbcc11c9f433e5 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 11 Feb 2023 14:19:00 -0500 Subject: [PATCH 047/123] finish md3's DAPService --- python/nistoar/midas/dap/nerdstore/base.py | 22 +- python/nistoar/midas/dap/nerdstore/inmem.py | 8 +- python/nistoar/midas/dap/service/mds3.py | 861 +++++++++++++----- python/nistoar/midas/dbio/__init__.py | 2 +- python/nistoar/midas/dbio/project.py | 6 +- .../nistoar/midas/dap/service/test_mds3.py | 468 +++++++++- .../nistoar/pdr/publish/data/ncnrexp0.json | 2 +- 7 files changed, 1134 insertions(+), 235 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/base.py b/python/nistoar/midas/dap/nerdstore/base.py index 7fece13..e0fd111 100644 --- a/python/nistoar/midas/dap/nerdstore/base.py +++ b/python/nistoar/midas/dap/nerdstore/base.py @@ 
-1,10 +1,10 @@ """ Abstract base classes providing the interface to metadata storage. """ -import logging +import logging, re from abc import ABC, ABCMeta, abstractproperty, abstractmethod from collections.abc import MutableMapping, Mapping, MutableSequence -from typing import Iterable, Iterator, NewType +from typing import Iterable, Iterator, NewType, List from logging import Logger import nistoar.nerdm.utils as nerdmutils @@ -319,6 +319,17 @@ def append(self, md: Mapping) -> str: """ return self.insert(self.count, md) + def replace_all_with(self, md: List[Mapping]): + """ + replace the current list of items with the given list. The currently saved items will + first be removed, and then the given items will be added in order. + """ + if not isinstance(md, list): + raise TypeError("replace_all_with(): md is not a list") + self.empty() + for item in md: + self.append(item) + def pop(self, key): """ remove and return an item from the list. This method, along with :py:method:`insert` or @@ -682,6 +693,13 @@ def delete_file(self, id: str) -> bool: """ raise NotImplementedError() + @abstractmethod + def empty(self): + """ + remove all files and folders from this collection of file components + """ + raise NotImplementedError() + @abstractmethod def exists(self, id: str) -> bool: """ diff --git a/python/nistoar/midas/dap/nerdstore/inmem.py b/python/nistoar/midas/dap/nerdstore/inmem.py index 1d2790e..420e428 100644 --- a/python/nistoar/midas/dap/nerdstore/inmem.py +++ b/python/nistoar/midas/dap/nerdstore/inmem.py @@ -676,15 +676,15 @@ def get_data(self, inclfiles=True) -> Mapping: return None if self._auths.count > 0: - out['authors'] = self._auths.data() + out['authors'] = self._auths.get_data() if self._refs.count > 0: - out['references'] = self._auths.data() + out['references'] = self._refs.get_data() if self._nonfiles.count > 0 or self._files.count > 0: out['components'] = [] if self._nonfiles.count > 0: - out['components'].extend(self._nonfiles.data()) + 
out['components'].extend(self._nonfiles.get_data()) if self._files.count > 0: - out['components'].extend(self._files.data()) + out['components'].extend(self._files.get_files()) return out class InMemoryResourceStorage(NERDResourceStorage): diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 0c189d2..5d616f4 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -5,14 +5,16 @@ Support for the web service frontend is provided as a WSGI :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` implementation. """ -import os +import os, re from logging import Logger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence, Callable from typing import List +from copy import deepcopy from ...dbio import (DBClient, DBClientFactory, ProjectRecord, AlreadyExists, NotAuthorized, ACLs, - InvalidUpdate, ProjectService, ProjectServiceFactory, DAP_PROJECTS) + InvalidUpdate, ObjectNotFound, PartNotAccessible, + ProjectService, ProjectServiceFactory, DAP_PROJECTS) from ...dbio.wsgi.project import MIDASProjectApp from nistoar.base.config import ConfigurationException, merge_config from nistoar.nerdm import constants as nerdconst, utils as nerdutils @@ -306,10 +308,66 @@ def _new_metadata_for(self, shoulder=None): ("creatorisContact", True) ]) - def replace_data(self, id, newdata, part=None, prec=None, nerd=None): + def get_nerdm_data(self, id: str, part: str=None): + """ + return the full NERDm metadata. This differs from the :py:method:`get_data` method which (in + this implementation) only returns a summary of hte NERDm metadata. + :param str id: the identifier for the record whose NERDm data should be returned. 
+ :param str part: a path to the part of the record that should be returned + """ + prec = self.dbcli.get_record_for(id, ACLs.READ) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(prec.id) + + if not part: + out = nerd.get_data() + + else: + m = re.search(r'^([a-z]+s)\[([\w\d]+)\]$', path) + if m: + # path is of the form xxx[k] and refers to an item in a list + key = m.group(3) + try: + key = int(key) + except ValueError: + pass + + if m.group(1) == "authors": + out = nerd.authors.get(key) + elif m.group(1) == "references": + out = nerd.reference.get(key) + elif m.group(1) == "components": + out = None + try: + out = nerd.nonfiles.get(key) + except (KeyError, IndexError) as ex: + pass + if not out: + try: + out = nerd.files.get_file_by_id(key) + except ObjectNotFound as ex: + pass + if not out: + out = nerd.files.get_file_by_path(key) + + elif part == "authors": + out = nerd.authors.get_data() + elif part == "authors": + out = nerd.references.get_data() + elif part == "components": + out = nerd.nonfiles.get_data() + nerd.files.get_data() + else: + out = nerd.get_res_data() + if part in out: + out = out[part] + else: + raise PartNotAccessible(prec.id, path, "Accessing %s not supported" % path) + + return out + + def replace_data(self, id, newdata, part=None): """ Replace the currently stored data content of a record with the given data. It is expected that - the new data will be filtered/cleansed via an internal call to :py:method:`dress_data`. + the new data will be filtered/cleansed via an internal call to :py:method:`moderate_data`. :param str id: the identifier for the record whose data should be updated. :param str newdata: the data to save as the new content. :param stt part: the slash-delimited pointer to an internal data property. 
If provided, @@ -325,9 +383,9 @@ def replace_data(self, id, newdata, part=None, prec=None, nerd=None): :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or would otherwise result in invalid data content. """ - return self._update_data(id, ndwdata, part, prec, nerd, True) + return self._update_data(id, newdata, part, replace=True) - def update_data(self, id, newdata, part=None, prec=None, nerd=None): + def update_data(self, id, newdata, part=None): """ merge the given data into the currently save data content for the record with the given identifier. :param str id: the identifier for the record whose data should be updated. @@ -345,9 +403,9 @@ def update_data(self, id, newdata, part=None, prec=None, nerd=None): :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or would otherwise result in invalid data content. """ - return self._update_data(id, newdata, part, prec, nerd, False) + return self._update_data(id, newdata, part, replace=False) - def clear_data(self, id, part=None, prec=None): + def clear_data(self, id, part=None, _prec=None): """ remove the stored data content of the record and reset it to its defaults. :param str id: the identifier for the record whose data should be cleared. @@ -362,12 +420,12 @@ def clear_data(self, id, part=None, prec=None): given by `id`. :raises PartNotAccessible: if clearing of the part of the data specified by `part` is not allowed. 
""" - if not prec: - prec = self.dbcli.get_record_for(id, ACLs.WROTE) # may raise ObjectNotFound/NotAuthorized + if not _prec: + _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if not self._store.exists(id): - self.log.warning("NERDm data for id=%s not found in metadata store", prec.id) - nerd = self._new_data_for(prec.id, prec.meta) + self.log.warning("NERDm data for id=%s not found in metadata store", _prec.id) + nerd = self._new_data_for(_prec.id, _prec.meta) self._store.load_from(nerd) nerd = self._store.open(id) @@ -384,19 +442,19 @@ def clear_data(self, id, part=None, prec=None): del resmd[part] nerd.replace_res_data(resmd) else: - raise PartNotAccessible(prec.id, path, "Clearing %s not allowed" % path) + raise PartNotAccessible(_prec.id, path, "Clearing %s not allowed" % path) else: nerd.authors.empty() nerd.references.empty() nerd.files.empty() nerd.nonfiles.empty() - nerd.replace_res_data(self._new_data_for(prec.id, prec.meta)) + nerd.replace_res_data(self._new_data_for(_prec.id, prec.meta)) def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=False): if not prec: - prec = self.dbcli.get_record_for(id, ACLs.WROTE) # may raise ObjectNotFound/NotAuthorized + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if not nerd: if not self._store.exists(id): @@ -419,7 +477,7 @@ def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=Fal else: # replacing just a part of the data try: - data = self._update_part_nerd(prec, nerd, part, newdata, replace) + data = self._update_part_nerd(part, prec, nerd, newdata, replace) except InvalidUpdate as ex: ex.record_id = prec.id ex.record_part = part @@ -445,38 +503,46 @@ def _summarize(self, nerd: NERDResource): out["reference_count"] = nerd.references.count return out - _handsoff = ("@id @context publisher issued firstIssued revised annotated " + \ - "bureauCode programCode systemOfRecords 
primaryITInvestmentUII " + \ + _handsoff = ("@id @context publisher issued firstIssued revised annotated version " + \ + "bureauCode programCode systemOfRecords primaryITInvestmentUII " + \ "doi ediid releaseHistory status theme").split() def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, data: Mapping, replace=False): # filter out properties that the user is not allow to update newdata = OrderedDict() for prop in data: - if not prop.startswith("_") and prop not in self._handsoff: + if not prop.startswith("__") and prop not in self._handsoff: newdata[prop] = data[prop] errors = [] authors = newdata.get('authors') if authors: del newdata['authors'] - authors = self._moderate_authors(authors, nerd, replace) + try: + authors = self._merge_objlist_for_update(nerd.authors, self._moderate_author, + authors, replace, True) + except InvalidUpdate as ex: + errors.extend(ex.errors) + refs = newdata.get('references') if refs: del newdata['references'] - refs = self._moderate_references(refs, nerd, replace) + try: + refs = self._merge_objlist_for_update(nerd.references, self._moderate_reference, + refs, replace, True) + except InvalidUpdate as ex: + errors.extend(ex.errors) comps = newdata.get('components') files = [] nonfiles = [] if comps: del newdata['components'] - for cmp in comps: - if 'filepath' in cmp: - files.append(self._moderate_file(cmp)) - else: - nonfiles.append(self._moderate_nonfile(cmp)) - comps = nonfiles + files + try: + files, nonfiles = self._merge_comps_for_update(nerd, comps, replace, True) + comps = nonfiles + files + except InvalidUpdate as ex: + errors.extend(ex.errors) # handle resource-level data: merge the new data into the old and validate the result if replace: @@ -485,86 +551,114 @@ def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, data: Mappin oldresdata = nerd.get_data(False) # merge and validate the resource-level data - newdata = self._moderate_res_data(newdata, oldresdata, nerd, replace) # may raise 
InvalidUpdate + try: + newdata = self._moderate_res_data(newdata, oldresdata, nerd, replace) + except InvalidUpdate as ex: + errors = ex.errors + errors + + if len(errors) > 1: + raise InvalidUpdate("Input metadata data would create invalid record (%d errors detected)" + % len(errors), prec.id, errors=errors) + elif len(errors) == 1: + raise InvalidUpdate("Input validation error: "+errors[0], prec.id, errors=errors) # all data is merged and validated; now commit nerd.replace_res_data(newdata) - if authors: - self._update_part_nerd("authors", prec, nerd, authors, replace, doval=False) - if refs: - self._update_part_nerd("references", prec, nerd, refs, replace, doval=False) - if comps: - self._update_part_nerd("components", prec, nerd, comps, replace, doval=False) - - return nerd.get_data(True) - - -################# - - - def validate_json(self, json, schemauri=None): - """ - validate the given JSON data record against the give schema, raising an exception if it - is not valid. - - :param dict json: the (parsed) JSON data to validate - :param str schemauri: the JSONSchema URI to validate the input against. - :raises InvalidUpdate: if the data is found to be invalid against the schema; the exception's - ``errors`` property will list all the errors found. 
- """ - errors = [] - if self._valid8r: - if not schemauri: - schemauri = json.get("_schema") - if not schemauri: - raise ValueError("validate_json(): No schema URI specified for input data") - errors = self._valid8r.validate(json, schemauri=schemauri, strict=True, raiseex=False) + if replace: + nerd.authors.empty() + if authors: + nerd.authors.replace_all_with(authors) + nerd.references.empty() + if refs: + nerd.references.replace_all_with(refs) + nerd.nonfiles.empty() + if nonfiles: + nerd.nonfiles.replace_all_with(nonfiles) else: - self.log.warning("Unable to validate submitted NERDm data") + def put_listitem_into(item, objlist): + if item.get("@id"): + objlist.set(item.get("@id"), item) + else: + objlist.append(item) + def put_each_into(data, objlist): + for item in data: + put_listitem_into(item, objlist) + + if authors: + put_each_into(authors, nerd.authors) + if refs: + put_each_into(refs, nerd.references) + if nonfiles: + put_each_into(nonfiles, nerd.nonfiles) - if len(errors) > 0: - raise InvalidUpdate("NERDm Schema validation errors found", errors=errors, sys=self) + if replace: + nerd.files.empty() + for fmd in files: + nerd.files.set_file_at(fmd) + return nerd.get_data(True) def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data: Mapping, replace=False, doval=True): + schemabase = prec.data.get("_schema") or NERDMPUB_SCH_ID - m = re.search(r'^([a-z]+s)\[([\w\d]+)\]$', path) + m = re.search(r'^([a-z]+s)\[([\w\d\.\/]+)\]$', path) if m: # path is of the form xxx[k] and refers to an item in a list - key = m.group(3) + key = m.group(2) try: key = int(key) except ValueError: pass - + if m.group(1) == "authors": - self._update_author(prec, nerd, data, replace, doval=doval) + data["_schema"] = schemabase+"/definitions/Person" + data = self._update_listitem(nerd.authors, self._moderate_author, data, key, replace, doval) elif m.group(1) == "references": data["_schema"] = schemabase+"/definitions/BibliographicReference" - 
self._update_reference(prec, nerd, data, replace, doval=doval) + data = self._update_listitem(nerd.references, self._moderate_reference, data, key, + replace, doval) elif m.group(1) == "components": data["_schema"] = schemabase+"/definitions/Component" - self._update_component(prec, nerd, data, replace, doval=doval) + data = self._update_component(nerd, data, key, replace, doval=doval) else: raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) elif path == "authors": + if not isinstance(data, list): + err = "authors data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) if replace: - self._replace_authors(prec, nerd, data, doval=doval) + data = self._replace_objlist(nerd.authors, self._moderate_author, data, doval) else: - self._update_authors(prec, nerd, data, doval=doval) + data = self._update_objlist(nerd.authors, self._moderate_author, data, doval) + elif path == "references": + if not isinstance(data, list): + err = "references data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) if replace: - self._replace_references(prec, nerd, data, doval=doval) + data = self._replace_objlist(nerd.references, self._moderate_reference, data, doval) else: - self._update_references(prec, nerd, data, doval=doval) + data = self._update_objlist(nerd.references, self._moderate_reference, data, doval) + elif path == "components": + if not isinstance(data, list): + err = "components data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) + files, nonfiles = self._merge_comps_for_update(nerd, data, replace, doval) if replace: - self._replace_components(prec, nerd, data, doval=doval) - else: - self._update_components(prec, nerd, data, doval=doval) + nerd.nonfiles.empty() + nerd.files.empty() + for cmp in nonfiles: + if cmp.get("@id"): + nerd.nonfiles.set(cmp['@id']) + else: + nerd.nonfiles.append(cmp) + for cmp in files: + nerd.files.set_file_at(cmp) + data = nerd.nonfiles.get_data() + nerd.files.get_files() elif 
path == "contactPoint": if not isinstance(data, Mapping): @@ -573,6 +667,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) # may raise InvalidUpdate nerd.replace_res_data(res) + data = res[path] elif path == "@type": if not isinstance(data, (list, str)): @@ -580,6 +675,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res = nerd.get_res_data() res = self._moderate_restype(data, res, nerd, replace=replace, doval=doval) nerd.replace_res_data(res) + data = res[path] elif path == "description": if not isinstance(data, (list, str)): @@ -587,6 +683,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res = nerd.get_res_data() res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate nerd.replace_res_data(res) + data = res[path] elif path in "title rights disclaimer".split(): if not isinstance(data, str): @@ -594,10 +691,440 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res = nerd.get_res_data() res[path] = self._moderate_text(data, res, doval=doval) # may raise InvalidUpdate nerd.replace_res_data(res) + data = res[path] else: raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + + return data + + + def set_file_component(self, id, filemd, filepath=None): + """ + add a file to the specified dataset as described by the given metadata. If the dataset + already has a file with the specified filepath, it will be replaced. + :param str id: the identifier for the dataset to add the file to + :param dict filemd: the NERDm file metadata describing the new file to add. If + the "@id" property is set, it will be ignored. + :param str filepath: the path within the dataset to assign to the file. 
If provided, + it will override the corresponding value in `filemd`; if not + provided, the filepath must be set within `filemd`. + """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + if filepath: + data = deepcopy(data) + data['filepath'] = filepath + else: + filepath = filemd.get("filepath") + if not filepath: + raise InvalidUpdate("filepath not set in the given file description to be added to " + id) + nerd = self._store.open(id) + + oldfile = None + try: + oldfile = nerd.files.get_file_by_path(filepath) # it must have na id + except ObjectNotFound as ex: + pass + + return self._update_file_comp(nerd, data, oldfile, replace=True, doval=True) + + def update_file_component_at(self, id: str, filemd: Mapping, filepath: str=None): + """ + Update the metadata for a file component at a particular filepath. The given metadata will + be merged with that of the existing file. If a file is not currently registered at + that filepath, an exception is raised. + :param str id: the identifier for the dataset containing the file + :param dict filemd: the file metadata to update + :param str filepath: the path of the file within the dataset to update + :raises ObjectNotFound: if there does not exist a file at the given filepath + :raises ValueError: if filepath is not set in either the `filepath` argument or the + filepath property. 
+ """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + if not filepath: + filepath = filemd.get("filepath") + if not filepath: + raise InvalidUpdate("filepath not set in the given file description to be added to " + id) + if filemd.get("filepath") != filepath: + filemd = deepcopy(filemd) + filemd['filepath'] = filepath + + nerd = self._store.open(id) + oldfile = nerd.files.get_file_by_path(filepath) # may raise ObjectNotFound + + return self._update_file_comp(nerd, filemd, oldfile, replace=False, doval=True) + + def update_file_component(self, id: str, filemd: Mapping, fileid: str=None): + """ + Update the metadata for a file component at a particular filepath. The given metadata will + be merged with that of the existing file. If a file is not currently registered at + that filepath, an exception is raised. + :param str id: the identifier for the dataset containing the file + :param dict filemd: the file metadata to update + :param str fileid: the id of the file within the dataset to update + :raises ObjectNotFound: if there does not exist a resource with the given id + :raises ValueError: if id is not set in either the `fileid` argument or the `filemd` object's + `@id` property. + """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + if not fileid: + fileid = filemd.get("@id") + if not fileid: + raise InvalidUpdate("file-id not set in the given file description to be added to " + id) + if filemd.get("@id") != fileid: + filemd = deepcopy(filemd) + filemd['@id'] = fileid + + nerd = self._store.open(id) + oldfile = nerd.files.get_file_by_path(filepath) # may raise ObjectNotFound + + return self._update_file_comp(nerd, filemd, oldfile, replace=False, doval=True) + + def replace_files(self, id: str, files: List[Mapping]): + """ + replace all currently saved files and folder components with the given list. Each component + must include a `filepath` property. 
+ :param str id: the identifier for the dataset containing the file + :raises ObjectNotFound: if there does not exist a resource with the given id + """ + if not isinstance(files, (list, tuple)): + err = "components data is not a list" + raise InvalidUpdate(err, id, "components", errors=[err]) + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + errors = [] + newfiles = [] + nbad = 0 + for cmp in files: + try: + files[i] = self._moderate_file(cmp, True) + except InvalidUpdate as ex: + nbad += 1 + errors.extend(ex.errors) + + if errors: + raise InvalidUpdate("%s: %d files in given list produced validation errors" % (id, nbad), + errors=ex.errors) + + newfiles.sort(key=lambda cmp: cmp.get("filepath")) + nerd.files.empty() + for cmp in newfiles: + nerd.files.set_file_at(cmp) + + + def _update_file_comp(self, nerd: NERDResource, md: Mapping, oldmd: Mapping = None, + replace: bool=False, doval: bool=False): + if oldmd and not replace: + md = self._merge_into(md, oldmd) + + md = self._moderate_file(md, doval=doval) # may raise InvalidUpdate + + id = nerd.files.set_file_at(md, md['filepath'], md.get('@id')) + if not md.get('@id'): + md['@id'] = id + return md + + def add_nonfile_component(self, id: str, cmpmd: Mapping): + """ + add a new non-file component to the specified dataset as described by the given metadata. + :param str id: the identifier for the dataset to add a new component to + :param dict cmpmd: the NERDm component metadata describing the new component to add. If + the "@id" property is set, it will be ignored. 
+ """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + return self._add_listitem(nerd.nonfiles, self._moderate_nonfile, cmpmd, doval=True) + + def update_nonfile_component(self, id: str, cmpmd: Mapping, idorpos=None, replace=False): + """ + update the metadata for a non-file component in the specified dataset as identified either + by its ID or position in the list of non-file components. If identified component does not + exist, an exception is raised. + """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + return self._update_listitem(nerd.nonfiles, self._moderate_nonfile, cmpmd, idorpos, replace, True) + + def replace_nonfile_components(self, id: str, cmps: List[Mapping]): + """ + replace all currently saved non-file components with the given list. The order of given list + will be the order in which they are saved. + """ + if not isinstance(cmps, (list, tuple)): + err = "components data is not a list" + raise InvalidUpdate(err, id, "components", errors=[err]) + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + self._replace_objlist(nerd.nonfiles, self._moderate_nonfile, cmps, True) + + def add_author(self, id: str, authmd: Mapping): + """ + add a new author to the specified dataset as described by the given metadata. + :param str id: the identifier for the dataset to add a new author to + :param dict authmd: the NERDm Person metadata describing the new author to add. If + the "@id" property is set, it will be ignored. 
+ """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + return self._add_listitem(nerd.authors, self._moderate_author, authmd, doval=True) + + def update_author(self, id: str, authmd: Mapping, idorpos=None, replace=False): + """ + update the metadata for an author in the specified dataset as identified either + by its ID or position in the list of authors. If identified author does not + exist, an exception is raised. + """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + return self._update_listitem(nerd.authors, self._moderate_author, authmd, idorpos, replace, True) + + def replace_authors(self, id: str, authors: List[Mapping]): + """ + replace all currently saved authors with the given list. The order of given list will be + the order in which they are saved. + """ + if not isinstance(authors, (list, tuple)): + err = "authors data is not a list" + raise InvalidUpdate(err, id, "authors", errors=[err]) + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + self._replace_objlist(nerd.authors, self._moderate_author, authors, True) + + def add_reference(self, id: str, refmd: Mapping): + """ + add a new author to the specified dataset as described by the given metadata. + :param str id: the identifier for the dataset to add a new reference to + :param dict authmd: the NERDm Reference metadata describing the new reference to add. If + the "@id" property is set, it will be ignored. 
+ """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + return self._add_listitem(nerd.references, self._moderate_reference, refmd, doval=True) + + def update_reference(self, id: str, refmd: Mapping, idorpos=None, replace=False): + """ + update the metadata for a references in the specified dataset as identified either + by its ID or position in the list of references. If identified reference does not + exist, an exception is raised. + """ + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + + return self._update_listitem(nerd.references, self._moderate_reference, refmd, idorpos, replace, True) + + def replace_references(self, id: str, refs: List[Mapping]): + """ + replace all currently saved references with the given list. The order of given list will be + the order in which they are saved. + """ + if not isinstance(refs, (list, tuple)): + err = "references data is not a list" + raise InvalidUpdate(err, id, "references", errors=[err]) + prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + nerd = self._store.open(id) + self._replace_objlist(nerd.references, self._moderate_reference, refs, True) + + def _add_listitem(self, objlist, moderate_func, data: Mapping, doval: bool=False): + data = moderate_func(data) + id = objlist.append(data) + data['@id'] = id + return data + + def _update_listitem(self, objlist, moderate_func, data: Mapping, idorpos=None, + replace: bool=False, doval: bool=False): + try: + olditem = objlist.get(idorpos) + if not replace: + data = self._merge_into(data, olditem) + except IndexError as ex: + raise ObjectNotFound("Item not found at position "+str(ex)) from ex + except KeyError as ex: + raise ObjectNotFound("Item not found with id="+str(ex)) from ex + + data = moderate_func(data, doval=doval) # may raise InvalidUpdate + objlist.set(olditem["@id"], data) + if 
not data.get("@id"): + data["@id"] = olditem["@id"] + return data + + def _merge_comps_for_update(self, nerd: NERDResource, data: List[Mapping], + replace: bool=False, doval: bool=False): + # the point of this function is to prep the data for update, collecting as many + # validation errors upfront as possible + nonfiles = [] + files = [] + errors = [] + + # collate the components + for cmp in data: + # is it a file or a non-file? + cmplist = None + if cmp.get("@id"): + if cmp['@id'] in nerd.nonfiles.ids: + cmplist = nonfiles + elif cmp['@id'] in nerd.files.ids: + cmplist = files + + if cmplist is None: + if cmp.get('filepath'): + cmplist = files + else: + cmplist = nonfiles + + cmplist.append(cmp) + + try: + nonfiles = self._merge_objlist_for_update(nerd.nonfiles, self._moderate_nonfile, nonfiles, + replace, doval) + except InvalidUpdate as ex: + errors.extend(ex.errors) + + for i, cmp in enumerate(files): + oldcmp = None + if not replace: + if cmp.get("@id") in nerd.files.ids: + oldcmp = nerd.files.get(cmp['@id']) + elif cmp.get("filepath") and nerd.files.exists(cmp["filepath"]): + oldcmp = nerd.files.get(cmp['filepath']) + + if oldcmp: + cmp = self._merge_into(cmp, oldcmp) + elif cmp.get("@id"): + cmp = deepcopy(cmp) + del cmp["@id"] + + try: + files[i] = self._moderate_file(cmp, doval) + except InvalidUpdate as ex: + errors.extend(ex.errors) + + if errors: + raise InvalidUpdate("%d file validation errors detected" % len(ex.errors), + errors=ex.errors) + + files.sort(key=lambda cmp: cmp.get("filepath")) # this places subcollections before their contents + return files, nonfiles + + def _merge_objlist_for_update(self, objlist, moderate_func, data: List[Mapping], + replace: bool=False, doval: bool=False): + # the point of this function is to prep the data for update, collecting as many + # validation errors upfront as possible + def merge_item(item): + olditem = None + if not replace and item.get("@id"): + try: + olditem = objlist.get(item["@id"]) + except 
KeyError: + pass + + if olditem: + item = self._merge_into(item, olditem) + elif item.get("@id"): + item = deepcopy(item) + del item["@id"] + + return moderate_func(item, doval) + + out = [] + errors = [] + for item in data: + try: + out.append(merge_item(item)) + except InvalidUpdate as ex: + errors.extend(ex.errors) + if errors: + raise InvalidUpdate("%d item validation errors detected" % len(errors), errors=errors) + return out + + def _replace_objlist(self, objlist, moderate_func, data: List[Mapping], doval: bool=False): + data = [ moderate_func(a, doval=doval) for a in data ] # may raise InvalidUpdate + objlist.empty() + for item in data: + objlist.append(item) + + def _update_objlist(self, objlist, moderate_func, data: List[Mapping], doval: bool=False): + # match the items in the given list to existing items currently store by their ids; for each + # match, the item metadata will be merged with the matching metadata. If there is no + # match, the item will be appended. This method attempts to ferret out all errors + # before updating any items + newitems = [] + errors = [] + nbad = 0 + for item in data: + olditem = None + if item.get("@id"): + olditem = objlist.get(item["@id"]) + item = self._merge_into(item, olditem) + try: + item = moderate_func(item, doval=doval) + except InvalidUpdate as ex: + errors.extend(ex.errors) + nbad += 1 + newitems.append(item) + + if errors: + raise InvalidUpdate("%d items contained validation errors" % nbad, errors=errors) + + for item in newitems: + if item.get("@id"): + objlist.set(item["@id"], item) + else: + objlist.append(item) + + ## This is an implementation based on position rather than id + # curcount = len(objlist) + # for i, item in enumerate(data): + # olditem = None + # if i < curcount: + # olditem = objlist.get(i) + # data[i] = self._merge_into(data[i], olditem) + # data[i] = moderate_func(data[i], doval=doval) # may raise InvalidUpdate + # + # for i, item in enumerate(data): + # if i < curcount: + # 
objlist.set(i, item) + # else: + # objlist.append(item) + + + + + + +################# + + + def validate_json(self, json, schemauri=None): + """ + validate the given JSON data record against the give schema, raising an exception if it + is not valid. + + :param dict json: the (parsed) JSON data to validate + :param str schemauri: the JSONSchema URI to validate the input against. + :raises InvalidUpdate: if the data is found to be invalid against the schema; the exception's + ``errors`` property will list all the errors found. + """ + errors = [] + if self._valid8r: + if not schemauri: + schemauri = json.get("_schema") + if not schemauri: + raise ValueError("validate_json(): No schema URI specified for input data") + errors = self._valid8r.validate(json, schemauri=schemauri, strict=True, raiseex=False) + else: + self.log.warning("Unable to validate submitted NERDm data") + + if len(errors) > 0: + raise InvalidUpdate("NERDm Schema validation errors found", errors=errors, sys=self) + + def _moderate_text(self, val, resmd=None, doval=True): # make sure input value is the right type, is properly encoded, and # does not contain any illegal bits @@ -712,141 +1239,40 @@ def _moderate_contact(self, info, resmd=None, replace=False, doval=True): return info - def _replace_authors(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): - if not isinstance(data, list): - raise InvalidUpdate("authors data is not a list", sys=self) - self._replace_listitems(nerd.authors, self._moderate_author, data) - - def _update_author(self, nerd: NERDResource, data: Mapping, pos: int=None, replace=False): - if not isinstance(data, Mapping): - raise InvalidUpdate("author data is not an object", sys=self) - self._update_listitem(nerd.authors, self._moderate_author, data, pos, replace) - - def _update_authors(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): - if not isinstance(data, list): - raise InvalidUpdate("authors data is not a list", sys=self) - 
self._update_objlist(nerd.authors, self._moderate_author, data) - - def _replace_references(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): - if not isinstance(data, list): - raise InvalidUpdate("references data is not a list", sys=self) - self._replace_listitems(nerd.references, self._moderate_reference, data) - - def _update_reference(self, nerd: NERDResource, data: Mapping, pos: int=None, replace=False): - if not isinstance(data, Mapping): - raise InvalidUpdate("reference data is not an object", sys=self) - self._update_listitem(nerd.references, self._moderate_reference, data, pos, replace) - - def _update_references(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): - if not isinstance(data, list): - raise InvalidUpdate("references data is not a list", sys=self) - self._update_objlist(nerd.references, self._moderate_reference, data) - - def _replace_listitems(self, objlist, moderate_func, data: List[Mapping]): - data = [ moderate_func(a) for a in data ] # may raise InvalidUpdate - objlist.empty() - for item in data: - objlist.append(auth) - - def _update_listitem(self, objlist, moderate_func, data: Mapping, pos: int=None, replace=False): - key = pos - if key is None: - key = data.get("@id") - olditem = None - if key: - try: - olditem = objlist.get(key) - if not replace: - data = self._merge_into(data, olditem) - except (KeyError, IndexError) as ex: - pass - - data = moderate_func(data) # may raise InvalidUpdate - - if olditem is None: - objlist.append(data) - else: - objlist.set(key, data) - - def _update_objlist(self, objlist, moderate_func, data: List[Mapping]): - # merge and validate all items before committing them - for i, a in enumerate(data): - olditem = None - if a.get('@id'): - try: - olditem = objlist.get(a['@id']) - data[i] = self._merge_into(a, olditem) - except KeyError as ex: - pass - data[i] = moderate_func(data[i]) # may raise InvalidUpdate - - # now commit - for a in data: - if a.get('@id'): - 
objlist.set(a['@id'], a) - else: - objlist.append(a) - - def _replace_components(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): - if not isinstance(data, list): - raise InvalidUpdate("authors data is not a list", sys=self) - data = [ self._moderate_comp(a) for a in data ] # may raise InvalidUpdate - nerd.nonfiles.empty() - nerd.files.empty() - for cmp in data: - if 'filepath' in cmp: - nerd.files.set_file_at(cmp, cmp['filepath']) - else: - nerd.nonfiles.append(cmp) - - def _update_component(self, nerd: NERDResource, data: Mapping, pos: int=None, replace=False): + def _update_component(self, nerd: NERDResource, data: Mapping, key=None, replace=False, doval=False): if not isinstance(data, Mapping): raise InvalidUpdate("component data is not an object", sys=self) - if 'filepath' in data: - self.update_listitem(nerd.files, self._moderate_file, pos, replace) + id = key if isinstance(key, str) else data.get("@id") + filepath = data.get('filepath') + + oldfile = None + if id: + oldfile = nerd.files.get(id) + if not filepath and oldfile: + filepath = oldfile.get('filepath') + pos = key if isinstance(key, int) else None + + if filepath: + data = self._update_file_comp(nerd, data, oldfile, replace=replace, doval=doval) else: - self.update_listitem(nerd.nonfiles, self._moderate_nonfile, pos, replace) - - def _update_components(self, prec: ProjectRecord, nerd: NERDResource, data: List[Mapping]): - if not isinstance(data, list): - raise InvalidUpdate("references data is not a list", sys=self) - - # merge and validate all items before committing them - for i, cmp in enumerate(data): - oldcmp = None - if cmp.get('@id'): - try: - oldcmp = objlist.get(a['@id']) - data[i] = self._merge_into(cmp, oldcmp) - except KeyError as ex: - pass - if 'filepath' in cmp: - data[i] = self._moderate_file(data[i]) # may raise InvalidUpdate - else: - data[i] = self._moderate_nonfile(data[i]) # may raise InvalidUpdate - - # now commit - for a in data: - objlist = nerd.files if 
'filepath' in cmp else nerd.nonfiles - if a.get('@id'): - objlist.set(a['@id'], a) - else: - objlist.append(a) + data = self._update_listitem(nerd.nonfiles, self._moderate_nonfile, data, pos, replace, doval) + return data def _filter_props(self, obj, props): delprops = [k for k in obj if k not in props or (not obj.get(k) and obj.get(k) is not False)] for k in delprops: del obj[k] - _authprops = set("_schema fn familyName givenName middleName orcid affiliation proxyFor".split()) + _authprops = set("_schema @id fn familyName givenName middleName orcid affiliation proxyFor".split()) _affilprops = set("@id title abbrev proxyFor location label description subunits".split()) def _moderate_author(self, auth, doval=True): # we are assuming that merging has already occured self._filter_props(auth, self._authprops) - auth["@type"] = "foaf:Person" + if not auth.get("@type"): + auth["@type"] = "foaf:Person" # Set fn at finalization # if not auth.get('fn') and auth.get('familyName') and auth.get('givenName'): # auth['fn'] = auth['familyName'] @@ -874,7 +1300,8 @@ def _moderate_author(self, auth, doval=True): if not isinstance(affil["abbrev"], list): raise InvalidUpdate("Affiliate abbrev property is not a list: "+ str(affil["abbrev"])) - affil["abbrev"].append(NIST_ABBREV) + if NIST_ABBREV not in affil["abbrev"]: + affil["abbrev"].append(NIST_ABBREV) # Finally, validate (if requested) schemauri = NERDMPUB_SCH_ID + "/definitions/Person" @@ -901,16 +1328,17 @@ def _moderate_reference(self, ref, doval=True): ref["refType"] = "References" if not ref.get(EXTSCHPROP) and ref["refType"] in self._reftypes: ref.setdefault(EXTSCHPROP, []) - try: - # upgrade the version of the BIB extension - if any(s.startswith(NERDMBIB_SCH_ID_BASE) and s != NERDMBIB_SCH_ID - for s in ref[EXTSCHPROP]): - ref[EXTSCHPROP] = [NERDMBIB_SCH_ID if s.startswith(NERDMBIB_SCH_ID_BASE) - else s for s in ref[EXTSCHPROP]] - except AttributeError as ex: - raise InvalidUpdate("_extensionSchemas: value is not a list of 
strings", sys=self) from ex - if NERDMBIB_SCH_ID not in ref[EXTSCHPROP]: - ref[EXTSCHPROP].append(NERDMBIB_SCH_ID) + try: + # upgrade the version of the BIB extension + for i, uri in enumerate(ref.get(EXTSCHPROP,[])): + if uri.startswith(NERDMBIB_SCH_ID_BASE) and not uri.startswith(NERDMBIB_SCH_ID): + parts = ref[EXTSCHPROP][i].split('#', 1) + if len(parts) == 2: + ref[EXTSCHPROP][i] = NERDMBIB_SCH_ID + parts[1] + except AttributeError as ex: + raise InvalidUpdate("_extensionSchemas: value is not a list of strings", sys=self) from ex + if ref.get("refType") in self._reftypes and NERDMBIB_DEF+"DCiteReference" not in ref[EXTSCHPROP]: + ref[EXTSCHPROP].append(NERDMBIB_DEF+"DCiteReference") if not ref.get("@type"): ref["@type"] = ["deo:BibliographicReference"] @@ -979,15 +1407,15 @@ def _moderate_file(self, cmp, doval=True): # make sure the _extensionSchemas list is filled out cmp.setdefault(EXTSCHPROP, []) - if nerdutils.is_type(cmp, "DataFile") and \ - not any(s.endswith("#/definitions/DataFile") for s in cmp[EXTSCHPROP]): - cmp[EXTSCHPROP].append(NERDMPUB_DEF+"DataFile") - elif nerdutils.is_type(cmp, "ChecksumFile") and \ - not any(s.endswith("#/definitions/ChecksumFile") for s in cmp[EXTSCHPROP]): - cmp[EXTSCHPROP].append(NERDMPUB_DEF+"ChecksumFile") - elif nerdutils.is_type(cmp, "DownloadableFile") and \ - not any(s.endswith("#/definitions/DownloadableFile") for s in cmp[EXTSCHPROP]): - cmp[EXTSCHPROP].append(NERDMPUB_DEF+"DownloadableFile") + if nerdutils.is_type(cmp, "DataFile"): + if not any(s.endswith("#/definitions/DataFile") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"DataFile") + elif nerdutils.is_type(cmp, "ChecksumFile"): + if not any(s.endswith("#/definitions/ChecksumFile") for s in cmp[EXTSCHPROP]): + cmp[EXTSCHPROP].append(NERDMPUB_DEF+"ChecksumFile") + elif nerdutils.is_type(cmp, "DownloadableFile"): + if not any(s.endswith("#/definitions/DownloadableFile") for s in cmp[EXTSCHPROP]): + 
cmp[EXTSCHPROP].append(NERDMPUB_DEF+"DownloadableFile") if nerdutils.is_type(cmp, "Subcollection") and \ not any(s.endswith("#/definitions/Subcollection") for s in cmp[EXTSCHPROP]): @@ -1079,6 +1507,9 @@ def _moderate_nonfile(self, cmp, doval=True): return cmp def _moderate_res_data(self, resmd, basemd, nerd, replace=False, doval=True): + if not resmd.get("_schema"): + resmd["_schema"] = NERDM_SCH_ID + restypes = resmd.get("@type", []) if not replace: restypes += basemd.get("@type", []) @@ -1087,7 +1518,7 @@ def _moderate_res_data(self, resmd, basemd, nerd, replace=False, doval=True): errors = [] if 'contactPoint' in resmd: - if not resmd.get("contactPoint"): + if "contactPoint" not in resmd and not resmd.get("contactPoint"): del resmd["contactPoint"] else: try: @@ -1097,7 +1528,7 @@ def _moderate_res_data(self, resmd, basemd, nerd, replace=False, doval=True): errors.extend(ex.errors) if 'description' in resmd: - if not resmd.get("description"): + if "description" not in resmd and not resmd.get("description"): del resmd["description"] else: try: diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index 7982098..c126e11 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -203,4 +203,4 @@ MIDASDBClientFactory = MongoDBClientFactory -from .project import ProjectService, ProjectServiceFactory, InvalidUpdate +from .project import ProjectService, ProjectServiceFactory, InvalidUpdate, PartNotAccessible diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index f2bdd60..128c89e 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -463,9 +463,9 @@ def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=N """ initialize the exception :param str message: a brief description of the problem with the user input - :param str recid: the id of the record that was existed - :param str part: the part 
of the record that was requested. Do not provide this parameter if - the entire record does not exist. + :param str recid: the id of the record that data was provided for + :param str part: the part of the record that was requested for update. Do not provide + this parameter if the entire record was provided. :param [str] errors: a listing of the individual errors uncovered in the data """ if errors: diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index 38a7419..09b39c4 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -1,10 +1,11 @@ -import os, json, pdb, logging, tempfile +import os, json, pdb, logging, tempfile, pathlib import unittest as test -from nistoar.midas.dbio import inmem, base, AlreadyExists, InvalidUpdate +from nistoar.midas.dbio import inmem, base, AlreadyExists, InvalidUpdate, ObjectNotFound, PartNotAccessible from nistoar.midas.dbio import project as prj from nistoar.midas.dap.service import mds3 from nistoar.pdr.publish import prov +from nistoar.pdr.utils import read_nerd from nistoar.nerdm.constants import CORE_SCHEMA_URI tmpdir = tempfile.TemporaryDirectory(prefix="_test_mds3.") @@ -30,6 +31,10 @@ def tearDownModule(): nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") +# test records +testdir = pathlib.Path(__file__).parents[0] +pdr2210 = testdir.parents[2] / 'pdr' / 'describe' / 'data' / 'pdr2210.json' +ncnrexp0 = testdir.parents[2] / 'pdr' / 'publish' / 'data' / 'ncnrexp0.json' class TestMDS3DAPService(test.TestCase): @@ -48,7 +53,7 @@ def setUp(self): "default_shoulder": "mdsy", }, "assign_doi": "always", - "doi_naan": "88888", + "doi_naan": "10.88888", "nerdstorage": { # "type": "fsbased", # "store_dir": os.path.join(tmpdir.name) @@ -73,6 +78,12 @@ def test_ctor(self): self.assertTrue(self.svc._valid8r) self.assertEqual(self.svc._minnerdmver, (0, 6)) + def test_ids_for(self): + 
self.create_service() + self.assertEqual(self.svc._aipid_for("ncnr0:goob"), "ncnr0-goob") + self.assertEqual(self.svc._arkid_for("ncnr0:goob"), "ark:/88434/ncnr0-goob") + self.assertEqual(self.svc._doi_for("ncnr0:goob"), "doi:10.88888/ncnr0-goob") + def test_create_record(self): self.create_service() self.assertTrue(not self.svc.dbcli.name_exists("goob")) @@ -84,7 +95,7 @@ def test_create_record(self): self.assertEqual(prec.owner, "nstr1") self.assertIn("_schema", prec.data) self.assertNotIn("_extensionSchemas", prec.data) # contains only data summary - self.assertEqual(prec.data['doi'], "doi:88888/mdsy-0003") + self.assertEqual(prec.data['doi'], "doi:10.88888/mdsy-0003") self.assertEqual(prec.data['@id'], "ark:/88434/mdsy-0003") self.assertTrue(self.svc.dbcli.name_exists("goob")) @@ -92,7 +103,7 @@ def test_create_record(self): self.assertEqual(prec2.name, "goob") self.assertEqual(prec2.id, "mdsy:0003") self.assertEqual(prec2.data['@id'], "ark:/88434/mdsy-0003") - self.assertEqual(prec2.data['doi'], "doi:88888/mdsy-0003") + self.assertEqual(prec2.data['doi'], "doi:10.88888/mdsy-0003") self.assertEqual(prec2.meta, {"creatorisContact": True, "resourceType": "data"}) self.assertEqual(prec2.owner, "nstr1") @@ -286,6 +297,42 @@ def test_moderate_contact(self): self.assertEqual(contact["@type"], "vcard:Contact") self.assertEqual(len(contact), 2) + def test_moderate_res_data(self): + self.create_service() + nerd = self.svc._store.open("nrd0:goob") + + try: + res = self.svc._moderate_res_data({}, {}, nerd) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(res.get("_schema"), mds3.NERDM_SCH_ID) + self.assertEqual(res.get("@type"), ["nrdp:PublicDataResource"]) + self.assertEqual(res.get("_extensionSchemas"), [ mds3.NERDMPUB_DEF+"PublicDataResource" ]) + self.assertEqual(len(res), 3) + + with self.assertRaises(InvalidUpdate): + self.svc._moderate_res_data({"description": 3}, {}, nerd) + + upd = { + "description": ["This is 
it."], + "contactPoint": { + "fn": "Edgar Allen Poe", + "hasEmail": "eap@dead.com" + }, + } + try: + res = self.svc._moderate_res_data(upd, res, nerd) + except InvalidUpdate as ex: + self.fail("Validation Error: "+ex.format_errors()) + self.assertEqual(res.get("_schema"), mds3.NERDM_SCH_ID) + self.assertEqual(res.get("@type"), ["nrdp:PublicDataResource"]) + self.assertEqual(res.get("_extensionSchemas"), [ mds3.NERDMPUB_DEF+"PublicDataResource" ]) + self.assertEqual(res.get("description"), ["This is it."]) + self.assertIn("contactPoint", res) + self.assertEqual(res.get("contactPoint",{}).get("hasEmail"), "eap@dead.com") + self.assertEqual(res.get("contactPoint",{}).get("@type"), "vcard:Contact") + + def test_moderate_author(self): self.create_service() @@ -314,6 +361,91 @@ def test_moderate_author(self): self.assertEqual(auth['affiliation'][0]['abbrev'], ["NIST"]) self.assertTrue(auth['affiliation'][0]['@id'].startswith("ror:")) + def test_replace_update_authors(self): + self.create_service() + prec = self.svc.create_record("goob") + id = prec.id + nerd = self.svc._store.open(id) + self.assertEqual(len(nerd.authors), 0) + + with self.assertRaises(InvalidUpdate): + self.svc.replace_authors(id, {"fn": "Edgar Allen Poe"}) + + self.svc.replace_authors(id, [ + { "familyName": "Cranston", "givenName": "Gurn", "middleName": "J." 
}, + { "fn": "Edgar Allen Poe", "affiliation": "NIST" } + ]) + self.assertEqual(len(nerd.authors), 2) + self.assertEqual(nerd.authors.get(0)["givenName"], "Gurn") + self.assertEqual(nerd.authors.get(0)["@type"], "foaf:Person") + self.assertEqual(nerd.authors.get(0)["@id"], "auth_0") + self.assertEqual(len(nerd.authors.get(0)), 5) + self.assertEqual(nerd.authors.get(1)["fn"], "Edgar Allen Poe") + self.assertEqual(nerd.authors.get(1)["affiliation"][0]["abbrev"], ["NIST"]) + self.assertEqual(nerd.authors.get(1)["affiliation"][0]["title"], + "National Institute of Standards and Technology") + self.assertEqual(nerd.authors.get(0)["@type"], "foaf:Person") + self.assertEqual(nerd.authors.get(1)["@id"], "auth_1") + self.assertEqual(len(nerd.authors.get(0)), 5) + + self.svc.update_author(id, { "fn": "Joe Don Baker" }, 1) + self.assertEqual(nerd.authors.get(0)["@id"], "auth_0") + self.assertEqual(nerd.authors.get(0)["givenName"], "Gurn") + self.assertEqual(len(nerd.authors.get(0)), 5) + self.assertEqual(nerd.authors.get(1)["@id"], "auth_1") + self.assertEqual(nerd.authors.get(1)["fn"], "Joe Don Baker") + self.assertEqual(nerd.authors.get(1)["affiliation"][0]["abbrev"], ["NIST"]) + self.assertEqual(nerd.authors.get(1)["affiliation"][0]["title"], + "National Institute of Standards and Technology") + self.assertEqual(nerd.authors.get(0)["@type"], "foaf:Person") + self.assertEqual(len(nerd.authors.get(0)), 5) + + self.svc.update_author(id, { "fn": "Joe Don Baker" }, 1, True) + self.assertEqual(nerd.authors.get(0)["@id"], "auth_0") + self.assertEqual(nerd.authors.get(0)["givenName"], "Gurn") + self.assertEqual(len(nerd.authors.get(0)), 5) + self.assertEqual(nerd.authors.get(1)["@id"], "auth_1") + self.assertEqual(nerd.authors.get(1)["fn"], "Joe Don Baker") + self.assertNotIn("affiliation", nerd.authors.get(1)) + self.assertEqual(nerd.authors.get(0)["@type"], "foaf:Person") + self.assertEqual(len(nerd.authors.get(1)), 3) + + # self.svc._update_objlist(nerd.authors, 
self.svc._moderate_author, + # [{"@id": "auth_1", "fn": "Edgar Allen Poe", "affiliation": "NIST"}]) + self.svc.update_data(id, [{"@id": "auth_1", "fn": "Edgar Allen Poe", "affiliation": "NIST"}], + "authors") + self.assertEqual(nerd.authors.get(0)["@id"], "auth_0") + self.assertEqual(nerd.authors.get(0)["givenName"], "Gurn") + self.assertNotIn("fn", nerd.authors.get(0)) + self.assertEqual(len(nerd.authors.get(0)), 5) + self.assertEqual(nerd.authors.get(1)["@id"], "auth_1") + self.assertEqual(nerd.authors.get(1)["fn"], "Edgar Allen Poe") + self.assertEqual(nerd.authors.get(1)["affiliation"][0]["abbrev"], ["NIST"]) + self.assertEqual(nerd.authors.get(1)["affiliation"][0]["title"], + "National Institute of Standards and Technology") + self.assertEqual(nerd.authors.get(0)["@type"], "foaf:Person") + self.assertEqual(len(nerd.authors.get(1)), 4) + + self.svc.replace_authors(id, [nerd.authors.get(1), nerd.authors.get(0)]) + self.assertEqual(len(nerd.authors), 2) + self.assertEqual(nerd.authors.get(0)["@id"], "auth_1") + self.assertEqual(nerd.authors.get(0)["fn"], "Edgar Allen Poe") + self.assertEqual(nerd.authors.get(0)["affiliation"][0]["abbrev"], ["NIST"]) + self.assertEqual(nerd.authors.get(0)["affiliation"][0]["title"], + "National Institute of Standards and Technology") + self.assertEqual(nerd.authors.get(0)["@type"], "foaf:Person") + self.assertEqual(len(nerd.authors.get(0)), 4) + self.assertEqual(nerd.authors.get(1)["givenName"], "Gurn") + self.assertEqual(nerd.authors.get(1)["@type"], "foaf:Person") + self.assertEqual(nerd.authors.get(1)["@id"], "auth_0") + self.assertEqual(len(nerd.authors.get(1)), 5) + + self.svc.add_author(id, {"fn": "Madonna"}) + self.assertEqual(len(nerd.authors), 3) + self.assertEqual(nerd.authors.get(0)["fn"], "Edgar Allen Poe") + self.assertEqual(nerd.authors.get(1)["givenName"], "Gurn") + self.assertEqual(nerd.authors.get(2)["fn"], "Madonna") + def test_moderate_reference(self): self.create_service() @@ -327,12 +459,14 @@ def 
test_moderate_reference(self): self.assertEqual(ref['refType'], "References") self.assertIn('_extensionSchemas', ref) self.assertEqual(len(ref['_extensionSchemas']), 1) - self.assertEqual(ref['_extensionSchemas'][0], mds3.NERDMBIB_SCH_ID) + self.assertEqual(ref['_extensionSchemas'][0], mds3.NERDMBIB_DEF+"DCiteReference") self.assertEqual(ref["proxyFor"], "doi:10.18434/example") self.assertEqual(len(ref), 5) try: ref = self.svc._moderate_reference({"proxyFor": "doi:10.18434/example", + "_extensionSchemas": + ["https://data.nist.gov/od/dm/nerdm-schema/bib/v0.6#/definitions/DCiteReference"], "goob": "gurn"}) except InvalidUpdate as ex: self.fail("Validation Error: "+ex.format_errors()) @@ -341,10 +475,9 @@ def test_moderate_reference(self): self.assertEqual(ref['refType'], "References") self.assertIn('_extensionSchemas', ref) self.assertEqual(len(ref['_extensionSchemas']), 1) - self.assertEqual(ref['_extensionSchemas'][0], mds3.NERDMBIB_SCH_ID) + self.assertEqual(ref['_extensionSchemas'][0], mds3.NERDMBIB_DEF+"DCiteReference") self.assertEqual(ref["proxyFor"], "doi:10.18434/example") self.assertEqual(len(ref), 5) - try: ref = self.svc._moderate_reference({"location": "doi:10.18434/example", "refType": "myown", @@ -360,6 +493,190 @@ def test_moderate_reference(self): "abbrev": ["SRB-400"], "citation": "C", "label": "drink me", "inprep": False}) + def test_replace_update_references(self): + self.create_service() + prec = self.svc.create_record("goob") + id = prec.id + nerd = self.svc._store.open(id) + + with self.assertRaises(InvalidUpdate): + self.svc.replace_references(id, {"location": "https://doi.org/10.1/blah"}) + + self.svc.replace_references(id, [ + {"location": "https://doi.org/10.1/blah"}, + {"proxyFor": "doi:10.18434/example", "goob": "gurn"} + ]) + self.assertEqual(len(nerd.references), 2) + self.assertEqual(nerd.references.get(0)["location"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.references.get(0)["refType"], "References") + 
self.assertEqual(nerd.references.get(0)["proxyFor"], "doi:10.1/blah") + self.assertEqual(nerd.references.get(0)["@id"], "ref_0") + self.assertEqual(nerd.references.get(0)["_extensionSchemas"], + [mds3.NERDMBIB_DEF+"DCiteReference"]) + self.assertNotIn("title", nerd.references.get(0)) + self.assertEqual(nerd.references.get(1)["location"], "https://doi.org/10.18434/example") + self.assertEqual(nerd.references.get(1)["refType"], "References") + self.assertEqual(nerd.references.get(1)["proxyFor"], "doi:10.18434/example") + self.assertEqual(nerd.references.get(1)["@id"], "ref_1") + self.assertEqual(nerd.references.get(1)["_extensionSchemas"], + [mds3.NERDMBIB_DEF+"DCiteReference"]) + self.assertNotIn("title", nerd.references.get(1)) + + self.svc.update_reference(id, {"title": "The End of Film"}, 0) + self.assertEqual(nerd.references.get(0)["@id"], "ref_0") + self.assertEqual(nerd.references.get(0)["location"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.references.get(0)["refType"], "References") + self.assertEqual(nerd.references.get(0)["proxyFor"], "doi:10.1/blah") + self.assertEqual(nerd.references.get(0)["title"], "The End of Film") + self.assertEqual(nerd.references.get(1)["@id"], "ref_1") + self.assertEqual(nerd.references.get(1)["location"], "https://doi.org/10.18434/example") + self.assertNotIn("title", nerd.references.get(1)) + + self.svc.replace_references(id, [nerd.references.get(1), + {"proxyFor": "doi:10.2/another"}, + nerd.references.get(0)]) + self.assertEqual(len(nerd.references), 3) + self.assertEqual(nerd.references.get(0)["location"], "https://doi.org/10.18434/example") + self.assertEqual(nerd.references.get(0)["refType"], "References") + self.assertEqual(nerd.references.get(0)["proxyFor"], "doi:10.18434/example") + self.assertEqual(nerd.references.get(0)["@id"], "ref_1") + self.assertEqual(nerd.references.get(0)["_extensionSchemas"], + [mds3.NERDMBIB_DEF+"DCiteReference"]) + self.assertNotIn("title", nerd.references.get(0)) + 
self.assertEqual(nerd.references.get(1)["location"], "https://doi.org/10.2/another") + self.assertEqual(nerd.references.get(1)["refType"], "References") + self.assertEqual(nerd.references.get(1)["proxyFor"], "doi:10.2/another") + self.assertEqual(nerd.references.get(1)["@id"], "ref_2") + self.assertEqual(nerd.references.get(1)["_extensionSchemas"], + [mds3.NERDMBIB_DEF+"DCiteReference"]) + self.assertNotIn("title", nerd.references.get(1)) + self.assertEqual(nerd.references.get(2)["@id"], "ref_0") + self.assertEqual(nerd.references.get(2)["location"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.references.get(2)["refType"], "References") + self.assertEqual(nerd.references.get(2)["proxyFor"], "doi:10.1/blah") + self.assertEqual(nerd.references.get(2)["title"], "The End of Film") + + self.svc.add_reference(id, {"location": "https://example.com/doc"}) + self.assertEqual(len(nerd.references), 4) + self.assertEqual(nerd.references.get(0)["location"], "https://doi.org/10.18434/example") + self.assertEqual(nerd.references.get(1)["location"], "https://doi.org/10.2/another") + self.assertEqual(nerd.references.get(2)["location"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.references.get(3)["location"], "https://example.com/doc") + + def test_replace_update_nonfiles(self): + self.create_service() + prec = self.svc.create_record("goob") + id = prec.id + nerd = self.svc._store.open(id) + + with self.assertRaises(InvalidUpdate): + self.svc.replace_nonfile_components(id, {"filepath": "top.zip"}) + + self.svc.replace_nonfile_components(id, [ + {"accessURL": "https://doi.org/10.1/blah"}, + ]) + self.assertEqual(len(nerd.nonfiles), 1) + self.assertEqual(len(nerd.files), 0) + self.assertEqual(nerd.nonfiles.get(0)["accessURL"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.nonfiles.get(0)["@id"], "cmp_0") + self.assertEqual(nerd.nonfiles.get(0)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["_extensionSchemas"], 
[mds3.NERDMPUB_DEF+"AccessPage"]) + self.assertNotIn("title", nerd.nonfiles.get(0)) + + self.svc.add_nonfile_component(id, {"accessURL": "https://doi.org/10.1/blue"}) + self.assertEqual(len(nerd.nonfiles), 2) + self.assertEqual(len(nerd.files), 0) + self.assertEqual(nerd.nonfiles.get(0)["accessURL"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.nonfiles.get(0)["@id"], "cmp_0") + self.assertEqual(nerd.nonfiles.get(0)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + self.assertEqual(nerd.nonfiles.get(1)["accessURL"], "https://doi.org/10.1/blue") + self.assertEqual(nerd.nonfiles.get(1)["@id"], "cmp_1") + self.assertEqual(nerd.nonfiles.get(1)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(1)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + + with self.assertRaises(ObjectNotFound): + self.svc.update_nonfile_component(id, {"title": "The End of Film"}, 2) + + self.svc.update_nonfile_component(id, {"title": "The End of Film"}, 0) + self.assertEqual(nerd.nonfiles.get(0)["accessURL"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.nonfiles.get(0)["@id"], "cmp_0") + self.assertEqual(nerd.nonfiles.get(0)["title"], "The End of Film") + self.assertEqual(nerd.nonfiles.get(0)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + self.assertNotIn("title", nerd.nonfiles.get(1)) + + self.svc.replace_nonfile_components(id, [nerd.nonfiles.get(1), nerd.nonfiles.get(0)]) + self.assertEqual(nerd.nonfiles.get(1)["accessURL"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.nonfiles.get(1)["@id"], "cmp_0") + self.assertEqual(nerd.nonfiles.get(1)["title"], "The End of Film") + self.assertEqual(nerd.nonfiles.get(1)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(1)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["accessURL"], 
"https://doi.org/10.1/blue") + self.assertEqual(nerd.nonfiles.get(0)["@id"], "cmp_1") + self.assertEqual(nerd.nonfiles.get(0)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + + + def test_replace_update_components(self): + self.create_service() + prec = self.svc.create_record("goob") + id = prec.id + nerd = self.svc._store.open(id) + + with self.assertRaises(InvalidUpdate): + self.svc.replace_data(id, {"filepath": "top.zip"}, part="components") + + self.svc.replace_data(id, [ + {"accessURL": "https://doi.org/10.1/blah"}, + {"filepath": "raw", "description": "raw data"}, + {"downloadURL": "pdr:file", "filepath": "raw/data.csv"} + ], "components") + self.assertEqual(len(nerd.nonfiles), 1) + self.assertEqual(nerd.nonfiles.get(0)["accessURL"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.nonfiles.get(0)["@id"], "cmp_0") + self.assertEqual(nerd.nonfiles.get(0)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + self.assertEqual(len(nerd.files), 2) + ids = nerd.files.ids + self.assertEqual(nerd.files.get(ids[0])["filepath"], "raw") + self.assertEqual(nerd.files.get(ids[0])["description"], "raw data") + self.assertEqual(nerd.files.get(ids[0])["@id"], "coll_0") + self.assertEqual(nerd.files.get(ids[0])["@type"], ["nrdp:Subcollection"]) + self.assertEqual(nerd.files.get(ids[0])["_extensionSchemas"], [mds3.NERDMPUB_DEF+"Subcollection"]) + self.assertEqual(nerd.files.get(ids[1])["filepath"], "raw/data.csv") + self.assertEqual(nerd.files.get(ids[1])["downloadURL"], "pdr:file") + self.assertEqual(nerd.files.get(ids[1])["mediaType"], "text/csv") + self.assertEqual(nerd.files.get(ids[1])["format"], {"description": "data table"}) + self.assertEqual(nerd.files.get(ids[1])["@id"], "file_1") + self.assertEqual(nerd.files.get(ids[1])["@type"], ["nrdp:DataFile", "nrdp:DownloadableFile"]) + 
self.assertEqual(nerd.files.get(ids[1])["_extensionSchemas"], [mds3.NERDMPUB_DEF+"DataFile"]) + + self.svc.update_data(id, [{"title": "All data", "@id": "file_1"}], "components") + self.assertEqual(len(nerd.nonfiles), 1) + self.assertEqual(nerd.nonfiles.get(0)["accessURL"], "https://doi.org/10.1/blah") + self.assertEqual(nerd.nonfiles.get(0)["@id"], "cmp_0") + self.assertEqual(nerd.nonfiles.get(0)["@type"], ["nrdp:AccessPage"]) + self.assertEqual(nerd.nonfiles.get(0)["_extensionSchemas"], [mds3.NERDMPUB_DEF+"AccessPage"]) + self.assertNotIn('title', nerd.nonfiles.get(0)) + self.assertEqual(len(nerd.files), 2) + self.assertEqual(nerd.files.get(ids[0])["filepath"], "raw") + self.assertEqual(nerd.files.get(ids[0])["description"], "raw data") + self.assertNotIn('title', nerd.files.get(ids[0])) + self.assertEqual(nerd.files.get(ids[0])["@id"], "coll_0") + self.assertEqual(nerd.files.get(ids[0])["@type"], ["nrdp:Subcollection"]) + self.assertEqual(nerd.files.get(ids[0])["_extensionSchemas"], [mds3.NERDMPUB_DEF+"Subcollection"]) + self.assertEqual(nerd.files.get(ids[1])["filepath"], "raw/data.csv") + self.assertEqual(nerd.files.get(ids[1])["downloadURL"], "pdr:file") + self.assertEqual(nerd.files.get(ids[1])["mediaType"], "text/csv") + self.assertEqual(nerd.files.get(ids[1])["format"], {"description": "data table"}) + self.assertEqual(nerd.files.get(ids[1])["@id"], "file_1") + self.assertEqual(nerd.files.get(ids[1])["title"], "All data") + self.assertEqual(nerd.files.get(ids[1])["@type"], ["nrdp:DataFile", "nrdp:DownloadableFile"]) + self.assertEqual(nerd.files.get(ids[1])["_extensionSchemas"], [mds3.NERDMPUB_DEF+"DataFile"]) + + + def test_moderate_file(self): self.create_service() @@ -479,9 +796,142 @@ def test_moderate_nonfile(self): "proxyFor": "ark:/88434/bob", "title": "Bob the Blob", "description": "wow", "_extensionSchemas": [mds3.NERDM_DEF+"IncludedResource"]}) + def test_get_sw_desc_for(self): + self.create_service() + cmp = 
self.svc._get_sw_desc_for("https://github.com/foo/bar") + self.assertEqual(cmp, { + "@id": "pdr:see/repo:bar", + "@type": ["nrd:AccessPage", "dcat:Distribution"], + "title": "Software Repository in GitHub", + "accessURL": "https://github.com/foo/bar" + }) + + cmp = self.svc._get_sw_desc_for("https://bitbucket.com/foo/bar") + self.assertEqual(cmp, { + "@id": "pdr:see/repo:bar", + "@type": ["nrd:AccessPage", "dcat:Distribution"], + "title": "Software Repository", + "accessURL": "https://bitbucket.com/foo/bar" + }) + + def test_update(self): + rec = read_nerd(pdr2210) + self.create_service() - + prec = self.svc.create_record("goob") + pdrid = "ark:/88434/%s-%s" % tuple(prec.id.split(":")) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["doi"], "doi:10.88888/mdsy-0003") + self.assertNotIn("title", nerd) + self.assertNotIn("authors", nerd) + self.assertNotIn("references", nerd) + self.assertNotIn("components", nerd) + + try: + result = self.svc.replace_data(prec.id, rec) + except InvalidUpdate as ex: + self.fail(str(ex) + ":\n" + "\n".join([str(e) for e in ex.errors])) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(result, nerd) + + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["doi"], "doi:10.88888/mdsy-0003") + self.assertTrue(nerd["title"].startswith("OptSortSph: ")) + self.assertEqual(nerd["contactPoint"]["fn"], "Zachary Levine") + self.assertNotIn("bureauCode", nerd) + self.assertNotIn("ediid", nerd) + self.assertEqual(len(nerd["references"]), 1) + self.assertEqual(len(nerd["components"]), 5) + self.assertNotIn("authors", nerd) + + result = self.svc.update_data(prec.id, {"title": "The End of Food"}) + self.assertEqual(result["@id"], pdrid) + self.assertEqual(result["title"], "The End of Food") + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["title"], "The End of Food") + self.assertEqual(nerd["contactPoint"]["fn"], "Zachary 
Levine") + self.assertEqual(len(nerd["references"]), 1) + self.assertEqual(len(nerd["components"]), 5) + self.assertNotIn("authors", nerd) + + result = self.svc.update_data(prec.id, "The End of Film", "title") + self.assertEqual(result, "The End of Film") + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["title"], "The End of Film") + self.assertEqual(nerd["contactPoint"]["fn"], "Zachary Levine") + self.assertEqual(len(nerd["references"]), 1) + self.assertEqual(len(nerd["components"]), 5) + self.assertNotIn("authors", nerd) + + self.assertEqual(nerd["references"][0]["refType"], "IsReferencedBy") + result = self.svc.update_data(prec.id, {"refType": "References"}, "references[0]") + self.assertEqual(result["location"], "https://doi.org/10.1364/OE.24.014100") + self.assertEqual(result["refType"], "References") + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["title"], "The End of Film") + self.assertEqual(len(nerd["references"]), 1) + self.assertEqual(len(nerd["components"]), 5) + self.assertEqual(nerd["references"][0]["location"], "https://doi.org/10.1364/OE.24.014100") + self.assertEqual(nerd["references"][0]["refType"], "References") + + with self.assertRaises(ObjectNotFound): + self.svc.update_data(prec.id, {"refType": "References"}, "references[1]") + with self.assertRaises(PartNotAccessible): + self.svc.update_data(prec.id, {"refType": "References"}, "references[-1]") + with self.assertRaises(ObjectNotFound): + self.svc.update_data(prec.id, {"refType": "References"}, "references[goober]") + with self.assertRaises(InvalidUpdate): + self.svc.update_data(prec.id, {"refType": "IsGurnTo"}, "references[0]") + try: + result = self.svc.update_data(prec.id, {"refType": "IsSourceOf"}, "references[ref_0]") + except InvalidUpdate as ex: + self.fail(str(ex) + ":\n" + "\n".join([str(e) for e in ex.errors])) + self.assertEqual(result["location"], 
"https://doi.org/10.1364/OE.24.014100") + self.assertEqual(result["refType"], "IsSourceOf") + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["references"][0]["location"], "https://doi.org/10.1364/OE.24.014100") + self.assertEqual(nerd["references"][0]["refType"], "IsSourceOf") + + # update a file by its filepath + filemd = nerd["components"][1] + self.assertEqual(filemd["filepath"], "trial1.json") + self.assertEqual(filemd["size"], 69) + filemd["size"] = 70 + try: + result = self.svc.update_data(prec.id, filemd, "components[trial1.json]") + except InvalidUpdate as ex: + self.fail(str(ex) + ":\n" + "\n".join([str(e) for e in ex.errors])) + self.assertEqual(result["filepath"], "trial1.json") + self.assertEqual(result["size"], 70) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["components"][1]["filepath"], "trial1.json") + self.assertEqual(nerd["components"][1]["size"], 70) + + rec = read_nerd(ncnrexp0) + self.assertNotIn("references", rec) + try: + result = self.svc.replace_data(prec.id, rec) + except InvalidUpdate as ex: + self.fail(str(ex) + ":\n" + "\n".join([str(e) for e in ex.errors])) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(result, nerd) + self.assertEqual(nerd["@id"], pdrid) + self.assertEqual(nerd["doi"], "doi:10.88888/mdsy-0003") + self.assertTrue(nerd["title"].startswith("Neutron ")) + self.assertEqual(len(nerd["authors"]), 2) + self.assertNotIn("references", nerd) + self.assertEqual(len(nerd["components"]), 2) + + + + if __name__ == '__main__': diff --git a/python/tests/nistoar/pdr/publish/data/ncnrexp0.json b/python/tests/nistoar/pdr/publish/data/ncnrexp0.json index 0e02f53..02233b5 100644 --- a/python/tests/nistoar/pdr/publish/data/ncnrexp0.json +++ b/python/tests/nistoar/pdr/publish/data/ncnrexp0.json @@ -26,7 +26,7 @@ } ], "description": [ - "" + "An exploration of oxide films" ], "keywords": [ "electrochemistry", 
"oxide films" ], "accessLevel": "public", From 03500f807a818b65b209741fa20be98e6ebab4e3 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 11 Feb 2023 16:39:29 -0500 Subject: [PATCH 048/123] mds3: changing handling of format/mime-type maps --- python/nistoar/midas/dap/service/mds3.py | 63 ++++++++++--------- .../nistoar/pdr/data}/fext2format.json | 0 python/nistoar/pdr/data/mime.types | 6 +- .../nistoar/midas/dap/service/test_mds3.py | 4 +- 4 files changed, 41 insertions(+), 32 deletions(-) rename {etc => python/nistoar/pdr/data}/fext2format.json (100%) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 5d616f4..013e85f 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -5,7 +5,7 @@ Support for the web service frontend is provided as a WSGI :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` implementation. """ -import os, re +import os, re, pkg_resources from logging import Logger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence, Callable @@ -123,38 +123,43 @@ def __init__(self, dbclient_factory: DBClient, config: Mapping={}, who: PubAgent raise ConfigurationException("'validate_nerdm' is set but cannot find schema dir") self._valid8r = validate.create_lenient_validator(self._schemadir, "_") - self._mediatypes = { - "csv": "text/csv", "txt": "text/plain", "html": "text/html", "htm": "text/html", - "sha256": "text/plain", "md5": "text/plain" - } - mimefiles = self.cfg.get('mimetype_files', []) - if not isinstance(mimefiles, list): - mimefiles = [mimefiles] - if mimefiles: - self._mediatypes = build_mime_type_map(mimefiles) - - self._formatbyext = {} - if 'file_format_maps' in self.cfg: - mimefiles = self.cfg.get('file_format_maps', []) - else: - mimefiles = os.path.join(def_etc_dir, "fext2format.json") - if not isinstance(mimefiles, list): - mimefiles = [mimefiles] - for ffile in mimefiles: - try: - fmp = 
read_json(ffile) - if not isinstance(fmp, Mapping): - raise ValueError("wrong format for format-map file: contains "+type(fmp)) - if fmp: - self._formatbyext.update(fmp) - except Exception as ex: - self.log.warning("Unable to read format-map file, %s: %s", ffile, str(ex)) + self._mediatypes = None + self._formatbyext = None self._minnerdmver = minnerdmver + def _choose_mediatype(self, fext): + defmt = 'application/octet-stream' + + if not self._mediatypes: + mtfiles = [f if os.path.isabs(f) else os.path.join(def_etc_dir, f) + for f in self.cfg.get('mimetype_files', [])] + if not mtfiles: + mtfiles = [pkg_resources.resource_filename('nistoar.pdr', 'data/mime.types')] + self._mediatypes = build_mime_type_map(mtfiles) + + return self._mediatypes.get(fext, defmt) + def _guess_format(self, file_ext, mimetype=None): if not mimetype: - mimetype = self._mediatypes.get(file_ext) + mimetype = self._choose_mediatypes(file_ext) + + if self._formatbyext is None: + fmtfiles = [f if os.path.isabs(f) else os.path.join(def_etc_dir, f) + for f in self.cfg.get('file_format_maps', [])] + if not fmtfiles: + fmtfiles = [pkg_resources.resource_filename('nistoar.pdr', 'data/fext2format.json')] + self._formatbyext = {} + for f in fmtfiles: + try: + fmp = read_json(f) + if not isinstance(fmp, Mapping): + raise ValueError("wrong format for format-map file: contains "+type(fmp)) + if fmp: + self._formatbyext.update(fmp) + except Exception as ex: + self.log.warning("Unable to fead format-map file, %s: %s", f, str(ex)) + fmtd = self._formatbyext.get(file_ext) if fmtd: return { "description": fmtd } @@ -1398,7 +1403,7 @@ def _moderate_file(self, cmp, doval=True): if nerdutils.is_type(cmp, "DownloadableFile"): filext = os.path.splitext(cmp.get("filepath",""))[-1].lstrip('.') if not cmp.get("mediaType"): - cmp["mediaType"] = self._mediatypes.get(filext, "application/octet-stream") + cmp["mediaType"] = self._choose_mediatype(filext) if not cmp.get("format"): fmt = self._guess_format(filext, 
cmp["mediaType"]) diff --git a/etc/fext2format.json b/python/nistoar/pdr/data/fext2format.json similarity index 100% rename from etc/fext2format.json rename to python/nistoar/pdr/data/fext2format.json diff --git a/python/nistoar/pdr/data/mime.types b/python/nistoar/pdr/data/mime.types index 21f89de..eec77e6 100644 --- a/python/nistoar/pdr/data/mime.types +++ b/python/nistoar/pdr/data/mime.types @@ -10,10 +10,14 @@ types { application/rss+xml rss; text/mathml mml; - text/plain txt sha256 sha512 md5; + text/plain txt sha256 sha512 md5 r R; text/vnd.sun.j2me.app-descriptor jad; text/vnd.wap.wml wml; text/x-component htc; + text/csv csv; + text/tab-separated-values tsv; + text/x-python py; + application/x-python-code pyc; image/png png; image/tiff tif tiff; diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index 09b39c4..ba3d87c 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -696,7 +696,7 @@ def test_moderate_file(self): self.fail("Validation Error: "+ex.format_errors()) self.assertEqual(cmp, {"filepath": "data.zip", "downloadURL": "pdr:file", "@type": ["nrdp:DataFile", "nrdp:DownloadableFile"], - "mediaType": "application/octet-stream", + "mediaType": "application/zip", "format": {"description": "compressed file archive"}, "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) @@ -707,7 +707,7 @@ def test_moderate_file(self): self.fail("Validation Error: "+ex.format_errors()) self.assertEqual(cmp, {"filepath": "data.zip", "downloadURL": "pdr:file", "@type": ["nrdp:DataFile", "nrdp:DownloadableFile", "dcat:Distribution"], - "mediaType": "application/octet-stream", + "mediaType": "application/zip", "format": {"description": "compressed file archive"}, "_extensionSchemas": [ mds3.NERDMPUB_DEF+"DataFile" ]}) From 0e3c770b15f8b938d492bd28ab65c9d3721ea88a Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 11 Feb 2023 17:31:58 
-0500 Subject: [PATCH 049/123] mds3: expand class documentatation --- python/nistoar/midas/dap/service/mds3.py | 91 ++++++++++++++++-------- 1 file changed, 63 insertions(+), 28 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 013e85f..734e100 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -76,6 +76,22 @@ class DAPService(ProjectService): """ a project record request broker class for DAP records. + This service allows a client to create and update DAP records in the form of NERDm Resource + records. + + This service extends the generic DBIO :py:class:`~nistoar.midas.dbio.project.ProjectService` by + supporting the following conventions: + * The data record being created/updated through the service is a NERDm Resource + * The NERDm Resource record is stored separately from the DBIO + :py:class:`~nistoar.midas.dbio.base.ProjectRecord` (via the + :py:mod:`~nistoar.midas.dap.nerdstore` module). The ``data`` property of the + :py:class:`~nistoar.midas.dbio.base.ProjectRecord` contains a summary (i.e. a subset of + properties) of the NERDm record. + * Conventions and heuristics are applied for setting default values for various NERDm + properties based on the potentially limited properties provided by the client during the + editing process. (These conventions and hueristics are implemented the in various + ``_moderate_*`` functions in this class. + In addition to the configuration parameters supported by the parent class, this specialization also supports the following parameters: @@ -89,6 +105,19 @@ class DAPService(ProjectService): ``validate_nerdm`` if True (default), validate the updates to NERDm metadata. Incomplete NERDm records are permitted. + ``mimetype_files`` + a list of paths to files containing MIME-type to file extension maps used to assign a MIME-type + (i.e. ``mediaType``) to a file component. 
Any path given as a relative path will be assumed to + be relative to OAR_ETC_DIR. If this parameter not set, a default map is loaded as a package + resource, ``data/mime.types``, under ``nistoar.pdr``. The format is that supported by the Apache + and Nginx web servers. + ``file_format_maps`` + a list of paths to files containing file extension to file format maps used to attach a file + format description to to a file component. Any path given as a relative path will be assumed to + be relative to OAR_ETC_DIR. If this parameter not set, a default map is loaded as a package + resource, ``data/fext2format.json``, under ``nistoar.pdr``. The format of such files is a + JSON-encoded object with file extensions as the keys, and string descriptions of formats + as values. Note that the DOI is not yet registered with DataCite; it is only internally reserved and included in the record NERDm data. @@ -376,16 +405,18 @@ def replace_data(self, id, newdata, part=None): :param str id: the identifier for the record whose data should be updated. :param str newdata: the data to save as the new content. :param stt part: the slash-delimited pointer to an internal data property. If provided, - the given `newdata` is a value that should be set to the property pointed - to by `part`. - :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. - If this is not provided, the record will by fetched anew based on the `id`. - :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to - an undefined or unrecognized part of the data + the given ``newdata`` is a value that should be set to the property pointed + to by ``part``. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to + ``id``. If this is not provided, the record will by fetched anew based on + the ``id``. 
+ :raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points + to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record - given by `id`. - :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. - :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + given by ``id``. + :raises PartNotAccessible: if replacement of the part of the data specified by ``part`` is not + allowed. + :raises InvalidUpdate: if the provided ``newdata`` represents an illegal or forbidden update or would otherwise result in invalid data content. """ return self._update_data(id, newdata, part, replace=True) @@ -396,16 +427,18 @@ def update_data(self, id, newdata, part=None): :param str id: the identifier for the record whose data should be updated. :param str newdata: the data to save as the new content. :param stt part: the slash-delimited pointer to an internal data property. If provided, - the given `newdata` is a value that should be set to the property pointed - to by `part`. - :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. - If this is not provided, the record will by fetched anew based on the `id`. - :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + the given ``newdata`` is a value that should be set to the property pointed + to by ``part``. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to + ``id``. If this is not provided, the record will by fetched anew based on + the ``id``. + :raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record - given by `id`. 
- :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. - :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + given by ``id``. + :raises PartNotAccessible: if replacement of the part of the data specified by ``part`` is not + allowed. + :raises InvalidUpdate: if the provided ``newdata`` represents an illegal or forbidden update or would otherwise result in invalid data content. """ return self._update_data(id, newdata, part, replace=False) @@ -417,13 +450,15 @@ def clear_data(self, id, part=None, _prec=None): :param stt part: the slash-delimited pointer to an internal data property. If provided, only that property will be cleared (either removed or set to an initial default). - :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. - If this is not provided, the record will by fetched anew based on the `id`. - :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to + ``id``. If this is not provided, the record will by fetched anew based on + the ``id``. + :raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record - given by `id`. - :raises PartNotAccessible: if clearing of the part of the data specified by `part` is not allowed. + given by ``id``. + :raises PartNotAccessible: if clearing of the part of the data specified by ``part`` is not + allowed. """ if not _prec: _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized @@ -712,8 +747,8 @@ def set_file_component(self, id, filemd, filepath=None): :param dict filemd: the NERDm file metadata describing the new file to add. 
If the "@id" property is set, it will be ignored. :param str filepath: the path within the dataset to assign to the file. If provided, - it will override the corresponding value in `filemd`; if not - provided, the filepath must be set within `filemd`. + it will override the corresponding value in ``filemd``; if not + provided, the filepath must be set within ``filemd``. """ prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if filepath: @@ -742,7 +777,7 @@ def update_file_component_at(self, id: str, filemd: Mapping, filepath: str=None) :param dict filemd: the file metadata to update :param str filepath: the path of the file within the dataset to update :raises ObjectNotFound: if there does not exist a file at the given filepath - :raises ValueError: if filepath is not set in either the `filepath` argument or the + :raises ValueError: if filepath is not set in either the ``filepath`` argument or the filepath property. """ prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized @@ -768,8 +803,8 @@ def update_file_component(self, id: str, filemd: Mapping, fileid: str=None): :param dict filemd: the file metadata to update :param str fileid: the id of the file within the dataset to update :raises ObjectNotFound: if there does not exist a resource with the given id - :raises ValueError: if id is not set in either the `fileid` argument or the `filemd` object's - `@id` property. + :raises ValueError: if id is not set in either the ``fileid`` argument or the ``filemd`` + object's ``@id`` property. """ prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if not fileid: @@ -788,7 +823,7 @@ def update_file_component(self, id: str, filemd: Mapping, fileid: str=None): def replace_files(self, id: str, files: List[Mapping]): """ replace all currently saved files and folder components with the given list. Each component - must include a `filepath` property. 
+ must include a ``filepath`` property. :param str id: the identifier for the dataset containing the file :raises ObjectNotFound: if there does not exist a resource with the given id """ From 9759527e14663d0dcae3eca0fed56e31fbd86a16 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 14 Feb 2023 13:22:40 -0500 Subject: [PATCH 050/123] dap: lotsa debugging via web service testing --- python/nistoar/midas/dap/nerdstore/fsbased.py | 2 +- python/nistoar/midas/dap/nerdstore/inmem.py | 6 +- python/nistoar/midas/dap/service/mds3.py | 219 ++++++++++++++---- python/nistoar/midas/dbio/wsgi/base.py | 2 +- python/nistoar/midas/dbio/wsgi/project.py | 166 +++++++------ 5 files changed, 270 insertions(+), 125 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py index 6a1680b..e73798c 100644 --- a/python/nistoar/midas/dap/nerdstore/fsbased.py +++ b/python/nistoar/midas/dap/nerdstore/fsbased.py @@ -726,7 +726,7 @@ def load_file_components(self, cmps): # base subcollection contents first on 'has_member' list as this captures order info if cmp.get('has_member'): - if isinstance(cmd.get('has_member',[]), str): + if isinstance(cmp.get('has_member',[]), str): cmp['has_member'] = [cmp['has_member']] for child in cmp['has_member']: if child.get('@id') in saved and child.get('name'): diff --git a/python/nistoar/midas/dap/nerdstore/inmem.py b/python/nistoar/midas/dap/nerdstore/inmem.py index 420e428..57b3a09 100644 --- a/python/nistoar/midas/dap/nerdstore/inmem.py +++ b/python/nistoar/midas/dap/nerdstore/inmem.py @@ -192,7 +192,7 @@ def _load_data(self, comps): if 'filepath' not in cmp: if cmp.get('@id'): self._data[cmp.get('@id')] = copy.deepcopy(cmp) - m = _idre.find(cmd['@id']) + m = _idre.search(cmp['@id']) if m: # the id was set by a previous call to this class's minter # extract the number to ensure future ids are unique @@ -233,7 +233,7 @@ def _load_from(self, cmps: [Mapping]): for cmp in cmps: if cmp.get('filepath'): 
if cmp.get('@id'): - m = _idre.search(cmd['@id']) + m = _idre.search(cmp['@id']) if m: # the id was set by a previous call to this class's minter # extract the number to ensure future ids are unique @@ -277,7 +277,7 @@ def _load_from(self, cmps: [Mapping]): # base subcollection contents first on 'has_member' list as this captures order info if cmp.get('has_member'): - if isinstance(cmd.get('has_member',[]), str): + if isinstance(cmp.get('has_member',[]), str): cmp['has_member'] = [cmp['has_member']] for child in cmp['has_member']: if child.get('@id') in self._files and child.get('name'): diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 734e100..b1f5ea0 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -2,20 +2,32 @@ The DAP Authoring Service implemented using the mds3 convention. This convention represents the first DAP convention powered by the DBIO APIs. -Support for the web service frontend is provided as a -WSGI :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp` implementation. +The key features of the mds3 conventions are: + * The data record being created/updated through the service is a NERDm Resource + * The NERDm Resource record is stored separately from the DBIO + :py:class:`~nistoar.midas.dbio.base.ProjectRecord` (via the + :py:mod:`~nistoar.midas.dap.nerdstore` module). The ``data`` property of the + :py:class:`~nistoar.midas.dbio.base.ProjectRecord` contains a summary (i.e. a subset of + properties) of the NERDm record. + * Conventions and heuristics are applied for setting default values for various NERDm + properties based on the potentially limited properties provided by the client during the + editing process. (These conventions and hueristics are implemented the in various + ``_moderate_*`` functions in the :py:class:`DAPService` class.) 
+ +Support for the web service frontend is provided via :py:class:`DAPApp` class, an implementation +of the WSGI-based :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp`. """ import os, re, pkg_resources from logging import Logger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence, Callable -from typing import List +from typing import List, Union from copy import deepcopy from ...dbio import (DBClient, DBClientFactory, ProjectRecord, AlreadyExists, NotAuthorized, ACLs, InvalidUpdate, ObjectNotFound, PartNotAccessible, ProjectService, ProjectServiceFactory, DAP_PROJECTS) -from ...dbio.wsgi.project import MIDASProjectApp +from ...dbio.wsgi.project import MIDASProjectApp, ProjectDataHandler, SubApp from nistoar.base.config import ConfigurationException, merge_config from nistoar.nerdm import constants as nerdconst, utils as nerdutils from nistoar.pdr import def_schema_dir, def_etc_dir, constants as const @@ -70,6 +82,10 @@ NIST_ABBREV = "NIST" NIST_ROR = "ror:05xpvk416" +VER_DELIM = const.RELHIST_EXTENSION.lstrip('/') +FILE_DELIM = const.FILECMP_EXTENSION.lstrip('/') +LINK_DELIM = const.LINKCMP_EXTENSION.lstrip('/') +AGG_DELIM = const.AGGCMP_EXTENSION.lstrip('/') EXTSCHPROP = "_extensionSchemas" class DAPService(ProjectService): @@ -77,20 +93,9 @@ class DAPService(ProjectService): a project record request broker class for DAP records. This service allows a client to create and update DAP records in the form of NERDm Resource - records. - - This service extends the generic DBIO :py:class:`~nistoar.midas.dbio.project.ProjectService` by - supporting the following conventions: - * The data record being created/updated through the service is a NERDm Resource - * The NERDm Resource record is stored separately from the DBIO - :py:class:`~nistoar.midas.dbio.base.ProjectRecord` (via the - :py:mod:`~nistoar.midas.dap.nerdstore` module). 
The ``data`` property of the - :py:class:`~nistoar.midas.dbio.base.ProjectRecord` contains a summary (i.e. a subset of - properties) of the NERDm record. - * Conventions and heuristics are applied for setting default values for various NERDm - properties based on the potentially limited properties provided by the client during the - editing process. (These conventions and hueristics are implemented the in various - ``_moderate_*`` functions in this class. + records according to the mds3 conventions. See this + :py:module:`module's documentation ` for + a summary of the supported conventions. In addition to the configuration parameters supported by the parent class, this specialization also supports the following parameters: @@ -247,18 +252,18 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: prec.data = self._summarize(nerd) if data: - self.update_data(prec.id, data, prec=prec, nerd=nerd) # this will call prec.save() + self._update_data(prec.id, data, prec=prec, nerd=nerd) # this will call prec.save() else: prec.save() except Exception as ex: if nerd: try: - nerd.delete() + self._store.delete(prec.id) except Exception as ex: self.log.error("Error while cleaning up NERDm data after create failure: %s", str(ex)) try: - prec.delete() + self.dbcli.delete_record(prec.id) except Exception as ex: self.log.error("Error while cleaning up DAP record after create failure: %s", str(ex)) raise @@ -273,7 +278,7 @@ def _new_data_for(self, recid, meta=None, schemaid=None): ("@context", NERDM_CONTEXT), (EXTSCHPROP, [NERDMPUB_DEF + "PublicDataResource"]), ("@id", self._arkid_for(recid)), - ("@type", [":".join([NERDMPUB_PRE, "PublicDataResource"]), "dcat:Resource"]) + ("@type", [NERDMPUB_PRE + ":PublicDataResource", "dcat:Resource"]) ]) if self.cfg.get('assign_doi') == ASSIGN_DOI_ALWAYS: @@ -283,9 +288,9 @@ def _new_data_for(self, recid, meta=None, schemaid=None): if meta.get("resourceType"): addtypes = [] if meta['resourceType'].lower() == "software": - 
addtypes = [":".join([NERDPUB_PRE, "Software"])] + addtypes = [":".join([NERDMSW_PRE, "SoftwarePublication"])] elif meta['resourceType'].lower() == "srd": - addtypes = [":".join([NERDPUB_PRE, "SRD"])] + addtypes = [":".join([NERDMPUB_PRE, "SRD"])] out["@type"] = addtypes + out["@type"] if meta.get("softwareLink"): @@ -332,7 +337,7 @@ def _moderate_metadata(self, mdata: MutableMapping, shoulder=None): if isinstance(out.get('creatorisContact'), str): out['creatorisContact'] = out['creatorisContact'].lower() == "true" elif out.get('creatorisContact') is None: - out['creatorisContact'] = true + out['creatorisContact'] = True return out @@ -387,6 +392,10 @@ def get_nerdm_data(self, id: str, part: str=None): out = nerd.authors.get_data() elif part == "authors": out = nerd.references.get_data() + elif part == FILE_DELIM: + out = nerd.files.get_data() + elif part == LINK_DELIM: + out = nerd.nonfiles.get_data() elif part == "components": out = nerd.nonfiles.get_data() + nerd.files.get_data() else: @@ -474,6 +483,10 @@ def clear_data(self, id, part=None, _prec=None): nerd.authors.empty() elif part == "references": nerd.references.empty() + elif part == FILE_DELIM: + nerd.files.empty() + elif part == LINK_DELIM: + nerd.nonfiles.empty() elif part == "components": nerd.files.empty() nerd.nonfiles.empty() @@ -600,7 +613,7 @@ def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, data: Mappin raise InvalidUpdate("Input metadata data would create invalid record (%d errors detected)" % len(errors), prec.id, errors=errors) elif len(errors) == 1: - raise InvalidUpdate("Input validation error: "+errors[0], prec.id, errors=errors) + raise InvalidUpdate("Input validation error: "+str(errors[0]), prec.id, errors=errors) # all data is merged and validated; now commit nerd.replace_res_data(newdata) @@ -638,8 +651,13 @@ def put_each_into(data, objlist): return nerd.get_data(True) - def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data: Mapping, - 
replace=False, doval=True): + def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, + data: Union[list, Mapping], replace=False, doval=True): + # update just part of the NERDm metadata as given by path. The path identifies which + # NERDm Resource property to update; the data parameter is expected to be of a JSONSchema + # type that matches that property. Two special path values, FILE_DELIM ("pdr:f") and + # LINK_DELIM ("pdr:see") are taken to refer to the list of file and non-file components, + # respectively. schemabase = prec.data.get("_schema") or NERDMPUB_SCH_ID @@ -659,7 +677,11 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data["_schema"] = schemabase+"/definitions/BibliographicReference" data = self._update_listitem(nerd.references, self._moderate_reference, data, key, replace, doval) - elif m.group(1) == "components": + elif m.group(1) == LINK_DELIM: + data["_schema"] = schemabase+"/definitions/Component" + data = self._update_listitem(nerd.nonfiles, self._moderate_nonfile, data, key, + replace, doval) + elif m.group(1) == "components" or m.group(1) == FILE_DELIM: data["_schema"] = schemabase+"/definitions/Component" data = self._update_component(nerd, data, key, replace, doval=doval) else: @@ -683,22 +705,42 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, else: data = self._update_objlist(nerd.references, self._moderate_reference, data, doval) - elif path == "components": + elif path == LINK_DELIM: + if not isinstance(data, list): + err = "non-file (links) data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) + if replace: + data = self._replace_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) + else: + data = self._update_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) + + elif path == FILE_DELIM: + if not isinstance(data, list): + err = "components data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) 
+ + elif path == "components" or path == FILE_DELIM: if not isinstance(data, list): err = "components data is not a list" raise InvalidUpdate(err, id, path, errors=[err]) files, nonfiles = self._merge_comps_for_update(nerd, data, replace, doval) if replace: - nerd.nonfiles.empty() + if path == "components": + nerd.nonfiles.empty() nerd.files.empty() - for cmp in nonfiles: - if cmp.get("@id"): - nerd.nonfiles.set(cmp['@id']) - else: - nerd.nonfiles.append(cmp) + if path == "components": + for cmp in nonfiles: + if cmp.get("@id"): + nerd.nonfiles.set(cmp['@id']) + else: + nerd.nonfiles.append(cmp) for cmp in files: nerd.files.set_file_at(cmp) - data = nerd.nonfiles.get_data() + nerd.files.get_files() + + if path == "components": + data = nerd.nonfiles.get_data() + nerd.files.get_files() + else: + data = nerd.files.get_files() elif path == "contactPoint": if not isinstance(data, Mapping): @@ -748,7 +790,7 @@ def set_file_component(self, id, filemd, filepath=None): the "@id" property is set, it will be ignored. :param str filepath: the path within the dataset to assign to the file. If provided, it will override the corresponding value in ``filemd``; if not - provided, the filepath must be set within ``filemd``. + provided, the ``filepath`` property must be set within ``filemd``. """ prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if filepath: @@ -1601,18 +1643,22 @@ class DAPServiceFactory(ProjectServiceFactory): """ def __init__(self, dbclient_factory: DBClientFactory, config: Mapping={}, log: Logger=None, - project_coll: str=None): + nerdstore: NERDResourceStorage=None, project_coll: str=None): """ create a service factory associated with a particulr DB backend. :param DBClientFactory dbclient_factory: the factory instance to use to create a DBClient to talk to the DB backend. :param Mapping config: the configuration for the service (see class-level documentation). :param Logger log: the Logger to use in the service. 
+ :param NERDResourceStorage nerdstore: the NERDResourceStorage instance to use to access NERDm + records. If not provided, one will be created based on the given + configuration (in the ``nerdstore`` parameter). :param str project_coll: the project type (i.e. the DBIO project collection to access); default: "dap". """ if not project_coll: project_coll = DAP_PROJECTS + self._nerdstore = nerdstore super(DAPServiceFactory, self).__init__(project_coll, dbclient_factory, config, log) def create_service_for(self, who: PubAgent=None): @@ -1620,15 +1666,100 @@ def create_service_for(self, who: PubAgent=None): create a service that acts on behalf of a specific user. :param PubAgent who: the user that wants access to a project """ - return DAPService(self._dbclifact, self._cfg, who, self._log, self._prjtype) + return DAPService(self._dbclifact, self._cfg, who, self._log, self._nerdstore, self._prjtype) class DAPApp(MIDASProjectApp): """ - A MIDAS SubApp supporting a DAP service + A MIDAS SubApp supporting a DAP service following the mds3 conventions """ - def __init__(self, dbcli_factory: DBClientFactory, log: Logger, config: dict={}, project_coll: str=None): - service_factory = DAPServiceFactory(dbcli_factory, config, project_coll) - super(DAPApp, self).__init__(service_factory, log.getChild(DAP_PROJECTS), config) + def __init__(self, dbcli_factory: DBClientFactory, log: Logger, config: dict={}, + service_factory: ProjectServiceFactory=None, project_coll: str=None): + if not project_coll: + project_coll = DAP_PROJECTS + if not service_factory: + service_factory = DAPServiceFactory(dbcli_factory, config, project_coll) + super(DAPApp, self).__init__(service_factory, log.getChild(project_coll), config) + self._data_update_handler = DAPProjectDataHandler +class DAPProjectDataHandler(ProjectDataHandler): + """ + A :py:class:`~nistoar.midas.wsgi.project.ProjectDataHandler` specialized for editing NERDm records. 
+ """ + _allowed_post_paths = "authors references components".split() + [FILE_DELIM, LINK_DELIM] + + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, datapath: str, config: dict=None, log: Logger=None): + super(DAPProjectDataHandler, self).__init__(service, subapp, wsgienv, start_resp, who, + id, datapath, config, log) + + def do_GET(self, path, ashead=False): + """ + respond to a GET request + :param str path: a path to the portion of the data to get. This is the same as the `datapath` + given to the handler constructor. This will be an empty string if the full + data object is requested. + :param bool ashead: if True, the request is actually a HEAD request for the data + """ + try: + out = self.svc.get_nerdm_data(self._id, path) + except dbio.NotAuthorized as ex: + return self.send_unauthorized() + except dbio.ObjectNotFound as ex: + if ex.record_part: + return self.send_error_resp(404, "Data property not found", + "No data found at requested property", self._id, ashead=ashead) + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id, ashead=ashead) + return self.send_json(out) + + def do_POST(self, path): + """ + respond to a POST request. Allowed paths include "authors", "references", "components", + "pdr:f" (for files), and "pdr:see" (for non-file components). + :param str path: a path to the portion of the data to get. This is the same as the `datapath` + given to the handler constructor. This will be an empty string if the full + data object is requested. 
+ :param bool ashead: if True, the request is actually a HEAD request for the data + """ + try: + newdata = self.get_json_body() + except self.FatalError as ex: + return self.send_fatal_error(ex) + + try: + if not self.svc.dbcli.exists(self._id): + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) + + if path == "authors": + self.svc.add_author(self._id, newdata) + elif path == "references": + self.svc.add_reference(self._id, newdata) + elif path == FILE_DELIM: + self.svc.set_file_component(self._id, newdata) + elif path == LINK_DELIM: + self.svc.add_nonfile_component(self._id, newdata) + elif path == "components": + if 'filepath' in newdata: + self.svc.set_file_component(self._id, newdata) + else: + self.svc.add_nonfile_component(self._id, newdata) + + else: + return self.send_error_resp(405, "POST not allowed", + "POST not supported on path") + + except dbio.NotAuthorized as ex: + return self.send_unauthorized() + except ObjectNotFound as ex: + return self.send_error_resp(404, "Path not found", + "Requested path not found within record", self._id) + except InvalidUpdate as ex: + return self.send_error_resp(400, "Invalid Input Data", str(ex), self._id) + except PartNotAccessible as ex: + return self.send_error_resp(405, "Data part not updatable", + "Requested part of data cannot be updated", self._id) + + diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py index c88fa5a..276fe04 100644 --- a/python/nistoar/midas/dbio/wsgi/base.py +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -104,7 +104,7 @@ def get_json_body(self): if self._reqrec: self._reqrec.add_body_text(body).record() raise self.FatalError(400, "Input not parseable as JSON", - "Input document is not parse-able as JSON: "+str(ex), sipid) + "Input document is not parse-able as JSON: "+str(ex)) except Exception as ex: if self._reqrec: diff --git a/python/nistoar/midas/dbio/wsgi/project.py 
b/python/nistoar/midas/dbio/wsgi/project.py index f713f7b..49dd91b 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -22,79 +22,7 @@ from ...dbio import ProjectRecord, ProjectService, ProjectServiceFactory from .base import DBIOHandler - -class MIDASProjectApp(SubApp): - """ - a base web app for an interface handling project record - """ - - def __init__(self, service_factory: ProjectServiceFactory, log: Logger, config: dict={}): - super(MIDASProjectApp, self).__init__(service_factory._prjtype, log, config) - self.svcfact = service_factory - - def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: - """ - return a handler instance to handle a particular request to a path - :param Mapping env: the WSGI environment containing the request - :param Callable start_resp: the start_resp function to use initiate the response - :param str path: the path to the resource being requested. This is usually - relative to a parent path that this SubApp is configured to - handle. 
- :param PubAgent who the authenticated user agent making the request - """ - - # create a service on attached to the user - service = self.svcfact.create_service_for(who) - - # now parse the requested path; we have different handlers for different types of paths - path = path.strip('/') - idattrpart = path.split('/', 2) - if len(idattrpart) < 2: - if not idattrpart[0]: - # path is empty: this is used to list all available projects or create a new one - return ProjectSelectionHandler(service, self, env, start_resp, who) - else: - # path is just an ID: - return ProjectHandler(service, self, env, start_resp, who, idattrpart[0]) - - elif idattrpart[1] == "name": - # path=ID/name: get/change the mnumonic name of record ID - return ProjectNameHandler(service, self, env, start_resp, who, idattrpart[0]) - elif idattrpart[1] == "data": - # path=ID/data[/...]: get/change the content of record ID - if len(idattrpart) == 2: - idattrpart.append("") - return ProjectDataHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[2]) - elif idattrpart[1] == "acls": - # path=ID/acls: get/update the access control on record ID - if len(idattrpart) < 3: - idattrpart.append("") - return ProjectACLsHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[2]) - - # the fallback handler will return some arbitrary part of the record - if len(idattrpart) > 2: - idattrpart[1] = "/".join(idattrpart[1:]) - return ProjectInfoHandler(service, self, env, start_resp, who, idattrpart[0], idattrpart[1]) - - class _factory: - def __init__(self, project_coll): - self._prjcoll = project_coll - def __call__(self, dbcli_factory: dbio.DBClientFactory, log: Logger, config: dict={}, - prjcoll: str=None): - if not prjcoll: - prjcoll = self._prjcoll - service_factory = ProjectServiceFactory(prjcoll, dbcli_factory, config, log) - return MIDASProjectApp(service_factory, log, config) - - @classmethod - def factory_for(cls, project_coll): - """ - return a factory function that 
instantiates this class connected to the given DBIO collection. - This is intended for plugging this SubApp into the main WSGI app as is. - :param str project_coll: the name of the DBIO project collection to use for creating and - updating project records. - """ - return cls._factory(project_coll) +__all__ = ["MIDASProjectHandler", "ProjectDataHandler"] class ProjectRecordHandler(DBIOHandler): """ @@ -357,7 +285,7 @@ def do_PUT(self, path): return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", self._id) except InvalidUpdate as ex: - return self.send_error_resp(400, "Invalid Input Data", str(ex)) + return self.send_error_resp(400, "Invalid Input Data", ex.format_errors()) except PartNotAccessible as ex: return self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated") @@ -377,8 +305,9 @@ def do_PATCH(self, path): except dbio.ObjectNotFound as ex: return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", self._id) - except InvalidUpdate as ex: - return self.send_error_resp(400, "Invalid Input Data", str(ex)) + except dbio.InvalidUpdate as ex: + return self.send_error_resp(400, "Submitted data creates an invalid record", + ex.format_errors()) except PartNotAccessible as ex: return self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated") @@ -469,6 +398,9 @@ def do_POST(self, path): return self.send_unauthorized() except dbio.AlreadyExists as ex: return self.send_error_resp(400, "Name already in use", str(ex)) + except dbio.InvalidUpdate as ex: + return self.send_error_resp(400, "Submitted data creates an invalid record", + ex.format_errors()) return self.send_json(prec.to_dict(), "Project Created", 201) @@ -702,6 +634,88 @@ def do_DELETE(self, path): "Updating specified permission is not allowed") +class MIDASProjectApp(SubApp): + """ + a base web app for an interface handling project record + """ + 
_selection_handler = ProjectSelectionHandler + _update_handler = ProjectHandler + _name_update_handler = ProjectNameHandler + _data_update_handler = ProjectDataHandler + _acls_update_handler = ProjectACLsHandler + _info_update_handler = ProjectInfoHandler + + def __init__(self, service_factory: ProjectServiceFactory, log: Logger, config: dict={}): + super(MIDASProjectApp, self).__init__(service_factory._prjtype, log, config) + self.svcfact = service_factory + + def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: + """ + return a handler instance to handle a particular request to a path + :param Mapping env: the WSGI environment containing the request + :param Callable start_resp: the start_resp function to use initiate the response + :param str path: the path to the resource being requested. This is usually + relative to a parent path that this SubApp is configured to + handle. + :param PubAgent who the authenticated user agent making the request + """ + + # create a service on attached to the user + service = self.svcfact.create_service_for(who) + + # now parse the requested path; we have different handlers for different types of paths + path = path.strip('/') + idattrpart = path.split('/', 2) + if len(idattrpart) < 2: + if not idattrpart[0]: + # path is empty: this is used to list all available projects or create a new one + return self._selection_handler(service, self, env, start_resp, who) + else: + # path is just an ID: + return self._update_handler(service, self, env, start_resp, who, idattrpart[0]) + + elif idattrpart[1] == "name": + # path=ID/name: get/change the mnumonic name of record ID + return self._name_update_handler(service, self, env, start_resp, who, idattrpart[0]) + elif idattrpart[1] == "data": + # path=ID/data[/...]: get/change the content of record ID + if len(idattrpart) == 2: + idattrpart.append("") + return self._data_update_handler(service, self, env, start_resp, who, + idattrpart[0], 
idattrpart[2]) + elif idattrpart[1] == "acls": + # path=ID/acls: get/update the access control on record ID + if len(idattrpart) < 3: + idattrpart.append("") + return self._acls_update_handler(service, self, env, start_resp, who, + idattrpart[0], idattrpart[2]) + + # the fallback handler will return some arbitrary part of the record + if len(idattrpart) > 2: + idattrpart[1] = "/".join(idattrpart[1:]) + return self._info_update_handler(service, self, env, start_resp, who, + idattrpart[0], idattrpart[1]) + + class _factory: + def __init__(self, project_coll): + self._prjcoll = project_coll + def __call__(self, dbcli_factory: dbio.DBClientFactory, log: Logger, config: dict={}, + prjcoll: str=None): + if not prjcoll: + prjcoll = self._prjcoll + service_factory = ProjectServiceFactory(prjcoll, dbcli_factory, config, log) + return MIDASProjectApp(service_factory, log, config) + + @classmethod + def factory_for(cls, project_coll): + """ + return a factory function that instantiates this class connected to the given DBIO collection. + This is intended for plugging this SubApp into the main WSGI app as is. + :param str project_coll: the name of the DBIO project collection to use for creating and + updating project records. 
+ """ + return cls._factory(project_coll) + From ae545dcc045883f8c019e4fa7f12d859a57aa5ad Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 18 Feb 2023 15:57:15 -0500 Subject: [PATCH 051/123] more debugging via the wsgi interface --- python/nistoar/midas/dap/service/mds3.py | 81 +-- python/nistoar/midas/dbio/wsgi/project.py | 6 +- .../midas/dap/service/test_mds3_app.py | 460 ++++++++++++++++++ 3 files changed, 514 insertions(+), 33 deletions(-) create mode 100644 python/tests/nistoar/midas/dap/service/test_mds3_app.py diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index b1f5ea0..24e48fc 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -35,6 +35,7 @@ from nistoar.pdr.publish.prov import PubAgent from . import validate +from .. import nerdstore from ..nerdstore import NERDResource, NERDResourceStorage, NERDResourceStorageFactory ASSIGN_DOI_NEVER = 'never' @@ -86,6 +87,7 @@ FILE_DELIM = const.FILECMP_EXTENSION.lstrip('/') LINK_DELIM = const.LINKCMP_EXTENSION.lstrip('/') AGG_DELIM = const.AGGCMP_EXTENSION.lstrip('/') +RES_DELIM = const.RESONLY_EXTENSION.lstrip('/') EXTSCHPROP = "_extensionSchemas" class DAPService(ProjectService): @@ -352,7 +354,10 @@ def get_nerdm_data(self, id: str, part: str=None): return the full NERDm metadata. This differs from the :py:method:`get_data` method which (in this implementation) only returns a summary of hte NERDm metadata. :param str id: the identifier for the record whose NERDm data should be returned. - :param str part: a path to the part of the record that should be returned + :param str part: a path to the part of the record that should be returned. 
This can be the + name of a top level NERDm property or one of the following special values: + * ``pdr:f`` -- returns only the file-like components (files and subcollections) + * ``pdr:see`` -- returns only the non-file components (like links) """ prec = self.dbcli.get_record_for(id, ACLs.READ) # may raise ObjectNotFound/NotAuthorized nerd = self._store.open(prec.id) @@ -361,10 +366,10 @@ def get_nerdm_data(self, id: str, part: str=None): out = nerd.get_data() else: - m = re.search(r'^([a-z]+s)\[([\w\d]+)\]$', path) + m = re.search(r'^([a-z]+s)\[([\w\d/#\.]+)\]$', part) if m: - # path is of the form xxx[k] and refers to an item in a list - key = m.group(3) + # part is of the form xxx[k] and refers to an item in a list + key = m.group(2) try: key = int(key) except ValueError: @@ -374,6 +379,10 @@ def get_nerdm_data(self, id: str, part: str=None): out = nerd.authors.get(key) elif m.group(1) == "references": out = nerd.reference.get(key) + elif m.group(1) == LINK_DELIM: + out = nerd.nonfiles.get(key) + elif m.group(1) == FILE_DELIM: + out = nerd.files.get(key) elif m.group(1) == "components": out = None try: @@ -383,27 +392,33 @@ def get_nerdm_data(self, id: str, part: str=None): if not out: try: out = nerd.files.get_file_by_id(key) - except ObjectNotFound as ex: - pass - if not out: - out = nerd.files.get_file_by_path(key) + except nerdstore.ObjectNotFound as ex: + raise ObjectNotFound(id, part, str(ex)) elif part == "authors": out = nerd.authors.get_data() - elif part == "authors": + elif part == "references": out = nerd.references.get_data() + elif part == "components": + out = nerd.nonfiles.get_data() + nerd.files.get_data() elif part == FILE_DELIM: out = nerd.files.get_data() elif part == LINK_DELIM: out = nerd.nonfiles.get_data() - elif part == "components": - out = nerd.nonfiles.get_data() + nerd.files.get_data() + elif part == RES_DELIM: + out = nerd.get_res_data() + elif part.startswith(FILE_DELIM+"/"): + fprts = part.split('/', 1) + try: + out = 
nerd.files.get_file_by_path(fprts[1]) + except nerdstore.ObjectNotFound as ex: + raise ObjectNotFound(id, part, str(ex)) else: out = nerd.get_res_data() if part in out: out = out[part] else: - raise PartNotAccessible(prec.id, path, "Accessing %s not supported" % path) + raise PartNotAccessible(prec.id, part, "Accessing %s not supported" % part) return out @@ -714,10 +729,10 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, else: data = self._update_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) - elif path == FILE_DELIM: - if not isinstance(data, list): - err = "components data is not a list" - raise InvalidUpdate(err, id, path, errors=[err]) + # elif path == FILE_DELIM: + # if not isinstance(data, list): + # err = "components data is not a list" + # raise InvalidUpdate(err, id, path, errors=[err]) elif path == "components" or path == FILE_DELIM: if not isinstance(data, list): @@ -805,7 +820,7 @@ def set_file_component(self, id, filemd, filepath=None): oldfile = None try: oldfile = nerd.files.get_file_by_path(filepath) # it must have na id - except ObjectNotFound as ex: + except nerdstore.ObjectNotFound as ex: pass return self._update_file_comp(nerd, data, oldfile, replace=True, doval=True) @@ -832,7 +847,10 @@ def update_file_component_at(self, id: str, filemd: Mapping, filepath: str=None) filemd['filepath'] = filepath nerd = self._store.open(id) - oldfile = nerd.files.get_file_by_path(filepath) # may raise ObjectNotFound + try: + oldfile = nerd.files.get_file_by_path(filepath) + except nerdstore.ObjectNotFound as ex: + raise ObjectNotFound(id, FILE_DELIM+"/"+filepath, str(ex), self._sys) return self._update_file_comp(nerd, filemd, oldfile, replace=False, doval=True) @@ -858,7 +876,10 @@ def update_file_component(self, id: str, filemd: Mapping, fileid: str=None): filemd['@id'] = fileid nerd = self._store.open(id) - oldfile = nerd.files.get_file_by_path(filepath) # may raise ObjectNotFound + try: + oldfile = 
nerd.files.get_file_by_path(filepath) + except nerdstore.ObjectNotFound as ex: + raise ObjectNotFound(id, FILE_DELIM+"/"+filepath, str(ex), self._sys) return self._update_file_comp(nerd, filemd, oldfile, replace=False, doval=True) @@ -1215,8 +1236,8 @@ def _moderate_text(self, val, resmd=None, doval=True): return val def _moderate_description(self, val, resmd=None, doval=True): - if not isinstance(val, list): - val = [val] + if isinstance(val, str): + val = val.split("\n\n") return [self._moderate_text(t, resmd, doval=doval) for t in val if t != ""] _pfx_for_type = OrderedDict([ @@ -1704,9 +1725,9 @@ def do_GET(self, path, ashead=False): """ try: out = self.svc.get_nerdm_data(self._id, path) - except dbio.NotAuthorized as ex: + except NotAuthorized as ex: return self.send_unauthorized() - except dbio.ObjectNotFound as ex: + except ObjectNotFound as ex: if ex.record_part: return self.send_error_resp(404, "Data property not found", "No data found at requested property", self._id, ashead=ashead) @@ -1734,18 +1755,18 @@ def do_POST(self, path): "Record with requested identifier not found", self._id) if path == "authors": - self.svc.add_author(self._id, newdata) + out = self.svc.add_author(self._id, newdata) elif path == "references": - self.svc.add_reference(self._id, newdata) + out = self.svc.add_reference(self._id, newdata) elif path == FILE_DELIM: - self.svc.set_file_component(self._id, newdata) + out = self.svc.set_file_component(self._id, newdata) elif path == LINK_DELIM: - self.svc.add_nonfile_component(self._id, newdata) + out = self.svc.add_nonfile_component(self._id, newdata) elif path == "components": if 'filepath' in newdata: - self.svc.set_file_component(self._id, newdata) + out = self.svc.set_file_component(self._id, newdata) else: - self.svc.add_nonfile_component(self._id, newdata) + out = self.svc.add_nonfile_component(self._id, newdata) else: return self.send_error_resp(405, "POST not allowed", @@ -1762,4 +1783,4 @@ def do_POST(self, path): return 
self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated", self._id) - + return self.send_json(out, "Added", 201) diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 49dd91b..036494f 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -284,9 +284,9 @@ def do_PUT(self, path): except dbio.ObjectNotFound as ex: return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", self._id) - except InvalidUpdate as ex: + except dbio.InvalidUpdate as ex: return self.send_error_resp(400, "Invalid Input Data", ex.format_errors()) - except PartNotAccessible as ex: + except dbio.PartNotAccessible as ex: return self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated") @@ -308,7 +308,7 @@ def do_PATCH(self, path): except dbio.InvalidUpdate as ex: return self.send_error_resp(400, "Submitted data creates an invalid record", ex.format_errors()) - except PartNotAccessible as ex: + except dbio.PartNotAccessible as ex: return self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated") diff --git a/python/tests/nistoar/midas/dap/service/test_mds3_app.py b/python/tests/nistoar/midas/dap/service/test_mds3_app.py new file mode 100644 index 0000000..71b2e28 --- /dev/null +++ b/python/tests/nistoar/midas/dap/service/test_mds3_app.py @@ -0,0 +1,460 @@ +import os, json, pdb, logging, tempfile, pathlib +import unittest as test +from io import StringIO +from copy import deepcopy +from collections import OrderedDict + +from nistoar.midas.dbio import inmem, base, AlreadyExists, InvalidUpdate, ObjectNotFound, PartNotAccessible +from nistoar.midas.dbio.wsgi import project as prj +from nistoar.midas.dap.service import mds3 +from nistoar.midas.dap.nerdstore.inmem import InMemoryResourceStorage +from nistoar.pdr.publish import prov +from nistoar.pdr.utils 
import read_nerd +from nistoar.nerdm.constants import CORE_SCHEMA_URI + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_mds3.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_mds3.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +# test records +testdir = pathlib.Path(__file__).parents[0] +pdr2210 = testdir.parents[2] / 'pdr' / 'describe' / 'data' / 'pdr2210.json' +ncnrexp0 = testdir.parents[2] / 'pdr' / 'publish' / 'data' / 'ncnrexp0.json' + +class TestMDS3DAPApp(test.TestCase): + + def start(self, status, headers=None, extup=None): + self.resp.append(status) + for head in headers: + self.resp.append("{0}: {1}".format(head[0], head[1])) + + def body2dict(self, body): + return json.loads("\n".join(self.tostr(body)), object_pairs_hook=OrderedDict) + + def tostr(self, resplist): + return [e.decode() for e in resplist] + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mds3" + }, + "default": { + "default_shoulder": "mds3" + } + }, + "dbio": { + "superusers": [ "rlp" ], + "allowed_project_shoulders": ["mds3", "pdr1"], + "default_shoulder": "mds3" + }, + "assign_doi": "always", + "doi_naan": "10.88888", + "nerdstorage": { +# "type": "fsbased", +# "store_dir": os.path.join(tmpdir.name) + "type": "inmem", + } + } + self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mds3": 0 }}) + self.nerdstore = InMemoryResourceStorage(self.cfg["nerdstorage"]) + self.svcfact = mds3.DAPServiceFactory(self.dbfact, self.cfg, rootlog.getChild("midas.dap"), + self.nerdstore) + self.app = mds3.DAPApp(self.dbfact, rootlog.getChild("midas"), self.cfg, 
self.svcfact) + self.resp = [] + self.rootpath = "/midas/dap/mds3" + + def create_record(self, name="goob", meta=None): + cli = self.dbfact.create_client(base.DAP_PROJECTS, self.cfg["dbio"], nistr.actor) + out = cli.create_record(name, "pdr1") + if meta: + out.meta = meta + out.save() + return out + + def sudb(self): + return self.dbfact.create_client(base.DAP_PROJECTS, self.cfg["dbio"], "rlp") + + def test_create_handler_name(self): + path = "mdm1:0001/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectNameHandler)) + self.assertNotEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + self.assertEqual(hdlr._id, "mdm1:0001") + + def test_get_name(self): + path = "pdr1:0001/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + prec = self.create_record("goob") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, "goob") + + self.resp = [] + path = "mds3:0001/name" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("404 ", self.resp[0]) + + def test_create(self): + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": {"fn": "Gurn Cranston"} }, + "meta": { "resType": "Software" }})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertNotEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("400 ", self.resp[0]) # input was missing name + + # TODO: this will succeed after we define the Software extension schema + 
self.resp = [] + req['wsgi.input'] = StringIO(json.dumps({"meta": { "resourceType": "Software", + "creatorisContact": "false", + "softwareLink": "https://sw.ex/gurn" }, + "name": "Gurn's Opus" })) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertNotEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "Gurn's Opus") + self.assertEqual(resp['id'], "mds3:0001") + self.assertEqual(resp['meta']["resourceType"], "Software") + self.assertEqual(resp['meta']["softwareLink"], "https://sw.ex/gurn") + self.assertIs(resp['meta']["creatorisContact"], False) + self.assertEqual(resp['data']['@id'], 'ark:/88434/mds3-0001') + self.assertEqual(resp['data']['doi'], 'doi:10.88888/mds3-0001') + self.assertEqual(resp['data']['@type'], + [ "nrdw:SoftwarePublication", "nrdp:PublicDataResource", "dcat:Resource" ]) + + + self.resp = [] + req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": {"fn": "Gurn Cranston"} }, + "meta": { "creatorisContact": "false", + "softwareLink": "https://sw.ex/gurn" }, + "name": "Gurn's Penultimate" })) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertNotEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "Gurn's Penultimate") + self.assertEqual(resp['id'], "mds3:0002") + self.assertEqual(resp['meta']["resourceType"], "data") + self.assertEqual(resp['meta']["softwareLink"], "https://sw.ex/gurn") + self.assertIs(resp['meta']["creatorisContact"], False) + self.assertEqual(resp['data']['@id'], 'ark:/88434/mds3-0002') + self.assertEqual(resp['data']['doi'], 'doi:10.88888/mds3-0002') + self.assertNotIn('contactPoint', 
resp['data']) # because ['data'] is just a summary + + self.resp = [] + path = resp['id'] + '/data' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['@id'], 'ark:/88434/mds3-0002') + self.assertEqual(resp['doi'], 'doi:10.88888/mds3-0002') + self.assertEqual(resp['contactPoint'], + {"fn": "Gurn Cranston", "@type": "vcard:Contact"}) + self.assertEqual(resp['@type'], + [ "nrdp:PublicDataResource", "dcat:Resource" ]) + self.assertIn('_schema', resp) + self.assertIn('_extensionSchemas', resp) + self.assertEqual(len(resp.get('components',[])), 1) + self.assertEqual(resp['components'][0]['accessURL'], "https://sw.ex/gurn") + self.assertEqual(len(resp), 8) + + def test_put_patch(self): + testnerd = read_nerd(pdr2210) + res = deepcopy(testnerd) + del res['references'] + del res['components'] + del res['@id'] + del res['_schema'] + del res['_extensionSchemas'] + + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": res['contactPoint'] }, + "meta": { "creatorisContact": "false" }, + "name": "OptSortSph" })) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertNotEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "OptSortSph") + self.assertEqual(resp['id'], "mds3:0001") + self.assertEqual(resp['meta']["resourceType"], "data") + self.assertIs(resp['meta']["creatorisContact"], False) + self.assertEqual(resp['data']['@id'], 'ark:/88434/mds3-0001') + 
self.assertEqual(resp['data']['doi'], 'doi:10.88888/mds3-0001') + self.assertNotIn('contactPoint', resp['data']) # because ['data'] is just a summary + + self.resp = [] + id = resp['id'] + path = id + '/data' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['@id'], 'ark:/88434/mds3-0001') + self.assertEqual(resp['doi'], 'doi:10.88888/mds3-0001') + self.assertEqual(resp['contactPoint'], + {"fn": "Zachary Levine", "@type": "vcard:Contact", + "hasEmail": "mailto:zachary.levine@nist.gov" }) + self.assertEqual(resp['@type'], + [ "nrdp:PublicDataResource", "dcat:Resource" ]) + self.assertIn('_schema', resp) + self.assertIn('_extensionSchemas', resp) + self.assertNotIn('components', resp) + self.assertNotIn('authors', resp) + self.assertNotIn('description', resp) + self.assertNotIn('rights', resp) + self.assertEqual(len(resp), 7) + + self.resp = [] + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps(res)) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['@id'], 'ark:/88434/mds3-0001') + self.assertEqual(resp['doi'], 'doi:10.88888/mds3-0001') + self.assertEqual(resp['contactPoint'], + {"fn": "Zachary Levine", "@type": "vcard:Contact", + "hasEmail": "mailto:zachary.levine@nist.gov" }) + self.assertEqual(len(resp['description']), 1) + self.assertNotIn('references', resp) + self.assertNotIn('authors', resp) + self.assertIn('description', resp) + + self.resp = [] + path = id + '/data/authors' + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] 
= StringIO(json.dumps({ + "fn": "Levine, Zachary", + "givenName": "Zachary", + "familyName": "Levine", + "affiliation": "NIST" + })) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp["givenName"], "Zachary") + self.assertEqual(len(resp["affiliation"]), 1) + self.assertIn("@id", resp) + + files = [c for c in testnerd['components'] if 'filepath' in c] + self.resp = [] + path = id + '/data/components' + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps(files)) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertTrue(isinstance(resp, list)) + self.assertEqual(len(resp), len(files)) + + links = [c for c in testnerd['components'] if 'filepath' not in c] + self.assertEqual(len(links), 1) + self.resp = [] + path = id + '/data/components' + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps(links)) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertTrue(isinstance(resp, list)) + self.assertEqual(len(resp), len(files)+len(links)) + self.assertEqual(resp[0]['accessURL'], "https://doi.org/10.18434/T4SW26") + + self.resp = [] + path = id + '/data/pdr:f' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertTrue(isinstance(resp, list)) + self.assertEqual(len(resp), len(files)) + self.assertTrue(all('filepath' in c for c in resp)) + + self.resp = [] 
+ path = id + '/data/pdr:f/trial3' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['filepath'], "trial3") + self.assertEqual(resp['@id'], "coll_2") + self.assertNotIn("downloadURL", resp) + + self.resp = [] + path = id + '/data/components[file_1]' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['filepath'], "trial2.json") + self.assertEqual(resp['@id'], "file_1") + + self.resp = [] + path = id + '/data/doi' + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("doi:10.88888/haha")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("405 ", self.resp[0]) + + self.resp = [] + path = id + '/data/rights' + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps("What ever.")) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, "What ever.") + + self.resp = [] + path = id + '/data/pdr:r' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + body = hdlr.handle() + self.assertIn("200 ", 
self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['@id'], 'ark:/88434/mds3-0001') + self.assertEqual(resp['doi'], 'doi:10.88888/mds3-0001') + self.assertEqual(resp['rights'], 'What ever.') + self.assertIn('_schema', resp) + self.assertIn('_extensionSchemas', resp) + self.assertNotIn('components', resp) + self.assertNotIn('authors', resp) + self.assertIn('description', resp) + self.assertEqual(resp['rights'], "What ever.") + + + + + + + + + + + + + + + + +if __name__ == '__main__': + test.main() + + From 6b9982120cc907733b4f1537e4f2407fcc0cefd4 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 22 Feb 2023 08:08:11 -0500 Subject: [PATCH 052/123] dbio: introduce status and provenance storage --- python/nistoar/midas/dbio/base.py | 87 ++++++++- python/nistoar/midas/dbio/fsbased.py | 64 ++++++- python/nistoar/midas/dbio/inmem.py | 29 +++ python/nistoar/midas/dbio/mongo.py | 43 ++++- python/nistoar/midas/dbio/status.py | 172 ++++++++++++++++++ .../tests/nistoar/midas/dbio/test_fsbased.py | 60 ++++++ python/tests/nistoar/midas/dbio/test_inmem.py | 107 +++++++++++ python/tests/nistoar/midas/dbio/test_mongo.py | 54 +++++- .../tests/nistoar/midas/dbio/test_status.py | 80 ++++++++ 9 files changed, 692 insertions(+), 4 deletions(-) create mode 100644 python/nistoar/midas/dbio/status.py create mode 100644 python/tests/nistoar/midas/dbio/test_status.py diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index f92e3c9..7445d6d 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -20,7 +20,9 @@ from datetime import datetime from nistoar.base.config import ConfigurationException +from nistoar.pdr.publish.prov import Action from .. 
import MIDASException +from .status import ProjectStatus DAP_PROJECTS = "dap" DMP_PROJECTS = "dmp" @@ -277,6 +279,14 @@ def reactivate(self) -> bool: self._data['deactivated'] = None return True + @property + def status(self) -> ProjectStatus: + """ + return the status object that indicates the current state of the record and the last + action applied to it. + """ + return ProjectStatus(self._data.get('status', {})) + @property def acls(self) -> ACLs: """ @@ -713,7 +723,7 @@ def meta(self, data: Mapping): def __str__(self): return "<{} ProjectRecord: {} ({}) owner={}>".format(self._coll.rstrip("s"), self.id, self.name, self.owner) - + class DBClient(ABC): """ a client connected to the database for a particular service (e.g. drafting, DMPs, etc.) @@ -1044,6 +1054,81 @@ def delete_record(self, id: str) -> bool: self._delete_from(self._projcoll, id) return True + def record_action(self, act: Action): + """ + save the given action record to the back-end store + """ + if not act.subject: + raise ValueError("record_action(): action is missing a subject identifier") + self._save_action_data(act.to_dict()) + + @abstractmethod + def _save_action_data(self, actdata: Mapping): + """ + save the given data to the action log collection + """ + raise NotImplementedError() + + @abstractmethod + def _select_actions_for(self, id: str) -> List[Mapping]: + """ + retrieve all actions currently recorded for the record with the given identifier + """ + raise NotImplementedError() + + @abstractmethod + def _delete_actions_for(self, id: str): + """ + purge all actions currently recorded for the record with the given identifier + """ + raise NotImplementedError() + + def _close_actionlog_with(self, rec: ProtectedRecord, close_action: Action, extra=None, + cancel_if_empty=True): + """ + archive all actions in the action log for a given ID, ending with the given action. 
+ :param str id: the record identifier to select the actions for + :param Action close_action: the action that is effectively closing the record. This + is usually a SUBMIT action or a DELETE action. + """ + history = self._select_actions_for(rec.id) + if len(history) == 0 and cancel_if_empty: + return + history.append(close_action.to_dict()) + + # users with permission to read record can read the history, but only superusers + # can update it or administer it. + acls = OrderedDict([ + ("read", rec.acls._perms.get('read', [])) + ]) + + if 'recid' in extra or 'close_action' in extra: + extra = deepcopy(extra) + if 'recid' in extra: + del extra['recid'] + if 'close_action' in extra: + del extra['close_action'] + + archive = OrderedDict([ + ("recid", rec.id), + ("close_action", close_action.type) + ]) + archive.update(extra) + archive['acls'] = acls + archive['history'] = history + + self._save_history(archive) + self._delete_actions_for(rec.id) + + @abstractmethod + def _save_history(self, histrec): + """ + save the given history record to the history collection + """ + raise NotImplementedError() + + + class DBClientFactory(ABC): """ diff --git a/python/nistoar/midas/dbio/fsbased.py b/python/nistoar/midas/dbio/fsbased.py index b039e00..9d32497 100644 --- a/python/nistoar/midas/dbio/fsbased.py +++ b/python/nistoar/midas/dbio/fsbased.py @@ -146,12 +146,74 @@ def select_records(self, perm: base.Permissions=base.ACLs.OWN) -> Iterator[base. 
# skip over corrupted records continue except IOError as ex: - raise DBIOException(recf+": file locking error: "+str(ex)) + raise base.DBIOException(recf+": file locking error: "+str(ex)) for p in perm: if rec.authorized(p): yield rec break + def _save_action_data(self, actdata: Mapping): + self._ensure_collection("action_log") + try: + recpath = self._root / 'action_log' / (actdata['subject']+".lis") + return self._append_json_to_listfile(actdata, recpath) + except KeyError as ex: + raise ValueError("_save_action_data(): Action is missing subject id") + except Exception as ex: + raise base.DBIOException(actdata['subject']+": Unable to append action: "+str(ex)) from ex + + # the action log list file contains one JSON object per line + def _append_json_to_listfile(self, data: Mapping, outpath: Path): + exists = outpath.exists() + with open(outpath, 'a') as fd: + fd.write(json.dumps(data)) + fd.write("\n") + return not exists + + # the action log list file contains one JSON object per line + def _load_from_listfile(self, inpath: Path): + if not inpath.exists(): + return [] + with open(inpath) as fd: + return [json.loads(line.strip()) for line in fd] + + def _select_actions_for(self, id: str) -> List[Mapping]: + self._ensure_collection("action_log") + recpath = self._root / 'action_log' / (id+".lis") + if not recpath.is_file(): + return [] + try: + return self._load_from_listfile(recpath) + except Exception as ex: + raise base.DBIOException(id+": Unable to read actions: "+str(ex)) + + def _delete_actions_for(self, id): + self._ensure_collection("action_log") + recpath = self._root / 'action_log' / (id+".lis") + if recpath.is_file(): + recpath.unlink() + + def _save_history(self, histrec): + if not histrec.get('recid'): + raise ValueError("_save_history(): History is missing record id") + self._ensure_collection("history") + + history = [] + recpath = self._root / 'history' / (histrec['recid']+".json") + if recpath.is_file(): + try: + history = read_json(str(recpath)) 
+ except Exception as ex: + raise base.DBIOException(histrec['recid']+": Failed to read old history entries: "+str(ex)) + elif recpath.exists(): + raise base.DBIOException(str(recpath)+": not a file") + + history.append(histrec) + try: + write_json(history, str(recpath)) + except Exception as ex: + raise base.DBIOException(histrec['recid']+": Failed to write history entries: "+str(ex)) + class FSBasedDBClientFactory(base.DBClientFactory): """ a DBClientFactory that creates FSBasedDBClient instances in which records are stored in JSON diff --git a/python/nistoar/midas/dbio/inmem.py b/python/nistoar/midas/dbio/inmem.py index 0aede78..1d1d0f9 100644 --- a/python/nistoar/midas/dbio/inmem.py +++ b/python/nistoar/midas/dbio/inmem.py @@ -80,7 +80,36 @@ def select_records(self, perm: base.Permissions=base.ACLs.OWN) -> Iterator[base. if rec.authorized(p): yield deepcopy(rec) break + + def _save_action_data(self, actdata: Mapping): + if 'subject' not in actdata: + raise ValueError("_save_action_data(): Missing subject property in action data") + id = actdata['subject'] + if 'action_log' not in self._db: + self._db['action_log'] = {} + if id not in self._db['action_log']: + self._db['action_log'][id] = [] + self._db['action_log'][id].append(actdata) + def _select_actions_for(self, id: str) -> List[Mapping]: + if 'action_log' not in self._db or id not in self._db['action_log']: + return [] + return deepcopy(self._db['action_log'][id]) + + def _delete_actions_for(self, id): + if 'action_log' not in self._db or id not in self._db['action_log']: + return + del self._db['action_log'][id] + + def _save_history(self, histrec): + if 'recid' not in histrec: + raise ValueError("_save_history(): Missing recid property in history data") + if 'history' not in self._db: + self._db['history'] = {} + if histrec['recid'] not in self._db['history']: + self._db['history'][histrec['recid']] = [] + self._db['history'][histrec['recid']].append(histrec) + class 
InMemoryDBClientFactory(base.DBClientFactory): """ diff --git a/python/nistoar/midas/dbio/mongo.py b/python/nistoar/midas/dbio/mongo.py index b67d005..596bbdd 100644 --- a/python/nistoar/midas/dbio/mongo.py +++ b/python/nistoar/midas/dbio/mongo.py @@ -7,7 +7,7 @@ from typing import Iterator, List from . import base -from pymongo import MongoClient +from pymongo import MongoClient, ASCENDING from nistoar.base.config import ConfigurationException, merge_config @@ -17,6 +17,8 @@ class MongoDBClient(base.DBClient): """ an implementation of DBClient using a MongoDB database as the backend store. """ + ACTION_LOG_COLL = 'action_log' + HISTORY_COLL = 'history' def __init__(self, dburl: str, config: Mapping, projcoll: str, foruser: str = base.ANONYMOUS): """ @@ -202,6 +204,45 @@ def select_records(self, perm: base.Permissions=base.ACLs.OWN) -> Iterator[base. except Exception as ex: raise base.DBIOException("Failed while selecting records: " + str(ex), cause=ex) + def _save_action_data(self, actdata: Mapping): + try: + coll = self.native[self.ACTION_LOG_COLL] + result = coll.insert_one(actdata) + return True + + except base.DBIOException as ex: + raise + except Exception as ex: + raise base.DBIOException(actdata.get('subject',"id=?")+ + ": Failed to save action: "+str(ex)) from ex + + def _select_actions_for(self, id: str) -> List[Mapping]: + try: + coll = self.native[self.ACTION_LOG_COLL] + return [rec for rec in coll.find({'subject': id}, {'_id': False}).sort("timestamp", ASCENDING)] + except Exception as ex: + raise base.DBIOException(id+": Failed to select action records: "+str(ex)) from ex + + def _delete_actions_for(self, id): + try: + coll = self.native[self.ACTION_LOG_COLL] + result = coll.delete_many({'subject': id}) + return result.deleted_count > 0 + except Exception as ex: + raise base.DBIOException(id+": Failed to delete action records: "+str(ex)) from ex + + def _save_history(self, histrec): + try: + coll = self.native[self.HISTORY_COLL] + result = 
coll.insert_one(histrec) + return True + except base.DBIOException as ex: + raise + except Exception as ex: + raise DBIOEception(histrec.get('recid', "id=?")+": Failed to save history entry: "+str(ex)) \ + from ex + + class MongoDBClientFactory(base.DBClientFactory): """ a DBClientFactory that creates MongoDBClient instances in which records are stored in a MongoDB diff --git a/python/nistoar/midas/dbio/status.py b/python/nistoar/midas/dbio/status.py new file mode 100644 index 0000000..ca6f1a8 --- /dev/null +++ b/python/nistoar/midas/dbio/status.py @@ -0,0 +1,172 @@ +""" +Module for tracking the status of a project record. + +Note that this module is similar in intent and implementation to +:py:mod:`nistoar.pdr.publish.service.status` but has implemented to different requirements. +""" +import math +from collections.abc import Mapping +from time import time +from datetime import datetime + +from nistoar.pdr.publish.prov import Action + +EDIT = "edit" # Record is currently being edit for a new released version +PROCESSING = "processing" # Record is being processed at the moment and cannot be updated + # further until this processing is complete. +SUBMITTED = "submitted" # Record has been submitted and is either processed or is under review +ACCEPTED = "accepted" # Record has been reviewed and is being processed for release +INPRESS = "in press" # Record was submitted to the publishing service and is still being processed +PUBLISHED = "published" # Record was successfully preserved and released +UNWELL = "unwell" # Record is in a state that does not allow it to be further processed or + # updated and requires administrative care to restore it to a usable state +_state_p = "state" +_since_p = "since" +_action_p = "action" +_modified_p = "modified" +_message_p = "message" + +class ProjectStatus: + """ + a class that holds the current status of a project, aggregating multiple pieces of information about + the projects state and the last action applied to it. 
+ """ + + def __init__(self, id: str, status_data: Mapping): + """ + wrap the status information for a particular project record + :param str id: the project identifier that this status object belongs to + :param Mapping status_data: the dictionary containing the project's status data. This + data usually comes from the ``status`` the internal property of a + :py:class:`~nistoar.midas.dbio.project.ProjectRecord`. This class will + manipulate the given dictionary directly without making a copy. + """ + self._id = id + self._data = status_data + if not self._data.get(_state_p): + self._data[_state_p] = EDIT + if not self._data.get(_action_p): + self._data[_action_p] = Action.CREATE + + # try to keep since <= modified by default + if _since_p not in self._data or not isinstance(self._data[_since_p], int): + self._data[_since_p] = self._data.get(_modified_p) \ + if isinstance(self._data.get(_modified_p), int) else 0 + if _modified_p not in self._data or not isinstance(self._data[_modified_p], int): + self._data[_modified_p] = -1 if self._data[_since_p] < 0 else 0 + + if self._data[_since_p] < 0: + self._data[_since_p] = time() + if self._data[_modified_p] < 0: + self._data[_modified_p] = time() + + if _message_p not in self._data: + self._data[_message_p] = "" + elif not isinstance(self._data[_message_p], str): + self._data[_message_p] = str(self._data[_message_p]) + + @property + def id(self) -> str: + """ + the identifier for the project record this object provides the status for + """ + return self._id + + @property + def state(self) -> str: + """ + One of a set of enumerated values indicating a distinct stage of the record's evolution. 
+ """ + return self._data[_state_p] + + @property + def since(self) -> int: + """ + The epoch timestamp when the record entered the current state + """ + return self._data[_since_p] + + @property + def since_date(self) -> str: + """ + the timestamp for when the record entered the current state, formatted as an ISO string + """ + if self.since <= 0: + return "pending" + return datetime.fromtimestamp(math.floor(self.since)).isoformat() + + @property + def action(self) -> str: + """ + The name of the last action applied to the project. In general the actions that can be applied + to a project record are project-type-specific; however, there are a set of common actions. + """ + return self._data[_action_p] + + @property + def modified(self) -> int: + """ + The epoch timestamp when the latest action was applied to the record. + """ + return self._data[_modified_p] + + @property + def modified_date(self) -> str: + """ + The timestamp when the latest action was applied to the record, formatted as an ISO string + """ + if self.modified <= 0: + return "pending" + return datetime.fromtimestamp(math.floor(self.modified)).isoformat() + + @property + def message(self) -> str: + """ + A statement providing further description of about the last action. The message may be + provided by the requesting user (to record the intent of the action) or set by default + by the project service. + """ + return self._data[_message_p] + + @message.setter + def message(self, val): + self._data[_message_p] = val + + def act(self, action: str, message: str="", when: int=0): + """ + record the application of a particular action on the project + :param str action: the name of the action being applied + :param str message: a statement indicating the reason or intent of the action + :param int when: the epoch timestamp for when the action was applied. A value of + zero (default) indicates that the timestamp should be set when the + project record is saved. 
A value less than zero will cause + the current time to be set. + """ + if not action: + raise ValueError("Action not specified") + if message is None: + message = "" + if when < 0: + when = time() + + self._data[_action_p] = action + self._data[_message_p] = message + self._data[_modified_p] = when + + def set_state(self, state, when: int=-1): + """ + record a new state that the project record has entered. + :param str state: the name of the new state that the record has entered + :param int when: the epoch timestamp for when the state changed. A value of + zero indicates that the timestamp should be set when the + project record is saved. A value less than zero (default) will + cause the current time to be set. + """ + if not state: + raise ValueError("State not specified") + if when < 0: + when = time() + + self._data[_state_p] = state + self._data[_since_p] = when + diff --git a/python/tests/nistoar/midas/dbio/test_fsbased.py b/python/tests/nistoar/midas/dbio/test_fsbased.py index bc9f5fa..89a553f 100644 --- a/python/tests/nistoar/midas/dbio/test_fsbased.py +++ b/python/tests/nistoar/midas/dbio/test_fsbased.py @@ -242,8 +242,68 @@ def test_select_records(self): self.assertTrue(isinstance(recs[0], base.ProjectRecord)) self.assertEqual(recs[0].id, id) + def test_action_log_io(self): + with self.assertRaises(ValueError): + self.cli._save_action_data({'goob': 'gurn'}) + + recpath = self.cli._root / "action_log" / "goob:gurn.lis" + self.assertTrue(not recpath.exists()) + self.cli._save_action_data({'subject': 'goob:gurn', 'foo': 'bar'}) + self.assertTrue(recpath.exists()) + with open(recpath) as fd: + lines = fd.readlines() + self.assertEqual(len(lines), 1) + self.assertEqual(json.loads(lines[0]), {'subject': 'goob:gurn', 'foo': 'bar'}) + + self.cli._save_action_data({'subject': 'goob:gurn', 'bob': 'alice'}) + with open(recpath) as fd: + lines = fd.readlines() + self.assertEqual(len(lines), 2) + self.assertEqual(json.loads(lines[0]), {'subject': 'goob:gurn', 'foo': 
'bar'}) + self.assertEqual(json.loads(lines[1]), {'subject': 'goob:gurn', 'bob': 'alice'}) + recpath = self.cli._root / "action_log" / "grp0001.lis" + self.assertTrue(not recpath.exists()) + self.cli._save_action_data({'subject': 'grp0001', 'dylan': 'bob'}) + self.assertTrue(recpath.exists()) + with open(recpath) as fd: + lines = fd.readlines() + self.assertEqual(len(lines), 1) + self.assertEqual(json.loads(lines[0]), {'subject': 'grp0001', 'dylan': 'bob'}) + + acts = self.cli._select_actions_for("goob:gurn") + self.assertEqual(len(acts), 2) + self.assertEqual(acts[0], {'subject': 'goob:gurn', 'foo': 'bar'}) + self.assertEqual(acts[1], {'subject': 'goob:gurn', 'bob': 'alice'}) + acts = self.cli._select_actions_for("grp0001") + self.assertEqual(len(acts), 1) + self.assertEqual(acts[0], {'subject': 'grp0001', 'dylan': 'bob'}) + + self.cli._delete_actions_for("grp0001") + self.assertTrue(not recpath.exists()) + recpath = self.cli._root / "action_log" / "goob:gurn.lis" + self.assertTrue(recpath.exists()) + self.cli._delete_actions_for("goob:gurn") + self.assertTrue(not recpath.exists()) + + self.assertEqual(self.cli._select_actions_for("goob:gurn"), []) + self.assertEqual(self.cli._select_actions_for("grp0001"), []) + + def test_save_history(self): + with self.assertRaises(ValueError): + self.cli._save_history({'goob': 'gurn'}) + + recpath = self.cli._root / "history" / "goob:gurn.json" + self.assertFalse(recpath.exists()) + self.cli._save_history({'recid': 'goob:gurn', 'foo': 'bar'}) + self.cli._save_history({'recid': 'goob:gurn', 'alice': 'bob'}) + self.assertTrue(recpath.is_file(), "history not saved to file") + with open(recpath) as fd: + data = json.load(fd) + self.assertEqual(len(data), 2) + self.assertEqual(data[0], {'recid': 'goob:gurn', 'foo': 'bar'}) + self.assertEqual(data[1], {'recid': 'goob:gurn', 'alice': 'bob'}) diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py b/python/tests/nistoar/midas/dbio/test_inmem.py index b8b227c..f85a27a 100644 --- 
a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -3,6 +3,9 @@ import unittest as test from nistoar.midas.dbio import inmem, base +from nistoar.pdr.publish.prov import Action, PubAgent + +testuser = PubAgent("test", PubAgent.AUTO, "tester") class TestInMemoryDBClientFactory(test.TestCase): @@ -214,9 +217,113 @@ def test_select_records(self): self.assertTrue(isinstance(recs[0], base.ProjectRecord)) self.assertEqual(recs[0].id, id) + def test_action_log_io(self): + with self.assertRaises(ValueError): + self.cli._save_action_data({'goob': 'gurn'}) + + self.cli._save_action_data({'subject': 'goob:gurn', 'foo': 'bar'}) + self.assertTrue('action_log' in self.cli._db) + self.assertTrue('goob:gurn' in self.cli._db['action_log']) + self.assertEqual(len(self.cli._db['action_log']['goob:gurn']), 1) + self.assertEqual(self.cli._db['action_log']['goob:gurn'][0], + {'subject': 'goob:gurn', 'foo': 'bar'}) + + self.cli._save_action_data({'subject': 'goob:gurn', 'bob': 'alice'}) + self.assertEqual(len(self.cli._db['action_log']['goob:gurn']), 2) + self.assertEqual(self.cli._db['action_log']['goob:gurn'][0], + {'subject': 'goob:gurn', 'foo': 'bar'}) + self.assertEqual(self.cli._db['action_log']['goob:gurn'][1], + {'subject': 'goob:gurn', 'bob': 'alice'}) + self.cli._save_action_data({'subject': 'grp0001', 'dylan': 'bob'}) + self.assertTrue('action_log' in self.cli._db) + self.assertTrue('grp0001' in self.cli._db['action_log']) + self.assertEqual(len(self.cli._db['action_log']['grp0001']), 1) + self.assertEqual(self.cli._db['action_log']['grp0001'][0], + {'subject': 'grp0001', 'dylan': 'bob'}) + + acts = self.cli._select_actions_for("goob:gurn") + self.assertEqual(len(acts), 2) + self.assertEqual(acts[0], {'subject': 'goob:gurn', 'foo': 'bar'}) + self.assertEqual(acts[1], {'subject': 'goob:gurn', 'bob': 'alice'}) + acts = self.cli._select_actions_for("grp0001") + self.assertEqual(len(acts), 1) + self.assertEqual(acts[0], 
{'subject': 'grp0001', 'dylan': 'bob'}) + + self.cli._delete_actions_for("goob:gurn") + self.assertTrue('action_log' in self.cli._db) + self.assertTrue('goob:gurn' not in self.cli._db['action_log']) + self.assertEqual(len(self.cli._db['action_log']['grp0001']), 1) + self.assertEqual(self.cli._db['action_log']['grp0001'][0], + {'subject': 'grp0001', 'dylan': 'bob'}) + + self.cli._delete_actions_for("grp0001") + self.assertTrue('action_log' in self.cli._db) + self.assertTrue('goob:gurn' not in self.cli._db['action_log']) + self.assertTrue('grp0001' not in self.cli._db['action_log']) + + self.assertEqual(self.cli._select_actions_for("goob:gurn"), []) + self.assertEqual(self.cli._select_actions_for("grp0001"), []) + + def test_save_history(self): + with self.assertRaises(ValueError): + self.cli._save_history({'goob': 'gurn'}) + + self.cli._save_history({'recid': 'goob:gurn', 'foo': 'bar'}) + self.cli._save_history({'recid': 'goob:gurn', 'alice': 'bob'}) + + self.assertTrue('history' in self.cli._db) + self.assertTrue('goob:gurn' in self.cli._db['history']) + self.assertEqual(len(self.cli._db['history']['goob:gurn']), 2) + self.assertEqual(self.cli._db['history']['goob:gurn'][0], + {'recid': 'goob:gurn', 'foo': 'bar'}) + self.assertEqual(self.cli._db['history']['goob:gurn'][1], + {'recid': 'goob:gurn', 'alice': 'bob'}) + + def test_record_action(self): + self.cli.record_action(Action(Action.CREATE, "mds3:0008", testuser, "created")) + self.cli.record_action(Action(Action.COMMENT, "mds3:0008", testuser, "i'm hungry")) + acts = self.cli._select_actions_for("mds3:0008") + self.assertEqual(len(acts), 2) + self.assertEqual(acts[0]['type'], Action.CREATE) + self.assertEqual(acts[1]['type'], Action.COMMENT) + def test_close_actionlog_with(self): + prec = base.ProjectRecord(base.DRAFT_PROJECTS, + {"id": "pdr0:2222", "name": "brains", "owner": "nist0:ava1"}, self.cli) + finalact = Action(Action.SUBMIT, "pdr0:2222", testuser, "done!") + self.assertNotIn('action_log', 
self.cli._db) + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) + + # no history should have been written + self.assertNotIn('history', self.cli._db) + self.cli.record_action(Action(Action.CREATE, "pdr0:2222", testuser, "created")) + self.cli.record_action(Action(Action.COMMENT, "pdr0:2222", testuser, "i'm hungry")) + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook", "recid": "goob"}) + self.assertIn('history', self.cli._db) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 1) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"][0]['history']), 3) + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['recid'], "pdr0:2222") + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['published_as'], "comicbook") + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['close_action'], Action.SUBMIT) + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['acls'], + {"read": prec.acls._perms['read']}) + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['history'][-1]['message'], "done!") + self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) + + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) + self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 1) + + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}, False) + self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 2) + self.assertEqual(self.cli._db['history']["pdr0:2222"][1]['history'][-1]['message'], "done!") + self.assertEqual(len(self.cli._db['history']["pdr0:2222"][1]['history']), 1) + + + if __name__ == '__main__': test.main() diff --git a/python/tests/nistoar/midas/dbio/test_mongo.py b/python/tests/nistoar/midas/dbio/test_mongo.py index 6601a4c..f3e2efe 100644 --- a/python/tests/nistoar/midas/dbio/test_mongo.py 
+++ b/python/tests/nistoar/midas/dbio/test_mongo.py @@ -65,7 +65,7 @@ def tearDown(self): client.get_database = client.get_default_database db = client.get_database() for coll in [base.GROUPS_COLL, base.PEOPLE_COLL, base.DMP_PROJECTS, base.DRAFT_PROJECTS, - "nextnum", "about"]: + "nextnum", "about", "action_log", "history"]: if coll in db.list_collection_names(): db.drop_collection(coll) @@ -284,6 +284,58 @@ def test_select_records(self): self.assertTrue(isinstance(recs[0], base.ProjectRecord)) self.assertEqual(recs[0].id, id) + def test_action_log_io(self): + self.assertEqual(self.cli.native['action_log'].count_documents({}), 0) + self.cli._save_action_data({'subject': 'goob:gurn', 'foo': 'bar', 'timestamp': 8}) + acts = [r for r in self.cli.native['action_log'].find({}, {'_id': False})] + self.assertEqual(len(acts), 1) + self.assertEqual(acts[0], {'subject': 'goob:gurn', 'foo': 'bar', 'timestamp': 8}) + + self.cli._save_action_data({'subject': 'goob:gurn', 'bob': 'alice', 'timestamp': 5}) + acts = [r for r in self.cli.native['action_log'].find({}, {'_id': False})] + self.assertEqual(len(acts), 2) + self.assertEqual(acts[0], {'subject': 'goob:gurn', 'foo': 'bar', 'timestamp': 8}) + self.assertEqual(acts[1], {'subject': 'goob:gurn', 'bob': 'alice', 'timestamp': 5}) + + self.assertEqual(self.cli.native['action_log'].count_documents({'subject': 'grp0001'}), 0) + self.cli._save_action_data({'subject': 'grp0001', 'dylan': 'bob'}) + self.assertEqual(self.cli.native['action_log'].count_documents({}), 3) + acts = [r for r in self.cli.native['action_log'].find({'subject': 'grp0001'}, {'_id': False})] + self.assertEqual(len(acts), 1) + self.assertEqual(acts[0], {'subject': 'grp0001', 'dylan': 'bob'}) + + acts = self.cli._select_actions_for("goob:gurn") + self.assertEqual(len(acts), 2) + self.assertEqual(acts[0], {'subject': 'goob:gurn', 'bob': 'alice', 'timestamp': 5}) + self.assertEqual(acts[1], {'subject': 'goob:gurn', 'foo': 'bar', 'timestamp': 8}) + acts = 
self.cli._select_actions_for("grp0001") + self.assertEqual(len(acts), 1) + self.assertEqual(acts[0], {'subject': 'grp0001', 'dylan': 'bob'}) + + self.cli._delete_actions_for("grp0001") + self.assertEqual(self.cli.native['action_log'].count_documents({}), 2) + self.assertEqual(self.cli.native['action_log'].count_documents({'subject': 'grp0001'}), 0) + self.cli._delete_actions_for("goob:gurn") + self.assertEqual(self.cli.native['action_log'].count_documents({}), 0) + + self.assertEqual(self.cli._select_actions_for("goob:gurn"), []) + self.assertEqual(self.cli._select_actions_for("grp0001"), []) + + def test_save_history(self): + self.assertEqual(self.cli.native['history'].count_documents({}), 0) + self.cli._save_history({'recid': 'goob:gurn', 'foo': 'bar'}) + self.cli._save_history({'recid': 'pdr0:0001', 'alice': 'bob'}) + + data = [r for r in self.cli.native['history'].find({}, {'_id': False})] + self.assertEqual(len(data), 2) + self.assertEqual(data[0], {'recid': 'goob:gurn', 'foo': 'bar'}) + self.assertEqual(data[1], {'recid': 'pdr0:0001', 'alice': 'bob'}) + + + + + + @test.skipIf(not os.environ.get('MONGO_TESTDB_URL'), "test mongodb not available") class TestMongoProjectRecord(test.TestCase): diff --git a/python/tests/nistoar/midas/dbio/test_status.py b/python/tests/nistoar/midas/dbio/test_status.py new file mode 100644 index 0000000..409aff6 --- /dev/null +++ b/python/tests/nistoar/midas/dbio/test_status.py @@ -0,0 +1,80 @@ +import os, pdb, sys, json +import unittest as test +from copy import deepcopy + +from nistoar.midas.dbio import status +from nistoar.pdr.publish.prov import Action + +class TestProjectStatus(test.TestCase): + + def test_ctor(self): + stat = status.ProjectStatus("goob", {}) + self.assertEqual(stat.id, "goob") + self.assertEqual(stat.state, status.EDIT) + self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.message, "") + self.assertEqual(stat.since, 0) + self.assertEqual(stat.modified, 0) + self.assertEqual(stat.since_date, 
"pending") + self.assertEqual(stat.modified_date, "pending") + + def test_act(self): + stat = status.ProjectStatus("goob", {"state": status.EDIT, "since": -1}) + self.assertEqual(stat.id, "goob") + self.assertEqual(stat.state, status.EDIT) + self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.message, "") + self.assertGreater(stat.since, 0) + self.assertGreater(stat.modified, 0) + self.assertNotEqual(stat.since_date, "pending") + self.assertNotEqual(stat.modified_date, "pending") + + stat.act(Action.PATCH, "made updates") + self.assertEqual(stat.state, status.EDIT) + self.assertEqual(stat.action, Action.PATCH) + self.assertEqual(stat.message, "made updates") + self.assertEqual(stat.modified, 0) + self.assertNotEqual(stat.since_date, "pending") + self.assertEqual(stat.modified_date, "pending") + + stat.act(Action.PUT) + self.assertEqual(stat.state, status.EDIT) + self.assertEqual(stat.action, Action.PUT) + self.assertEqual(stat.message, "") + self.assertEqual(stat.modified, 0) + self.assertNotEqual(stat.since_date, "pending") + self.assertEqual(stat.modified_date, "pending") + + stat.act(Action.COMMENT, "Whoa", -1) + self.assertEqual(stat.state, status.EDIT) + self.assertEqual(stat.action, Action.COMMENT) + self.assertEqual(stat.message, "Whoa") + self.assertGreater(stat.modified, stat.since) + self.assertNotEqual(stat.since_date, "pending") + self.assertNotEqual(stat.modified_date, "pending") + + def test_set_state(self): + stat = status.ProjectStatus("goob", {"state": status.EDIT, "since": -1}) + self.assertEqual(stat.id, "goob") + self.assertEqual(stat.state, status.EDIT) + self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.message, "") + self.assertGreater(stat.since, 0) + self.assertGreater(stat.modified, 0) + self.assertNotEqual(stat.since_date, "pending") + self.assertNotEqual(stat.modified_date, "pending") + + then = stat.since + stat.set_state(status.PROCESSING) + self.assertEqual(stat.id, "goob") + 
self.assertEqual(stat.state, status.PROCESSING) + self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.message, "") + self.assertGreater(stat.since, then) + self.assertLess(stat.modified, stat.since) + self.assertNotEqual(stat.since_date, "pending") + self.assertNotEqual(stat.modified_date, "pending") + + +if __name__ == '__main__': + test.main() From 0d18818a0c4f134f0b74edf47556276aa923ff3b Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 3 Mar 2023 14:38:37 -0500 Subject: [PATCH 053/123] dbio status: properly initialize the project status --- python/nistoar/midas/dbio/base.py | 4 +++- python/nistoar/midas/dbio/status.py | 27 +++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 7445d6d..994edf4 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -190,6 +190,8 @@ def _initialize(self, recdata: MutableMapping) -> MutableMapping: if 'deactivated' not in recdata: # Should be None or a date recdata['deactivated'] = None + if 'status' not in recdata: + recdata['status'] = ProjectStatus(recdata['id'], {}).to_dict(False) for perm in ACLs.OWN: if perm not in recdata['acls']: recdata['acls'][perm] = [recdata['owner']] if recdata['owner'] else [] @@ -285,7 +287,7 @@ def status(self) -> ProjectStatus: return the status object that indicates the current state of the record and the last action applied to it. 
""" - return ProjectStatus(self._data.get('status', {})) + return ProjectStatus(self.id, self._data.get('status')) @property def acls(self) -> ACLs: diff --git a/python/nistoar/midas/dbio/status.py b/python/nistoar/midas/dbio/status.py index ca6f1a8..285a6dd 100644 --- a/python/nistoar/midas/dbio/status.py +++ b/python/nistoar/midas/dbio/status.py @@ -8,9 +8,12 @@ from collections.abc import Mapping from time import time from datetime import datetime +from copy import deepcopy from nistoar.pdr.publish.prov import Action +# Available project states: +# EDIT = "edit" # Record is currently being edit for a new released version PROCESSING = "processing" # Record is being processed at the moment and cannot be updated # further until this processing is complete. @@ -26,15 +29,23 @@ _modified_p = "modified" _message_p = "message" +# Common project record actions +# +ACTION_CREATE = "create" +ACTION_UPDATE = "update" + class ProjectStatus: """ a class that holds the current status of a project, aggregating multiple pieces of information about the projects state and the last action applied to it. """ + CREATE_ACTION = ACTION_CREATE + UPDATE_ACTION = ACTION_UPDATE def __init__(self, id: str, status_data: Mapping): """ - wrap the status information for a particular project record + wrap the status information for a particular project record. Note that this constructor + may update the input data to add default values to standard properties. :param str id: the project identifier that this status object belongs to :param Mapping status_data: the dictionary containing the project's status data. 
This data usually comes from the ``status`` the internal property of a @@ -46,7 +57,7 @@ def __init__(self, id: str, status_data: Mapping): if not self._data.get(_state_p): self._data[_state_p] = EDIT if not self._data.get(_action_p): - self._data[_action_p] = Action.CREATE + self._data[_action_p] = self.CREATE_ACTION # try to keep since <= modified by default if _since_p not in self._data or not isinstance(self._data[_since_p], int): @@ -170,3 +181,15 @@ def set_state(self, state, when: int=-1): self._data[_state_p] = state self._data[_since_p] = when + def to_dict(self, with_id=True): + """ + return a new dictionary instance containing the storable data from this ProjectStatus instance + """ + out = deepcopy(self._data) + if with_id: + out['@id'] = self.id + return out + + def __str__(self): + return str(self.to_dict()) + From e2dbed408d9c99e0f0b161cd4c34dfc0b2ce5a7e Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 3 Mar 2023 14:46:42 -0500 Subject: [PATCH 054/123] dbio status: class name change: ProjectStatus -> RecordStatus (as it is more generic) --- python/nistoar/midas/dbio/base.py | 8 +++--- python/nistoar/midas/dbio/status.py | 40 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 994edf4..0f1e402 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -22,7 +22,7 @@ from nistoar.base.config import ConfigurationException from nistoar.pdr.publish.prov import Action from .. 
import MIDASException -from .status import ProjectStatus +from .status import RecordStatus DAP_PROJECTS = "dap" DMP_PROJECTS = "dmp" @@ -191,7 +191,7 @@ def _initialize(self, recdata: MutableMapping) -> MutableMapping: # Should be None or a date recdata['deactivated'] = None if 'status' not in recdata: - recdata['status'] = ProjectStatus(recdata['id'], {}).to_dict(False) + recdata['status'] = RecordStatus(recdata['id'], {}).to_dict(False) for perm in ACLs.OWN: if perm not in recdata['acls']: recdata['acls'][perm] = [recdata['owner']] if recdata['owner'] else [] @@ -282,12 +282,12 @@ def reactivate(self) -> bool: return True @property - def status(self) -> ProjectStatus: + def status(self) -> RecordStatus: """ return the status object that indicates the current state of the record and the last action applied to it. """ - return ProjectStatus(self.id, self._data.get('status')) + return RecordStatus(self.id, self._data.get('status')) @property def acls(self) -> ACLs: diff --git a/python/nistoar/midas/dbio/status.py b/python/nistoar/midas/dbio/status.py index 285a6dd..bf7f3af 100644 --- a/python/nistoar/midas/dbio/status.py +++ b/python/nistoar/midas/dbio/status.py @@ -1,5 +1,5 @@ """ -Module for tracking the status of a project record. +Module for tracking the status of a dbio record. Note that this module is similar in intent and implementation to :py:mod:`nistoar.pdr.publish.service.status` but has implemented to different requirements. 
@@ -12,7 +12,7 @@ from nistoar.pdr.publish.prov import Action -# Available project states: +# Available record states: # EDIT = "edit" # Record is currently being edit for a new released version PROCESSING = "processing" # Record is being processed at the moment and cannot be updated @@ -29,27 +29,27 @@ _modified_p = "modified" _message_p = "message" -# Common project record actions +# Common record actions # ACTION_CREATE = "create" ACTION_UPDATE = "update" -class ProjectStatus: +class RecordStatus: """ - a class that holds the current status of a project, aggregating multiple pieces of information about - the projects state and the last action applied to it. + a class that holds the current status of a record (particularly, a project record), aggregating + multiple pieces of information about the record's state and the last action applied to it. """ CREATE_ACTION = ACTION_CREATE UPDATE_ACTION = ACTION_UPDATE def __init__(self, id: str, status_data: Mapping): """ - wrap the status information for a particular project record. Note that this constructor + wrap the status information for a particular record. Note that this constructor may update the input data to add default values to standard properties. - :param str id: the project identifier that this status object belongs to - :param Mapping status_data: the dictionary containing the project's status data. This + :param str id: the record identifier that this status object belongs to + :param Mapping status_data: the dictionary containing the record's status data. This data usually comes from the ``status`` the internal property of a - :py:class:`~nistoar.midas.dbio.project.ProjectRecord`. This class will + :py:class:`~nistoar.midas.dbio.project.ProtectedRecord`. This class will manipulate the given dictionary directly without making a copy. 
""" self._id = id @@ -79,7 +79,7 @@ def __init__(self, id: str, status_data: Mapping): @property def id(self) -> str: """ - the identifier for the project record this object provides the status for + the identifier for the record this object provides the status for """ return self._id @@ -109,8 +109,8 @@ def since_date(self) -> str: @property def action(self) -> str: """ - The name of the last action applied to the project. In general the actions that can be applied - to a project record are project-type-specific; however, there are a set of common actions. + The name of the last action applied to the record. In general the actions that can be applied + to a record are record-type-specific; however, there are a set of common actions. """ return self._data[_action_p] @@ -135,7 +135,7 @@ def message(self) -> str: """ A statement providing further description of about the last action. The message may be provided by the requesting user (to record the intent of the action) or set by default - by the project service. + by the record service. """ return self._data[_message_p] @@ -145,13 +145,13 @@ def message(self, val): def act(self, action: str, message: str="", when: int=0): """ - record the application of a particular action on the project + record the application of a particular action on the record :param str action: the name of the action being applied :param str message: a statement indicating the reason or intent of the action :param int when: the epoch timestamp for when the action was applied. A value of zero (default) indicates that the timestamp should be set when the - project record is saved. A value less than zero will cause - the current time to be set. + record is saved. A value less than zero will cause the current + time to be set. 
""" if not action: raise ValueError("Action not specified") @@ -166,11 +166,11 @@ def act(self, action: str, message: str="", when: int=0): def set_state(self, state, when: int=-1): """ - record a new state that the project record has entered. + record a new state that the record has entered. :param str state: the name of the new state that the record has entered :param int when: the epoch timestamp for when the state changed. A value of zero indicates that the timestamp should be set when the - project record is saved. A value less than zero (default) will + record is saved. A value less than zero (default) will cause the current time to be set. """ if not state: @@ -183,7 +183,7 @@ def set_state(self, state, when: int=-1): def to_dict(self, with_id=True): """ - return a new dictionary instance containing the storable data from this ProjectStatus instance + return a new dictionary instance containing the storable data from this RecordStatus instance """ out = deepcopy(self._data) if with_id: From ba297bef7bb9735c78d4e9f81a83cc782d2f9342 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 3 Mar 2023 14:48:45 -0500 Subject: [PATCH 055/123] dbio status: update test_status.py for class name change --- python/tests/nistoar/midas/dbio/test_status.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/tests/nistoar/midas/dbio/test_status.py b/python/tests/nistoar/midas/dbio/test_status.py index 409aff6..569420d 100644 --- a/python/tests/nistoar/midas/dbio/test_status.py +++ b/python/tests/nistoar/midas/dbio/test_status.py @@ -5,13 +5,13 @@ from nistoar.midas.dbio import status from nistoar.pdr.publish.prov import Action -class TestProjectStatus(test.TestCase): +class TestRecordStatus(test.TestCase): def test_ctor(self): - stat = status.ProjectStatus("goob", {}) + stat = status.RecordStatus("goob", {}) self.assertEqual(stat.id, "goob") self.assertEqual(stat.state, status.EDIT) - self.assertEqual(stat.action, Action.CREATE) + 
self.assertEqual(stat.action, status.ACTION_CREATE) self.assertEqual(stat.message, "") self.assertEqual(stat.since, 0) self.assertEqual(stat.modified, 0) @@ -19,10 +19,10 @@ def test_ctor(self): self.assertEqual(stat.modified_date, "pending") def test_act(self): - stat = status.ProjectStatus("goob", {"state": status.EDIT, "since": -1}) + stat = status.RecordStatus("goob", {"state": status.EDIT, "since": -1}) self.assertEqual(stat.id, "goob") self.assertEqual(stat.state, status.EDIT) - self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.action, status.ACTION_CREATE) self.assertEqual(stat.message, "") self.assertGreater(stat.since, 0) self.assertGreater(stat.modified, 0) @@ -54,10 +54,10 @@ def test_act(self): self.assertNotEqual(stat.modified_date, "pending") def test_set_state(self): - stat = status.ProjectStatus("goob", {"state": status.EDIT, "since": -1}) + stat = status.RecordStatus("goob", {"state": status.EDIT, "since": -1}) self.assertEqual(stat.id, "goob") self.assertEqual(stat.state, status.EDIT) - self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.action, status.ACTION_CREATE) self.assertEqual(stat.message, "") self.assertGreater(stat.since, 0) self.assertGreater(stat.modified, 0) @@ -68,7 +68,7 @@ def test_set_state(self): stat.set_state(status.PROCESSING) self.assertEqual(stat.id, "goob") self.assertEqual(stat.state, status.PROCESSING) - self.assertEqual(stat.action, Action.CREATE) + self.assertEqual(stat.action, status.ACTION_CREATE) self.assertEqual(stat.message, "") self.assertGreater(stat.since, then) self.assertLess(stat.modified, stat.since) From 949f12f0f45341495e06c1c2bf6f37ffbd1f4a61 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 6 Mar 2023 09:02:42 -0500 Subject: [PATCH 056/123] avoid BagBuilder log disconnection errors by explicitly deleting the builder --- python/nistoar/pdr/publish/bagger/pdp.py | 4 ++++ .../nistoar/pdr/preserve/bagit/test_builder.py | 15 +++++++++++++++ 2 files changed, 19 
insertions(+) diff --git a/python/nistoar/pdr/publish/bagger/pdp.py b/python/nistoar/pdr/publish/bagger/pdp.py index 07900c1..b38f376 100644 --- a/python/nistoar/pdr/publish/bagger/pdp.py +++ b/python/nistoar/pdr/publish/bagger/pdp.py @@ -106,6 +106,10 @@ def __init__(self, sipid: str, bagparent: str, config: Mapping, convention: str, self._histfile = None + def __del__(self): + if self.bagbldr: + del self.bagbldr + @property def sipid(self): "the identifier of the SIP being operated on" diff --git a/python/tests/nistoar/pdr/preserve/bagit/test_builder.py b/python/tests/nistoar/pdr/preserve/bagit/test_builder.py index b0e0226..8e778f4 100644 --- a/python/tests/nistoar/pdr/preserve/bagit/test_builder.py +++ b/python/tests/nistoar/pdr/preserve/bagit/test_builder.py @@ -65,11 +65,19 @@ def setUp(self): self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.tf.track("testbag") self.tf.track("issued-ids.json") + if len(self.bag.plog.handlers) > 0: + print("warning: found stray handlers attached:"+ + ("\n".join([str(h) for h in self.bag.plog.handlers]))) def tearDown(self): self.bag.disconnect_logfile() + plog = self.bag.plog + del self.bag self.bag = None self.tf.clean() + if len(plog.handlers) > 0: + print("ERROR: found stray handlers attached:"+ + ("\n".join([str(h) for h in self.bag.plog.handlers]))) def test_ctor(self): self.assertEqual(self.bag.bagname, "testbag") @@ -111,6 +119,7 @@ def test_ctor_on_existng_dir(self): bagdir = os.path.join(self.tf.root, "testbag") if not os.path.exists(bagdir): os.mkdir(bagdir) + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertEqual(self.bag.bagname, "testbag") @@ -127,6 +136,7 @@ def test_ctor_on_existng_dir(self): self.assertFalse(self.bag._has_resmd()) def test_ctor_with_id(self): + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg, id="edi00hw91c") @@ -161,6 +171,7 @@ def test_fix_id(self): self.bag._fix_id("ark:/88434/mds2-4193") self.cfg['validate_id'] 
= False + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertEqual(self.bag._fix_id("ark:/88434/edi00hw91c"), "ark:/88434/edi00hw91c") @@ -172,6 +183,7 @@ def test_fix_id(self): self.bag._fix_id("ark:/goober/foo") self.cfg['validate_id'] = r'(edi\d)|(mds[01])' + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) with self.assertRaises(ValueError): # validate this one @@ -188,6 +200,7 @@ def test_fix_id(self): self.cfg['validate_id'] = r'(edi\d)|(mds[01])' self.cfg['require_ark_id'] = False + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertEqual(self.bag._fix_id("edi00hw91c"), "edi00hw91c") self.assertEqual(self.bag._fix_id("ark:/88434/edi00hw91c"), @@ -1176,6 +1189,7 @@ def test_update_ediid(self): def test_add_res_nerd(self): self.cfg['ensure_nerdm_type_on_add'] = bldr.NERDM_SCH_ID_BASE + "v0.4" + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertIsNone(self.bag.ediid) with open(simplenerd) as fd: @@ -1230,6 +1244,7 @@ def test_add_ds_pod(self): def test_add_ds_pod_convert(self): self.cfg['ensure_nerdm_type_on_add'] = bldr.NERDM_SCH_ID_BASE + "v0.7" + del self.bag self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertIsNone(self.bag.ediid) From f5b1059539cca486dad00bac6adb202233d1379e Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 7 Mar 2023 14:01:43 -0500 Subject: [PATCH 057/123] move and expand version support from nistoar.pdr.publish.bagger.utils to nistoar.id.versions --- metadata | 2 +- python/nistoar/pdr/publish/bagger/utils.py | 68 +--------------------- 2 files changed, 2 insertions(+), 68 deletions(-) diff --git a/metadata b/metadata index ab1fc6d..e6e2b3e 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit ab1fc6dc99e4980f72b80ff42ce53cd2f80a8270 +Subproject commit e6e2b3e5f06d99153917b68c8894adeadcdb7041 diff --git a/python/nistoar/pdr/publish/bagger/utils.py 
b/python/nistoar/pdr/publish/bagger/utils.py index b37400e..3e7b965 100644 --- a/python/nistoar/pdr/publish/bagger/utils.py +++ b/python/nistoar/pdr/publish/bagger/utils.py @@ -16,6 +16,7 @@ from ...preserve.bagit.builder import (NERDM_SCH_ID_BASE, NERDM_SCH_VER, NERDMPUB_SCH_VER, NERDMBIB_SCH_ID_BASE, NERDMBIB_SCH_VER) +from nistoar.id.versions import Version, cmp_versions DEF_MBAG_VERSION = "0.4" DEF_NIST_PROF_VERSION = "0.4" @@ -165,73 +166,6 @@ def multibag_version_of(name): except ValueError as ex: return '' -_ver_delim = re.compile(r"[\._]") -_proper_ver = re.compile(r"^\d+([\._]\d+)*$") - -class Version(object): - """ - a version class that can facilitate comparisons - """ - - def _toint(self, field): - try: - return int(field) - except ValueError: - return field - - def __init__(self, vers): - """ - convert a version string to a Version instance - """ - self._vs = vers - self.fields = [self._toint(n) for n in _ver_delim.split(self._vs)] - - def __str__(self): - return self._vs - - def __eq__(self, other): - if not isinstance(other, Version): - other = Version(other) - return self.fields == other.fields - - def __lt__(self, other): - if not isinstance(other, Version): - other = Version(other) - return self.fields < other.fields - - def __le__(self, other): - if not isinstance(other, Version): - other = Version(other) - return self < other or self == other - - def __ge__(self, other): - return not (self < other) - def __gt__(self, other): - return not self.__le__(other) - def __ne__(self, other): - return not (self == other) - - @classmethod - def is_proper_version(cls, vers): - """ - return true if the given version string is of the form M.M.M... where - each M is any non-negative number. - """ - return _proper_ver.match(vers) is not None - -def cmp_versions(ver1, ver2): - """ - compare two version strings for their order. 
- :return int: -1 if v1 < v2, 0 if v1 = v2, and +1 if v1 > v2 - """ - a = Version(ver1) - b = Version(ver2) - if a < b: - return -1 - elif a == b: - return 0 - return +1 - class BagName(object): """ a wrapper class around a legal bag name that allows it to be sorted as part of From bedcde0c183c5c37f0e9a4db936213c689b367a3 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 7 Mar 2023 18:02:56 -0500 Subject: [PATCH 058/123] bagger/test_utils.py: add comment about moved version stuff --- python/tests/nistoar/pdr/publish/bagger/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tests/nistoar/pdr/publish/bagger/test_utils.py b/python/tests/nistoar/pdr/publish/bagger/test_utils.py index 235ae5f..8787f1b 100644 --- a/python/tests/nistoar/pdr/publish/bagger/test_utils.py +++ b/python/tests/nistoar/pdr/publish/bagger/test_utils.py @@ -6,6 +6,7 @@ from nistoar.pdr.publish.bagger import utils as bagut from nistoar.nerdm.constants import CORE_SCHEMA_URI, PUB_SCHEMA_URI +# Note: Version and cmp_versions() have moved to nistoar.id.versions class TestVersion(test.TestCase): def test_ctor(self): From 0d959b7a30e9557008014e1c319226f770874d90 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 7 Mar 2023 18:13:12 -0500 Subject: [PATCH 059/123] dbio.ProjectService: integrate provenance tracking, status management and introduce lifecycle management: * prov: add PROCESS action * use a const for the prov collection name * status: add READY state * dbio base: refactor exceptions for handling validation errors * ProjectService: -- add finalize() and submit() -- integrate provenance recording and state management --- python/nistoar/midas/dbio/base.py | 21 +- python/nistoar/midas/dbio/fsbased.py | 12 +- python/nistoar/midas/dbio/inmem.py | 18 +- python/nistoar/midas/dbio/mongo.py | 2 +- python/nistoar/midas/dbio/project.py | 430 ++++++++++++++++-- python/nistoar/midas/dbio/status.py | 2 + python/nistoar/pdr/publish/prov.py | 20 +- 
.../tests/nistoar/midas/dbio/test_fsbased.py | 6 +- python/tests/nistoar/midas/dbio/test_inmem.py | 42 +- python/tests/nistoar/midas/dbio/test_mongo.py | 1 + .../tests/nistoar/midas/dbio/test_project.py | 22 + 11 files changed, 485 insertions(+), 91 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 0f1e402..51ad5fe 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -29,6 +29,7 @@ GROUPS_COLL = "groups" PEOPLE_COLL = "people" DRAFT_PROJECTS = "draft" # this name is deprecated +PROV_ACT_LOG = "prov_action_log" DEF_PEOPLE_SHOULDER = "ppl0" DEF_GROUPS_SHOULDER = "grp0" @@ -1062,6 +1063,10 @@ def record_action(self, act: Action): """ if not act.subject: raise ValueError("record_action(): action is missing a subject identifier") + if act.type == Action.PROCESS and \ + (not isinstance(act.object, Mapping) or 'name' not in act.object): + raise ValueError("record_action(): action object is missing name property: "+str(act.object)) + self._save_action_data(act.to_dict()) @abstractmethod @@ -1115,6 +1120,8 @@ def _close_actionlog_with(self, rec: ProtectedRecord, close_action: Action, extr ("recid", rec.id), ("close_action", close_action.type) ]) + if close_action.type == Action.PROCESS: + archive['close_action'] += ":%s" % str(close_action.object) archive.update(extra) archive['acls'] = acls archive['history'] = history @@ -1176,6 +1183,15 @@ class DBIOException(MIDASException): """ pass +class DBIORecordException(DBIOException): + """ + a base Exception class for DBIO exceptions that are associated with a specific DBIO record. This + class provides the record identifier via a ``record_id`` attribute. 
+ """ + def __init__(self, recid, message, sys=None): + super(DBIORecordException, self).__init__(message, sys=sys) + self.record_id = recid + class NotAuthorized(DBIOException): """ an exception indicating that the user attempted an operation that they are not authorized to @@ -1209,7 +1225,7 @@ class AlreadyExists(DBIOException): """ pass -class ObjectNotFound(DBIOException): +class ObjectNotFound(DBIORecordException): """ an exception indicating that the requested record, or a requested part of a record, does not exist. """ @@ -1221,7 +1237,6 @@ def __init__(self, recid, part=None, message=None, sys=None): the entire record does not exist. :param str message: a brief description of the error (what object was not found) """ - self.record_id = recid self.record_part = part if not message: @@ -1229,7 +1244,7 @@ def __init__(self, recid, part=None, message=None, sys=None): message = "Requested portion of record (id=%s) does not exist: %s" % (recid, part) else: message = "Requested record with id=%s does not exist" % recid - super(ObjectNotFound, self).__init__(message) + super(ObjectNotFound, self).__init__(recid, message) diff --git a/python/nistoar/midas/dbio/fsbased.py b/python/nistoar/midas/dbio/fsbased.py index 9d32497..d1c7cbb 100644 --- a/python/nistoar/midas/dbio/fsbased.py +++ b/python/nistoar/midas/dbio/fsbased.py @@ -153,9 +153,9 @@ def select_records(self, perm: base.Permissions=base.ACLs.OWN) -> Iterator[base. 
break def _save_action_data(self, actdata: Mapping): - self._ensure_collection("action_log") + self._ensure_collection(base.PROV_ACT_LOG) try: - recpath = self._root / 'action_log' / (actdata['subject']+".lis") + recpath = self._root / base.PROV_ACT_LOG / (actdata['subject']+".lis") return self._append_json_to_listfile(actdata, recpath) except KeyError as ex: raise ValueError("_save_action_data(): Action is missing subject id") @@ -178,8 +178,8 @@ def _load_from_listfile(self, inpath: Path): return [json.loads(line.strip()) for line in fd] def _select_actions_for(self, id: str) -> List[Mapping]: - self._ensure_collection("action_log") - recpath = self._root / 'action_log' / (id+".lis") + self._ensure_collection(base.PROV_ACT_LOG) + recpath = self._root / base.PROV_ACT_LOG / (id+".lis") if not recpath.is_file(): return [] try: @@ -188,8 +188,8 @@ def _select_actions_for(self, id: str) -> List[Mapping]: raise base.DBIOException(id+": Unable to read actions: "+str(ex)) def _delete_actions_for(self, id): - self._ensure_collection("action_log") - recpath = self._root / 'action_log' / (id+".lis") + self._ensure_collection(base.PROV_ACT_LOG) + recpath = self._root / base.PROV_ACT_LOG / (id+".lis") if recpath.is_file(): recpath.unlink() diff --git a/python/nistoar/midas/dbio/inmem.py b/python/nistoar/midas/dbio/inmem.py index 1d1d0f9..8f276e8 100644 --- a/python/nistoar/midas/dbio/inmem.py +++ b/python/nistoar/midas/dbio/inmem.py @@ -85,21 +85,21 @@ def _save_action_data(self, actdata: Mapping): if 'subject' not in actdata: raise ValueError("_save_action_data(): Missing subject property in action data") id = actdata['subject'] - if 'action_log' not in self._db: - self._db['action_log'] = {} - if id not in self._db['action_log']: - self._db['action_log'][id] = [] - self._db['action_log'][id].append(actdata) + if base.PROV_ACT_LOG not in self._db: + self._db[base.PROV_ACT_LOG] = {} + if id not in self._db[base.PROV_ACT_LOG]: + self._db[base.PROV_ACT_LOG][id] = [] + 
self._db[base.PROV_ACT_LOG][id].append(actdata) def _select_actions_for(self, id: str) -> List[Mapping]: - if 'action_log' not in self._db or id not in self._db['action_log']: + if base.PROV_ACT_LOG not in self._db or id not in self._db[base.PROV_ACT_LOG]: return [] - return deepcopy(self._db['action_log'][id]) + return deepcopy(self._db[base.PROV_ACT_LOG][id]) def _delete_actions_for(self, id): - if 'action_log' not in self._db or id not in self._db['action_log']: + if base.PROV_ACT_LOG not in self._db or id not in self._db[base.PROV_ACT_LOG]: return - del self._db['action_log'][id] + del self._db[base.PROV_ACT_LOG][id] def _save_history(self, histrec): if 'recid' not in histrec: diff --git a/python/nistoar/midas/dbio/mongo.py b/python/nistoar/midas/dbio/mongo.py index 596bbdd..6793f17 100644 --- a/python/nistoar/midas/dbio/mongo.py +++ b/python/nistoar/midas/dbio/mongo.py @@ -17,7 +17,7 @@ class MongoDBClient(base.DBClient): """ an implementation of DBClient using a MongoDB database as the backend store. """ - ACTION_LOG_COLL = 'action_log' + ACTION_LOG_COLL = base.PROV_ACT_LOG HISTORY_COLL = 'history' def __init__(self, dburl: str, config: Mapping, projcoll: str, foruser: str = base.ANONYMOUS): diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 128c89e..fd3c7ac 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -13,11 +13,23 @@ from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence from typing import List +from copy import deepcopy -from .base import (DBClient, DBClientFactory, ProjectRecord, ACLs, - AlreadyExists, NotAuthorized, ObjectNotFound, DBIOException) +import jsonpatch + +from .base import (DBClient, DBClientFactory, ProjectRecord, ACLs, RecordStatus, + AlreadyExists, NotAuthorized, ObjectNotFound, DBIORecordException) +from . import status from .. 
import MIDASException, MIDASSystem -from nistoar.pdr.publish.prov import PubAgent +from nistoar.pdr.publish.prov import PubAgent, Action + +_STATUS_ACTION_CREATE = RecordStatus.CREATE_ACTION +_STATUS_ACTION_UPDATE = RecordStatus.CREATE_ACTION +_STATUS_ACTION_CLEAR = "clear" +_STATUS_ACTION_FINALIZE = "finalize" +_STATUS_ACTION_SUBMIT = "submit" + +DEF_PUBLISHED_SUFFIX = "_published" class ProjectService(MIDASSystem): """ @@ -54,6 +66,11 @@ class ProjectService(MIDASSystem): This parameter gives the identifier shoulder that should be used the identifier for a new record created under the user group. Subclasses of this service class may support other parameters. """ + STATUS_ACTION_CREATE = _STATUS_ACTION_CREATE + STATUS_ACTION_UPDATE = _STATUS_ACTION_UPDATE + STATUS_ACTION_CLEAR = _STATUS_ACTION_CLEAR + STATUS_ACTION_FINALIZE = _STATUS_ACTION_FINALIZE + STATUS_ACTION_SUBMIT = _STATUS_ACTION_SUBMIT def __init__(self, project_type: str, dbclient_factory: DBClient, config: Mapping={}, who: PubAgent=None, log: Logger=None, _subsys=None, _subsysabbrev=None): @@ -112,11 +129,13 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: elif not prec.meta: prec.meta = self._new_metadata_for(shoulder) prec.data = self._new_data_for(prec.id, prec.meta) + prec.status.act(self.STATUS_ACTION_CREATE, "draft created") if data: - self.update_data(prec.id, data, prec=prec) # this will call prec.save() + self.update_data(prec.id, data, message=None, _prec=prec) # this will call prec.save() else: prec.save() + self.dbcli.record_action(Action(Action.CREATE, prec.id, self.who, prec.status.message)) return prec def _get_id_shoulder(self, user: PubAgent): @@ -149,18 +168,27 @@ def get_record(self, id) -> ProjectRecord: """ return self.dbcli.get_record_for(id) + def get_status(self, id) -> RecordStatus: + """ + For the record with the given identifier, return the status object that indicates the current + state of the record and the last action applied to it. 
+ :raises ObjectNotFound: if a record with that ID does not exist + :raises NotAuthorized: if the record exists but the current user is not authorized to read it. + """ + return self.get_record(id).status + def get_data(self, id, part=None): """ return a data content from the record with the given ID :param str id: the record's identifier - :param str path: a path to the portion of the data to get. This is the same as the `datapath` + :param str path: a path to the portion of the data to get. This is the same as the ``datapath`` given to the handler constructor. This will be an empty string if the full data object is requested. - :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + :raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points to a non-existent part of the data content. :raises NotAuthorized: if the authenticated user does not have permission to read the record - given by `id`. - :raises PartNotAccessible: if access to the part of the data specified by `part` is not allowed. + given by ``id``. + :raises PartNotAccessible: if access to the part of the data specified by ``part`` is not allowed. 
""" prec = self.dbcli.get_record_for(id) # may raise ObjectNotFound if not part: @@ -180,35 +208,55 @@ def _extract_data_part(self, data, part): return out - def update_data(self, id, newdata, part=None, prec=None): + def _record_action(self, act: Action): + # this is tolerant of recording errors + try: + self.dbcli.record_action(act) + except Exception as ex: + self.log.error("Failed to record provenance action for %s: %s: %s", + act.subject, act.type, act.message) + + def _try_save(self, prec): + # this is tolerant of recording errors + try: + prec.save() + except Exception as ex: + self.log.error("Failed to save project record, %s: %s", prec.id, str(ex)) + + def update_data(self, id, newdata, part=None, message="", _prec=None): """ merge the given data into the currently save data content for the record with the given identifier. :param str id: the identifier for the record whose data should be updated. :param str newdata: the data to save as the new content. - :param stt part: the slash-delimited pointer to an internal data property. If provided, - the given `newdata` is a value that should be set to the property pointed - to by `part`. - :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. - If this is not provided, the record will by fetched anew based on the `id`. - :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + :param str part: the slash-delimited pointer to an internal data property. If provided, + the given ``newdata`` is a value that should be set to the property pointed + to by ``part``. + :param str message: an optional message that will be recorded as an explanation of the update. + :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to ``id``. + If this is not provided, the record will by fetched anew based on the ``id``. 
+ :raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record - given by `id`. + given by ``id``. :raises PartNotAccessible: if replacement of the part of the data specified by `part` is not allowed. - :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or + :raises InvalidUpdate: if the provided ``newdata`` represents an illegal or forbidden update or would otherwise result in invalid data content. """ - if not prec: - prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + set_action = False + if not _prec: + set_action = True # setting the last action will NOT be the caller's responsibility + _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + olddata = None if not part: # updating data as a whole: merge given data into previously saved data - self._merge_into(newdata, prec.data) + olddata = deepcopy(_prec.data) + self._merge_into(newdata, _prec.data) else: # updating just a part of the data steps = part.split('/') - data = prec.data + data = _prec.data while steps: prop = steps.pop(0) if prop not in data or data[prop] is None: @@ -217,6 +265,7 @@ def update_data(self, id, newdata, part=None, prec=None): else: data[prop] = {} elif not steps: + olddata = data[prop] if isinstance(data[prop], Mapping) and isinstance(newdata, Mapping): self._merge_into(newdata, data[prop]) else: @@ -226,14 +275,38 @@ def update_data(self, id, newdata, part=None, prec=None): "%s: data property, %s, is not in an updatable state") data = data[prop] - data = prec.data + data = _prec.data + if message is None: + message = "draft updated" + + # prep the provenance record + obj = self._jsondiff(olddata, newdata) # used in provenance record below + tgt = _prec.id + if part: + # if patching a specific 
part, record it as a subaction + provact = Action(Action.PATCH, tgt, self.who, message) + tgt += "#data.%s" % part + provact.add_subaction(Action(Action.PATCH, tgt, self.who, "updating data."+part, obj)) + else: + provact = Action(Action.PATCH, tgt, self.who, _prec.status.message, obj) # ensure the replacing data is sufficiently complete and valid and then save it # If it is invalid, InvalidUpdate is raised. - data = self._save_data(data, prec) + try: + data = self._save_data(data, _prec, message, set_action and _STATUS_ACTION_UPDATE) + + except Exception as ex: + self.log.error("Failed to save update for project, %s: %s", _prec.id, str(ex)) + provact.message = "Failed to save update due to an internal error" + raise + finally: + self._record_action(provact) + return self._extract_data_part(data, part) + def _jsondiff(self, old, new): + return {"jsonpatch": jsonpatch.make_patch(old, new)} def _merge_into(self, update: Mapping, base: Mapping, depth: int=-1): if depth == 0: @@ -285,7 +358,7 @@ def _moderate_metadata(self, mdata: MutableMapping, shoulder=None): out.update(mdata) return out - def replace_data(self, id, newdata, part=None, prec=None): + def replace_data(self, id, newdata, part=None, message="", _prec=None): """ Replace the currently stored data content of a record with the given data. It is expected that the new data will be filtered/cleansed via an internal call to :py:method:`dress_data`. @@ -304,8 +377,11 @@ def replace_data(self, id, newdata, part=None, prec=None): :raises InvalidUpdate: if the provided `newdata` represents an illegal or forbidden update or would otherwise result in invalid data content. 
""" - if not prec: - prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + set_action = False + if not _prec: + set_action = True # setting the last action will NOT be the caller's responsibility + _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + olddata = deepcopy(_prec.data) if not part: # this is a complete replacement; merge it with a starter record @@ -314,7 +390,7 @@ def replace_data(self, id, newdata, part=None, prec=None): else: # replacing just a part of the data - data = prec.data + data = _prec.data steps = part.split('/') while steps: prop = steps.pop(0) @@ -329,15 +405,39 @@ def replace_data(self, id, newdata, part=None, prec=None): raise PartNotAccessible(id, part) data = data[prop] - data = prec.data + data = _prec.data + + if message is None: + message = "draft updated" + + # prep the provenance record + obj = self._jsondiff(olddata, newdata) + tgt = _prec.id + if part: + # if patching a specific part, record it as a subaction + provact = Action(Action.PATCH, tgt, self.who, _prec.status.message) + tgt += "#data.%s" % part + provact.add_subaction(Action(Action.PUT, tgt, self.who, "replacing data."+part, obj)) + else: + provact = Action(Action.PUT, tgt, self.who, _prec.status.message, obj) # ensure the replacing data is sufficiently complete and valid. # If it is invalid, InvalidUpdate is raised. 
- data = self._save_data(data, prec) + try: + data = self._save_data(data, _prec, message, set_action and _STATUS_ACTION_UPDATE) + + except Exception as ex: + self.log.error("Failed to save update to project, %s: %s", _prec.id, str(ex)) + provact.message = "Failed to save update due to an internal error" + raise + + finally: + self._record_action(provact) return self._extract_data_part(data, part) - def _save_data(self, indata: Mapping, prec: ProjectRecord = None) -> Mapping: + def _save_data(self, indata: Mapping, prec: ProjectRecord, + message: str, action: str = _STATUS_ACTION_UPDATE) -> Mapping: """ expand, validate, and save the data modified by the user as the record's data content. @@ -351,6 +451,9 @@ def _save_data(self, indata: Mapping, prec: ProjectRecord = None) -> Mapping: final transformations and validation, this will be saved the the record's `data` property. :param ProjectRecord prec: the project record object to save the data to. + :param str message: a message to save as the status action message; if None, no message + is saved. + :param str action: the action label to record; if None, the action is not updated. :return: the (transformed) data that was actually saved :rtype: dict :raises InvalidUpdate: if the provided `indata` represents an illegal or forbidden update or @@ -360,14 +463,20 @@ def _save_data(self, indata: Mapping, prec: ProjectRecord = None) -> Mapping: self._validate_data(indata) # may raise InvalidUpdate prec.data = indata - prec.save(); + # update the record status according to the inputs + if action: + prec.status.act(action, message) + elif message is not None: + prec.message = message + + prec.save(); return indata def _validate_data(self, data): pass - def clear_data(self, id, part=None, prec=None): + def clear_data(self, id: str, part: str=None, message: str=None, prec=None): """ remove the stored data content of the record and reset it to its defaults. 
:param str id: the identifier for the record whose data should be cleared. @@ -387,8 +496,11 @@ def clear_data(self, id, part=None, prec=None): initdata = self._new_data_for(prec.id, prec.meta) if not part: - # clearing everything: return record to its initial defaults + # clearing everything: return record to its initial defaults prec.data = initdata + if message is None: + message = "reset draft to initial defaults" + prec.status.act(self.STATUS_ACTION_CLEAR, message) else: # clearing only part of the data @@ -408,6 +520,193 @@ def clear_data(self, id, part=None, prec=None): break data = data[prop] initdata = initdata.get(prop, {}) + + if message is None: + message = "reset %s to initial defaults" % part + prec.status.act(self.STATUS_ACTION_UPDATE, message) + + # prep the provenance record + tgt = prec.id + if part: + # if deleting a specific part, record it as a subaction + provact = Action(Action.PATCH, tgt, self.who, prec.status.message) + tgt += "#data.%s" % part + provact.add_subaction(Action(Action.DELETE, tgt, self.who, "clearing data."+part)) + else: + provact = Action(Action.DELETE, tgt, self.who, prec.status.message) + + try: + self.save() + + except Exception as ex: + self.log.error("Failed to save cleared data for project, %s: %s", tgt, str(ex)) + provact.message = "Failed to clear requested data due to internal error" + raise + + finally: + self._record_action(provact) + + + def update_status_message(self, id: str, message: str, _prec=None): + """ + set the message to be associated with the current status regarding the last action + taken on the record with the given identifier + :param str id: the identifier of the record to attach the message to + :param str message: the message to attach + """ + if not _prec: + _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + stat = _prec.status + + if stat.state != status.EDIT: + raise NotEditable(id) + stat.message = message + _prec.save() + 
self._record_action(Action(Action.COMMENT, _prec.id, self.who, message)) + + + def finalize(self, id, message=None, as_version=None, _prec=None): + """ + Assume that no more client updates will be applied and apply any final automated updates + to the record in preparation for final publication. After the changes are applied, the + resulting record will be validated. Normally, the record's state will not be changed as + a result. The record must be in the edit state to be applied. + :param str id: the identifier of the record to finalize + :param str message: a message summarizing the updates to the record + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises NotEditable: the requested record in not in the edit state + :raises InvalidUpdate: if the finalization produces an invalid record + """ + reset_state = False + if not _prec: + reset_state = True # if successful, resetting state will NOT be caller's responsibility + _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + + stat = _prec.status + if stat.state != status.EDIT: + raise NotEditable(id) + + stat.set_state(status.PROCESSING) + stat.act(self.STATUS_ACTION_FINALIZE, "in progress") + _prec.save() + + try: + defmsg = self._apply_final_updates(_prec) + + except InvalidRecord as ex: + emsg = "finalize process failed: "+str(ex) + self._record_action(Action(Action.PROCESS, _prec.id, self.who, emsg, + {"name": "finalize", "errors": ex.errors})) + stat.set_state(status.EDIT) + stat.act(self.STATUS_ACTION_FINALIZE, ex.format_errors()) + self._try_save(prec) + raise + + except Exception as ex: + self.log.error("Failed to finalize project record, %s: %s", _prec.id, str(ex)) + emsg = "Failed to finalize due to an internal error" + 
self._record_action(Action(Action.PROCESS, _prec.id, self.who, emsg, + {"name": "finalize", "errors": [emsg]})) + stat.set_state(status.EDIT) + stat.act(self.STATUS_ACTION_FINALIZE, emsg) + self._try_save(prec) + raise + + else: + # record provenance record + self._record_action(Action(Action.PROCESS, _prec.id, self.who, defmsg, {"name": "finalize"})) + + if reset_state: + stat.set_state(status.READY) + stat.act(self.STATUS_ACTION_FINALIZE, message or defmsg) + _prec.save() + + def _apply_final_updates(self, prec): + # update the data + + + self._validate_data(prec.data) + return "draft is ready for submission" + + def submit(self, id: str, message: str=None, _prec=None) -> str: + """ + finalize (via :py:meth:`finalize`) the record and submit it for publishing. After a successful + submission, it may not be possible to edit or revise the record until the submission process + has been completed. The record must be in the "edit" state prior to calling this method. + :param str id: the identifier of the record to submit + :param str message: a message summarizing the updates to the record + :returns: the label indicating its post-editing state + :rtype: str + :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to + an undefined or unrecognized part of the data + :raises NotAuthorized: if the authenticated user does not have permission to read the record + given by `id`. + :raises NotEditable: the requested record is not in the edit state. + :raises NotSubmitable: if the finalization produces an invalid record because the record + contains invalid data or is missing required data. + :raises SubmissionFailed: if, during actual submission (i.e. after finalization), an error + occurred preventing successful submission. This error is typically + not due to anything the client did, but rather reflects a system problem + (e.g. from a downstream service). 
+ """ + if not _prec: + _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + self.finalize(id, message, _prec) # may raise NotEditable + + # this record is ready for submission. Send the record to its post-editing destination, + # and update its status accordingly. + try: + defmsg = self._submit(_prec) + + except InvalidRecord as ex: + emsg = "submit process failed: "+str(ex) + self._record_action(Action(Action.PROCESS, _prec.id, self.who, emsg, + {"name": "submit", "errors": ex.errors})) + stat.set_state(status.EDIT) + stat.act(self.STATUS_ACTION_SUBMIT, ex.format_errors()) + self._try_save(prec) + raise + + except Exception as ex: + emsg = "Submit process failed due to an internal error" + self._record_action(Action(Action.PROCESS, _prec.id, self.who, emsg, + {"name": "submit", "errors": [emsg]})) + stat.set_state(status.EDIT) + stat.act(self.STATUS_ACTION_SUBMIT, emsg) + self._try_save(prec) + raise + + else: + # record provenance record + self.dbcli.record_action(Action(Action.PROCESS, _prec.id, self.who, defmsg, {"name": "submit"})) + + _prec.stat.set_state(status.SUBMITTED) + _prec.stat.act(self.STATUS_ACTION_SUBMIT, message or defmsg) + _prec.save() + + + def _submit(prec: ProjectRecord) -> str: + """ + Actually send the given record to its post-editing destination and update its status + accordingly. + + This method should be overridden to provide project-specific handling. This generic + implementation will simply copy the data contents of the record to another collection. + :returns: the label indicating its post-editing state + :rtype: str + :raises NotSubmitable: if the finalization process produced an invalid record because the record + contains invalid data or is missing required data. + :raises SubmissionFailed: if, during actual submission (i.e. after finalization), an error + occurred preventing successful submission. 
This error is typically + not due to anything the client did, but rather reflects a system problem + (e.g. from a downstream service). + """ + pass + + class ProjectServiceFactory: @@ -449,17 +748,17 @@ def create_service_for(self, who: PubAgent=None): return ProjectService(self._prjtype, self._dbclifact, self._cfg, who, self._log) -class InvalidUpdate(DBIOException): +class InvalidRecord(DBIORecordException): """ - an exception indicating that the user-provided data is invalid or otherwise would result in - invalid data content for a record. + an exception indicating that record data is invalid and requires correction or completion. The determination of invalid data may result from detailed data validation which may uncover multiple errors. The ``errors`` property will contain a list of messages, each describing a validation error encounted. The :py:meth:`format_errors` will format all these messages into a single string for a (text-based) display. """ - def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=None, sys=None): + def __init__(self, message: str=None, recid: str=None, part: str=None, + errors: List[str]=None, sys=None): """ initialize the exception :param str message: a brief description of the problem with the user input @@ -482,8 +781,7 @@ def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=N message = "Unknown validation errors encountered while updating data" errors = [] - super(InvalidUpdate, self).__init__(message) - self.record_id = recid + super(InvalidUpdate, self).__init__(recid, message, sys) self.record_part = part self.errors = errors @@ -513,8 +811,28 @@ def format_errors(self): out += ":\n * " out += "\n * ".join([str(e) for e in self.errors]) return out + +class InvalidUpdate(InvalidRecord): + """ + an exception indicating that the user-provided data is invalid or otherwise would result in + invalid data content for a record. 
+ + The determination of invalid data may result from detailed data validation which may uncover + multiple errors. The ``errors`` property will contain a list of messages, each describing a + validation error encounted. The :py:meth:`format_errors` will format all these messages into + a single string for a (text-based) display. + """ + def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=None, sys=None): + """ + initialize the exception + :param str message: a brief description of the problem with the user input + :param str recid: the id of the record that data was provided for + :param str part: the part of the record that was requested for update. Do not provide + this parameter if the entire record was provided. + :param [str] errors: a listing of the individual errors uncovered in the data + """ -class PartNotAccessible(DBIOException): +class PartNotAccessible(DBIORecordException): """ an exception indicating that the user-provided data is invalid or otherwise would result in invalid data content for a record. @@ -526,12 +844,34 @@ def __init__(self, recid, part, message=None, sys=None): :param str part: the part of the record that was requested. Do not provide this parameter if the entire record does not exist. """ - self.record_id = recid - self.record_part = part - if not message: message = "%s: data property, %s, is not in an updateable state" % (recid, part) - super(PartNotAccessible, self).__init__(message, sys=sys) + super(PartNotAccessible, self).__init__(recid, message, sys=sys) + self.record_part = part + +class NotEditable(DBIORecordException): + """ + An error indicating that a requested record cannot be updated because it is in an uneditable state. 
+    """
+    def __init__(self, recid, message=None, sys=None):
+        """
+        initialize the exception
+        """
+        if not message:
+            message = "%s: not in an editable state" % recid
+        super(NotEditable, self).__init__(recid, message, sys=sys)
+
+class NotSubmitable(InvalidRecord):
+    """
+    An error indicating that a requested record cannot be finalized and submitted for publication,
+    typically because it contains invalid data or is missing required data.
+    """
+    def __init__(self, recid: str, message: str=None, errors: List[str]=None, sys=None):
+        """
+        initialize the exception
+        """
+        if not message:
+            message = "%s: not in an submitable state" % recid
+        super(NotSubmitable, self).__init__(message, recid, errors=errors, sys=sys)
-
 
diff --git a/python/nistoar/midas/dbio/status.py b/python/nistoar/midas/dbio/status.py
index bf7f3af..01c5529 100644
--- a/python/nistoar/midas/dbio/status.py
+++ b/python/nistoar/midas/dbio/status.py
@@ -17,6 +17,8 @@
 EDIT       = "edit"        # Record is currently being edit for a new released version
 PROCESSING = "processing"  # Record is being processed at the moment and cannot be updated
                            # further until this processing is complete.
+READY      = "ready"       # Record is ready for submission having finalized and passed all
+                           # validation tests.
 SUBMITTED  = "submitted"   # Record has been submitted and is either processed or is under review
 ACCEPTED   = "accepted"    # Record has been reviewed and is being processed for release
 INPRESS    = "in press"    # Record was submitted to the publishing service and is still being processed
diff --git a/python/nistoar/pdr/publish/prov.py b/python/nistoar/pdr/publish/prov.py
index 11585da..2b1bb5f 100644
--- a/python/nistoar/pdr/publish/prov.py
+++ b/python/nistoar/pdr/publish/prov.py
@@ -152,6 +152,12 @@ class Action(object):
     ``DELETE``
         the content of the subject was deleted, and the subject identifer is no longer accessible.
         The ``Action`` should not have object data.
+    ``PROCESS``
+        the subject was submitted for some type of processing. 
The Action's object field will be an + object that will contain a ``name`` property giving the name of the process or operation + that was applied (e.g. "finalize", "submit", etc.); the other object properties may represent + parameters that were used to control the processing. Some types of processing--most notably, + submitting for publishing--may result in the subject becoming no longer accessible or actionable. ``COMMENT`` This action serves to provide via its message extra information about an action (e.g. as a subaction) or otherwise describe an action that is not strictly one of the above types. @@ -162,15 +168,17 @@ class Action(object): PATCH: str = "PATCH" MOVE: str = "MOVE" DELETE: str = "DELETE" + PROCESS: str = "PROCESS" COMMENT: str = "COMMENT" - types = "CREATE PUT PATCH MOVE DELETE COMMENT".split() + types = "CREATE PUT PATCH MOVE DELETE PROCESS COMMENT".split() TZ = datetime.timezone.utc def __init__(self, acttype: str, subj: str, agent: PubAgent, msg: str = None, obj = None, timestamp: float = 0.0, subacts: List["Action"] = None): """ intialize the action - :param str acttype: the type of action taken; one of CREATE, PUT, PATCH, MOVE, DELETE, COMMENT. + :param str acttype: the type of action taken; one of CREATE, PUT, PATCH, MOVE, DELETE, + PROCESS, COMMENT. :param str subj: the identifier for the part of the dataset that was updated :param PubAgent agent: the agent (person or system) that intiated the action :param msg str: a description of the change (and possibly why). 
@@ -206,7 +214,7 @@ def __init__(self, acttype: str, subj: str, agent: PubAgent, msg: str = None, ob @property def type(self) -> str: """ - the type of action, one of CREATE, PUT, PATCH, MOVE, DELETE, COMMENT + the type of action, one of CREATE, PUT, PATCH, MOVE, DELETE, PROCESS, COMMENT """ return self._type @@ -304,6 +312,12 @@ def add_subaction(self, action: "Action") -> None: raise TypeError("add_subaction(): input is not an Action: "+str(action)) self._subacts.append(action) + def clear_subactions(self) -> None: + """ + remove all subactions attached to this action + """ + self._subacts = [] + def to_dict(self) -> Mapping: """ convert this Action into JSON-serializable dictionary. This implementation returns an diff --git a/python/tests/nistoar/midas/dbio/test_fsbased.py b/python/tests/nistoar/midas/dbio/test_fsbased.py index 89a553f..ece1430 100644 --- a/python/tests/nistoar/midas/dbio/test_fsbased.py +++ b/python/tests/nistoar/midas/dbio/test_fsbased.py @@ -246,7 +246,7 @@ def test_action_log_io(self): with self.assertRaises(ValueError): self.cli._save_action_data({'goob': 'gurn'}) - recpath = self.cli._root / "action_log" / "goob:gurn.lis" + recpath = self.cli._root / "prov_action_log" / "goob:gurn.lis" self.assertTrue(not recpath.exists()) self.cli._save_action_data({'subject': 'goob:gurn', 'foo': 'bar'}) self.assertTrue(recpath.exists()) @@ -262,7 +262,7 @@ def test_action_log_io(self): self.assertEqual(json.loads(lines[0]), {'subject': 'goob:gurn', 'foo': 'bar'}) self.assertEqual(json.loads(lines[1]), {'subject': 'goob:gurn', 'bob': 'alice'}) - recpath = self.cli._root / "action_log" / "grp0001.lis" + recpath = self.cli._root / "prov_action_log" / "grp0001.lis" self.assertTrue(not recpath.exists()) self.cli._save_action_data({'subject': 'grp0001', 'dylan': 'bob'}) self.assertTrue(recpath.exists()) @@ -281,7 +281,7 @@ def test_action_log_io(self): self.cli._delete_actions_for("grp0001") self.assertTrue(not recpath.exists()) - recpath = self.cli._root / 
"action_log" / "goob:gurn.lis" + recpath = self.cli._root / "prov_action_log" / "goob:gurn.lis" self.assertTrue(recpath.exists()) self.cli._delete_actions_for("goob:gurn") self.assertTrue(not recpath.exists()) diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py b/python/tests/nistoar/midas/dbio/test_inmem.py index f85a27a..d8eaff3 100644 --- a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -222,24 +222,24 @@ def test_action_log_io(self): self.cli._save_action_data({'goob': 'gurn'}) self.cli._save_action_data({'subject': 'goob:gurn', 'foo': 'bar'}) - self.assertTrue('action_log' in self.cli._db) - self.assertTrue('goob:gurn' in self.cli._db['action_log']) - self.assertEqual(len(self.cli._db['action_log']['goob:gurn']), 1) - self.assertEqual(self.cli._db['action_log']['goob:gurn'][0], + self.assertTrue('prov_action_log' in self.cli._db) + self.assertTrue('goob:gurn' in self.cli._db['prov_action_log']) + self.assertEqual(len(self.cli._db['prov_action_log']['goob:gurn']), 1) + self.assertEqual(self.cli._db['prov_action_log']['goob:gurn'][0], {'subject': 'goob:gurn', 'foo': 'bar'}) self.cli._save_action_data({'subject': 'goob:gurn', 'bob': 'alice'}) - self.assertEqual(len(self.cli._db['action_log']['goob:gurn']), 2) - self.assertEqual(self.cli._db['action_log']['goob:gurn'][0], + self.assertEqual(len(self.cli._db['prov_action_log']['goob:gurn']), 2) + self.assertEqual(self.cli._db['prov_action_log']['goob:gurn'][0], {'subject': 'goob:gurn', 'foo': 'bar'}) - self.assertEqual(self.cli._db['action_log']['goob:gurn'][1], + self.assertEqual(self.cli._db['prov_action_log']['goob:gurn'][1], {'subject': 'goob:gurn', 'bob': 'alice'}) self.cli._save_action_data({'subject': 'grp0001', 'dylan': 'bob'}) - self.assertTrue('action_log' in self.cli._db) - self.assertTrue('grp0001' in self.cli._db['action_log']) - self.assertEqual(len(self.cli._db['action_log']['grp0001']), 1) - 
self.assertEqual(self.cli._db['action_log']['grp0001'][0], + self.assertTrue('prov_action_log' in self.cli._db) + self.assertTrue('grp0001' in self.cli._db['prov_action_log']) + self.assertEqual(len(self.cli._db['prov_action_log']['grp0001']), 1) + self.assertEqual(self.cli._db['prov_action_log']['grp0001'][0], {'subject': 'grp0001', 'dylan': 'bob'}) acts = self.cli._select_actions_for("goob:gurn") @@ -251,16 +251,16 @@ def test_action_log_io(self): self.assertEqual(acts[0], {'subject': 'grp0001', 'dylan': 'bob'}) self.cli._delete_actions_for("goob:gurn") - self.assertTrue('action_log' in self.cli._db) - self.assertTrue('goob:gurn' not in self.cli._db['action_log']) - self.assertEqual(len(self.cli._db['action_log']['grp0001']), 1) - self.assertEqual(self.cli._db['action_log']['grp0001'][0], + self.assertTrue('prov_action_log' in self.cli._db) + self.assertTrue('goob:gurn' not in self.cli._db['prov_action_log']) + self.assertEqual(len(self.cli._db['prov_action_log']['grp0001']), 1) + self.assertEqual(self.cli._db['prov_action_log']['grp0001'][0], {'subject': 'grp0001', 'dylan': 'bob'}) self.cli._delete_actions_for("grp0001") - self.assertTrue('action_log' in self.cli._db) - self.assertTrue('goob:gurn' not in self.cli._db['action_log']) - self.assertTrue('grp0001' not in self.cli._db['action_log']) + self.assertTrue('prov_action_log' in self.cli._db) + self.assertTrue('goob:gurn' not in self.cli._db['prov_action_log']) + self.assertTrue('grp0001' not in self.cli._db['prov_action_log']) self.assertEqual(self.cli._select_actions_for("goob:gurn"), []) self.assertEqual(self.cli._select_actions_for("grp0001"), []) @@ -291,8 +291,8 @@ def test_record_action(self): def test_close_actionlog_with(self): prec = base.ProjectRecord(base.DRAFT_PROJECTS, {"id": "pdr0:2222", "name": "brains", "owner": "nist0:ava1"}, self.cli) - finalact = Action(Action.SUBMIT, "pdr0:2222", testuser, "done!") - self.assertNotIn('action_log', self.cli._db) + finalact = Action(Action.PROCESS, 
"pdr0:2222", testuser, "done!", "submit") + self.assertNotIn('prov_action_log', self.cli._db) self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) # no history should have been written @@ -306,7 +306,7 @@ def test_close_actionlog_with(self): self.assertEqual(len(self.cli._db['history']["pdr0:2222"][0]['history']), 3) self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['recid'], "pdr0:2222") self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['published_as'], "comicbook") - self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['close_action'], Action.SUBMIT) + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['close_action'], "PROCESS:submit") self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['acls'], {"read": prec.acls._perms['read']}) self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['history'][-1]['message'], "done!") diff --git a/python/tests/nistoar/midas/dbio/test_mongo.py b/python/tests/nistoar/midas/dbio/test_mongo.py index f3e2efe..1561aba 100644 --- a/python/tests/nistoar/midas/dbio/test_mongo.py +++ b/python/tests/nistoar/midas/dbio/test_mongo.py @@ -304,6 +304,7 @@ def test_action_log_io(self): self.assertEqual(len(acts), 1) self.assertEqual(acts[0], {'subject': 'grp0001', 'dylan': 'bob'}) + # _select_actions_for() will return the actions sorted by timestamp acts = self.cli._select_actions_for("goob:gurn") self.assertEqual(len(acts), 2) self.assertEqual(acts[0], {'subject': 'goob:gurn', 'bob': 'alice', 'timestamp': 5}) diff --git a/python/tests/nistoar/midas/dbio/test_project.py b/python/tests/nistoar/midas/dbio/test_project.py index 11674ef..a4eb3d4 100644 --- a/python/tests/nistoar/midas/dbio/test_project.py +++ b/python/tests/nistoar/midas/dbio/test_project.py @@ -101,6 +101,9 @@ def test_create_record(self): self.assertEqual(prec.data, {}) self.assertEqual(prec.meta, {}) self.assertEqual(prec.owner, "nstr1") + self.assertEqual(prec.status.action, "create") + 
self.assertEqual(prec.status.message, "draft created") + self.assertEqual(prec.status.state, "edit") self.assertTrue(self.project.dbcli.name_exists("goob")) prec2 = self.project.get_record(prec.id) @@ -183,6 +186,25 @@ def test_update_replace_data(self): with self.assertRaises(project.PartNotAccessible): self.project.update_data(prec.id, 2, "pos/vec/x") + def test_finalize(self): + self.create_service() + prec = self.project.create_record("goob") + self.assertEqual(prec.status.state, "edit") + self.assertIn("created", prec.status.message) + data = self.project.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + self.project.finalize(prec.id) + stat = self.project.get_status(prec.id) + self.assertEqual(stat.state, "ready") + self.assertEqual(stat.message, "draft is ready for submission") + + prec = self.project.get_record(prec.id) + prec._data['status']['state'] = "ennui" + prec.save() + with self.assertRaises(project.NotEditable): + self.project.finalize(prec.id) + + + class TestProjectServiceFactory(test.TestCase): From bcff094140eb54fb826c31a4178ee0f3480924aa Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 8 Mar 2023 06:24:19 -0500 Subject: [PATCH 060/123] dbio: require write perm on existing record to record an associated action --- python/nistoar/midas/dbio/base.py | 35 ++++++++-- .../tests/nistoar/midas/dbio/test_client.py | 69 +++++++++++++++++++ python/tests/nistoar/midas/dbio/test_inmem.py | 45 ++---------- 3 files changed, 105 insertions(+), 44 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 51ad5fe..1a9d68b 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -1057,16 +1057,32 @@ def delete_record(self, id: str) -> bool: self._delete_from(self._projcoll, id) return True - def record_action(self, act: Action): + def record_action(self, act: Action, coll: str=None): """ - save the given action record to the back-end store + save the 
given action record to the back-end store. In order to save the action, the + action's subject must identify an existing record and the current user must have write + permission on that record. + + :param Action act: the Action object to save + :param str coll: the collection that the record with the action's subject ID can be + found. If not provided, the current project collection will be assumed. """ if not act.subject: raise ValueError("record_action(): action is missing a subject identifier") if act.type == Action.PROCESS and \ (not isinstance(act.object, Mapping) or 'name' not in act.object): raise ValueError("record_action(): action object is missing name property: "+str(act.object)) - + + # check existence and permission + if not coll: + coll = self._projcoll + rec = self._get_from_coll(coll, act.subject) + if not rec: + raise ObjectNotFound(act.subject) + rec = ProtectedRecord(coll, rec, self) + if not rec.authorized(ACLs.WRITE): + raise NotAuthorized(rec.id, "record action for id="+rec.id) + self._save_action_data(act.to_dict()) @abstractmethod @@ -1094,10 +1110,19 @@ def _close_actionlog_with(self, rec: ProtectedRecord, close_action: Action, extr cancel_if_empty=True): """ archive all actions in the action log for a given ID, ending with the given action. - :param str id: the record identifier to select the actions for + All of the entries associated with the given record will be removed from the action + log and stored into an action archive document in JSON format. + + :param ProtectedRecord rec: the record whose action log is being closed :param Action close_action: the action that is effectively closing the record. This - is usually a SUBMIT action or a DELETE action. + is usually a PROCESS action or a DELETE action. + :param dict extra: additional data to include in the action archive document + :raises NotAuthorized: if the current user does not have write permission to the given + record. 
""" + if not rec.authorized(ACLs.WRITE): + raise NotAuthorized(self.user_id, "close record history for id="+rec.id) + history = self._select_actions_for(rec.id) if len(history) == 0 and cancel_if_empty: return diff --git a/python/tests/nistoar/midas/dbio/test_client.py b/python/tests/nistoar/midas/dbio/test_client.py index c13ae99..72e514f 100644 --- a/python/tests/nistoar/midas/dbio/test_client.py +++ b/python/tests/nistoar/midas/dbio/test_client.py @@ -3,6 +3,7 @@ import unittest as test from nistoar.midas.dbio import inmem, base +from nistoar.pdr.publish.prov import Action, PubAgent class TestDBClient(test.TestCase): @@ -191,6 +192,74 @@ def test_select_records(self): self.assertIn("mine3", names) self.assertEqual(len(names), 4) + def test_record_action(self): + testuser = PubAgent("nist", PubAgent.USER, self.user) + + rec = self.cli.create_record("mine1") + rec = self.cli.create_record("mine2") + self.assertTrue(self.cli._upsert(base.DRAFT_PROJECTS, + {"id": "mds3:0001", "owner": self.user, "hobby": "whittling"})) + self.assertTrue(self.cli._upsert(base.DRAFT_PROJECTS, + {"id": "mds3:0002", "owner": self.user, "hobby": "whittling"})) + self.cli.record_action(Action(Action.CREATE, "mds3:0001", testuser, "created")) + self.cli.record_action(Action(Action.COMMENT, "mds3:0001", testuser, "i'm hungry")) + self.cli.record_action(Action(Action.COMMENT, "mds3:0002", testuser, "i'm hungry")) + acts = self.cli._select_actions_for("mds3:0001") + self.assertEqual(len(acts), 2) + self.assertEqual(acts[0]['type'], Action.CREATE) + self.assertEqual(acts[1]['type'], Action.COMMENT) + + with self.assertRaises(base.ObjectNotFound): + self.cli.record_action(Action(Action.CREATE, "bob", testuser, "created")) + + cli = self.fact.create_client(base.DRAFT_PROJECTS, {}, "nist:alice") + with self.assertRaises(base.NotAuthorized): + cli.record_action(Action(Action.COMMENT, "mds3:0001", testuser, "crikey")) + + def test_close_actionlog_with(self): + testuser = PubAgent("nist", 
PubAgent.USER, self.user) + + prec = base.ProjectRecord(base.DRAFT_PROJECTS, + {"id": "pdr0:2222", "name": "brains", "owner": "doc0:sec"}, self.cli) + finalact = Action(Action.PROCESS, "pdr0:2222", testuser, "done!", "submit") + self.assertNotIn('prov_action_log', self.cli._db) + with self.assertRaises(base.NotAuthorized): + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) + self.assertNotIn('prov_action_log', self.cli._db) + + prec = base.ProjectRecord(base.DRAFT_PROJECTS, + {"id": "pdr0:2222", "name": "brains", "owner": self.cli.user_id}, self.cli) + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) + + # no history should have been written + self.assertNotIn('history', self.cli._db) + + self.cli._db[self.cli._projcoll]["pdr0:2222"] = prec.to_dict() + + self.cli.record_action(Action(Action.CREATE, "pdr0:2222", testuser, "created")) + self.cli.record_action(Action(Action.COMMENT, "pdr0:2222", testuser, "i'm hungry")) + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook", "recid": "goob"}) + self.assertIn('history', self.cli._db) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 1) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"][0]['history']), 3) + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['recid'], "pdr0:2222") + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['published_as'], "comicbook") + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['close_action'], "PROCESS:submit") + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['acls'], + {"read": prec.acls._perms['read']}) + self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['history'][-1]['message'], "done!") + self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) + + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) + self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) + 
self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 1) + + self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}, False) + self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) + self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 2) + self.assertEqual(self.cli._db['history']["pdr0:2222"][1]['history'][-1]['message'], "done!") + self.assertEqual(len(self.cli._db['history']["pdr0:2222"][1]['history']), 1) + diff --git a/python/tests/nistoar/midas/dbio/test_inmem.py b/python/tests/nistoar/midas/dbio/test_inmem.py index d8eaff3..6000b41 100644 --- a/python/tests/nistoar/midas/dbio/test_inmem.py +++ b/python/tests/nistoar/midas/dbio/test_inmem.py @@ -36,9 +36,9 @@ def test_create_client(self): class TestInMemoryDBClient(test.TestCase): def setUp(self): - self.cfg = {} + self.cfg = { "default_shoulder": "mds3" } self.user = "nist0:ava1" - self.cli = inmem.InMemoryDBClientFactory({}).create_client(base.DMP_PROJECTS, {}, self.user) + self.cli = inmem.InMemoryDBClientFactory({}).create_client(base.DMP_PROJECTS, self.cfg, self.user) def test_next_recnum(self): self.assertEqual(self.cli._next_recnum("goob"), 1) @@ -281,47 +281,14 @@ def test_save_history(self): {'recid': 'goob:gurn', 'alice': 'bob'}) def test_record_action(self): - self.cli.record_action(Action(Action.CREATE, "mds3:0008", testuser, "created")) - self.cli.record_action(Action(Action.COMMENT, "mds3:0008", testuser, "i'm hungry")) - acts = self.cli._select_actions_for("mds3:0008") + rec = self.cli.create_record("mine1") + self.cli.record_action(Action(Action.CREATE, "mds3:0001", testuser, "created")) + self.cli.record_action(Action(Action.COMMENT, "mds3:0001", testuser, "i'm hungry")) + acts = self.cli._select_actions_for("mds3:0001") self.assertEqual(len(acts), 2) self.assertEqual(acts[0]['type'], Action.CREATE) self.assertEqual(acts[1]['type'], Action.COMMENT) - def test_close_actionlog_with(self): - prec = base.ProjectRecord(base.DRAFT_PROJECTS, - {"id": 
"pdr0:2222", "name": "brains", "owner": "nist0:ava1"}, self.cli) - finalact = Action(Action.PROCESS, "pdr0:2222", testuser, "done!", "submit") - self.assertNotIn('prov_action_log', self.cli._db) - self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) - - # no history should have been written - self.assertNotIn('history', self.cli._db) - - self.cli.record_action(Action(Action.CREATE, "pdr0:2222", testuser, "created")) - self.cli.record_action(Action(Action.COMMENT, "pdr0:2222", testuser, "i'm hungry")) - self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook", "recid": "goob"}) - self.assertIn('history', self.cli._db) - self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 1) - self.assertEqual(len(self.cli._db['history']["pdr0:2222"][0]['history']), 3) - self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['recid'], "pdr0:2222") - self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['published_as'], "comicbook") - self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['close_action'], "PROCESS:submit") - self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['acls'], - {"read": prec.acls._perms['read']}) - self.assertEqual(self.cli._db['history']["pdr0:2222"][0]['history'][-1]['message'], "done!") - self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) - - self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}) - self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) - self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 1) - - self.cli._close_actionlog_with(prec, finalact, {"published_as": "comicbook"}, False) - self.assertEqual(self.cli._select_actions_for("pdr0:2222"), []) - self.assertEqual(len(self.cli._db['history']["pdr0:2222"]), 2) - self.assertEqual(self.cli._db['history']["pdr0:2222"][1]['history'][-1]['message'], "done!") - self.assertEqual(len(self.cli._db['history']["pdr0:2222"][1]['history']), 1) - if __name__ == '__main__': From 
981bad0132fc705e589060d109420d9c3912151e Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 8 Mar 2023 07:12:34 -0500 Subject: [PATCH 061/123] dbio.project: add provenance probing tests --- .../tests/nistoar/midas/dbio/test_project.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/python/tests/nistoar/midas/dbio/test_project.py b/python/tests/nistoar/midas/dbio/test_project.py index a4eb3d4..1c24be2 100644 --- a/python/tests/nistoar/midas/dbio/test_project.py +++ b/python/tests/nistoar/midas/dbio/test_project.py @@ -52,6 +52,12 @@ def create_service(self, request=None): rootlog.getChild("project")) return self.project + def last_action_for(self, recid): + acts = self.project.dbcli._db.get(base.PROV_ACT_LOG, {}).get(recid,[]) + if not acts: + return None + return acts[-1] + def test_ctor(self): self.create_service() self.assertTrue(self.project.dbcli) @@ -113,6 +119,11 @@ def test_create_record(self): self.assertEqual(prec2.meta, {}) self.assertEqual(prec2.owner, "nstr1") + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.CREATE) + self.assertNotIn('subactions', lastact) + with self.assertRaises(project.AlreadyExists): self.project.create_record("goob") @@ -139,6 +150,11 @@ def test_get_data(self): self.assertEqual(self.project.get_data(prec.id, "pos/desc"), {"a": 1}) self.assertEqual(self.project.get_data(prec.id, "pos/desc/a"), 1) + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.CREATE) + self.assertNotIn('subactions', lastact) + with self.assertRaises(project.ObjectNotFound): self.project.get_data(prec.id, "pos/desc/b") with self.assertRaises(project.ObjectNotFound): @@ -161,11 +177,23 @@ def test_update_replace_data(self): prec = self.project.get_record(prec.id) self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + lastact = 
self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.PATCH) + self.assertNotIn('subactions', lastact) + data = self.project.update_data(prec.id, {"y": 1, "z": 10, "grid": "B"}, "pos") self.assertEqual(data, {"x": 23, "y": 1, "z": 10, "grid": "B"}) prec = self.project.get_record(prec.id) self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "B"}}) + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.PATCH) + self.assertEqual(len(lastact['subactions']), 1) + self.assertEqual(lastact['subactions'][0]['type'], prov.Action.PATCH) + self.assertEqual(lastact['subactions'][0]['subject'], prec.id+"#data.pos") + data = self.project.update_data(prec.id, "C", "pos/grid") self.assertEqual(data, "C") prec = self.project.get_record(prec.id) @@ -177,6 +205,11 @@ def test_update_replace_data(self): prec = self.project.get_record(prec.id) self.assertEqual(prec.data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.PUT) + self.assertNotIn('subactions', lastact) + # update again data = self.project.update_data(prec.id, "blue", "color") self.assertEqual(data, "blue") @@ -186,6 +219,8 @@ def test_update_replace_data(self): with self.assertRaises(project.PartNotAccessible): self.project.update_data(prec.id, 2, "pos/vec/x") + self.assertEqual(len(self.project.dbcli._db.get(base.PROV_ACT_LOG, {}).get(prec.id,[])), 6) + def test_finalize(self): self.create_service() prec = self.project.create_record("goob") From c76c2f87c20bad0ca7432767092b9114e46aded2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 8 Mar 2023 07:49:20 -0500 Subject: [PATCH 062/123] dbio.project: ensure proper state before updating data --- python/nistoar/midas/dbio/project.py | 7 +++++++ 1 file 
changed, 7 insertions(+) diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index fd3c7ac..20cfe6b 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -248,6 +248,9 @@ def update_data(self, id, newdata, part=None, message="", _prec=None): _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized olddata = None + if _prec.status.state not in [status.EDIT, status.READY]: + raise NotEditable(id) + if not part: # updating data as a whole: merge given data into previously saved data olddata = deepcopy(_prec.data) @@ -383,6 +386,9 @@ def replace_data(self, id, newdata, part=None, message="", _prec=None): _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized olddata = deepcopy(_prec.data) + if _prec.status.state not in [status.EDIT, status.READY]: + raise NotEditable(id) + if not part: # this is a complete replacement; merge it with a starter record data = self._new_data_for(id) @@ -469,6 +475,7 @@ def _save_data(self, indata: Mapping, prec: ProjectRecord, prec.status.act(action, message) elif message is not None: prec.message = message + prec.status.set_state(status.EDIT) prec.save(); return indata From edeb6f18b8fdbd887c7bb91dacfaca4763bc5b7c Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 8 Mar 2023 09:25:37 -0500 Subject: [PATCH 063/123] integrate finalize, submit into dbio.wsgi.project --- python/nistoar/midas/dbio/__init__.py | 3 +- python/nistoar/midas/dbio/project.py | 36 +++-- python/nistoar/midas/dbio/status.py | 6 + python/nistoar/midas/dbio/wsgi/project.py | 151 +++++++++++++++++- .../nistoar/pdr/publish/service/wsgi/base.py | 25 ++- .../nistoar/pdr/publish/service/wsgi/pdp0.py | 17 -- .../nistoar/midas/dbio/wsgi/test_project.py | 148 +++++++++++++++++ 7 files changed, 348 insertions(+), 38 deletions(-) diff --git a/python/nistoar/midas/dbio/__init__.py b/python/nistoar/midas/dbio/__init__.py index 
c126e11..e94dfa2 100644 --- a/python/nistoar/midas/dbio/__init__.py +++ b/python/nistoar/midas/dbio/__init__.py @@ -203,4 +203,5 @@ MIDASDBClientFactory = MongoDBClientFactory -from .project import ProjectService, ProjectServiceFactory, InvalidUpdate, PartNotAccessible +from .project import (ProjectService, ProjectServiceFactory, InvalidRecord, InvalidUpdate, + PartNotAccessible, NotEditable, NotSubmitable) diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 20cfe6b..c3209fa 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -554,10 +554,12 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): self._record_action(provact) - def update_status_message(self, id: str, message: str, _prec=None): + def update_status_message(self, id: str, message: str, _prec=None) -> status.RecordStatus: """ set the message to be associated with the current status regarding the last action taken on the record with the given identifier + :returns: a Project status instance providing status after updating the message + :rtype: RecordStatus :param str id: the identifier of the record to attach the message to :param str message: the message to attach """ @@ -570,9 +572,11 @@ def update_status_message(self, id: str, message: str, _prec=None): stat.message = message _prec.save() self._record_action(Action(Action.COMMENT, _prec.id, self.who, message)) + + return stat.clone() - def finalize(self, id, message=None, as_version=None, _prec=None): + def finalize(self, id, message=None, as_version=None, _prec=None) -> status.RecordStatus: """ Assume that no more client updates will be applied and apply any final automated updates to the record in preparation for final publication. After the changes are applied, the @@ -580,6 +584,8 @@ def finalize(self, id, message=None, as_version=None, _prec=None): a result. The record must be in the edit state to be applied. 
:param str id: the identifier of the record to finalize :param str message: a message summarizing the updates to the record + :returns: a Project status instance providing the post-finalization status + :rtype: RecordStatus :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record @@ -593,7 +599,7 @@ def finalize(self, id, message=None, as_version=None, _prec=None): _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized stat = _prec.status - if stat.state != status.EDIT: + if _prec.status.state not in [status.EDIT, status.READY]: raise NotEditable(id) stat.set_state(status.PROCESSING) @@ -631,6 +637,9 @@ def finalize(self, id, message=None, as_version=None, _prec=None): stat.act(self.STATUS_ACTION_FINALIZE, message or defmsg) _prec.save() + return stat.clone() + + def _apply_final_updates(self, prec): # update the data @@ -638,15 +647,15 @@ def _apply_final_updates(self, prec): self._validate_data(prec.data) return "draft is ready for submission" - def submit(self, id: str, message: str=None, _prec=None) -> str: + def submit(self, id: str, message: str=None, _prec=None) -> status.RecordStatus: """ finalize (via :py:meth:`finalize`) the record and submit it for publishing. After a successful submission, it may not be possible to edit or revise the record until the submission process has been completed. The record must be in the "edit" state prior to calling this method. 
:param str id: the identifier of the record to submit :param str message: a message summarizing the updates to the record - :returns: the label indicating its post-editing state - :rtype: str + :returns: a Project status instance providing the post-submission status + :rtype: RecordStatus :raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record @@ -661,7 +670,8 @@ def submit(self, id: str, message: str=None, _prec=None) -> str: """ if not _prec: _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized - self.finalize(id, message, _prec) # may raise NotEditable + stat = _prec.status + self.finalize(id, message, _prec=_prec) # may raise NotEditable # this record is ready for submission. Send the record to its post-editing destination, # and update its status accordingly. @@ -674,7 +684,7 @@ def submit(self, id: str, message: str=None, _prec=None) -> str: {"name": "submit", "errors": ex.errors})) stat.set_state(status.EDIT) stat.act(self.STATUS_ACTION_SUBMIT, ex.format_errors()) - self._try_save(prec) + self._try_save(_prec) raise except Exception as ex: @@ -683,19 +693,21 @@ def submit(self, id: str, message: str=None, _prec=None) -> str: {"name": "submit", "errors": [emsg]})) stat.set_state(status.EDIT) stat.act(self.STATUS_ACTION_SUBMIT, emsg) - self._try_save(prec) + self._try_save(_prec) raise else: # record provenance record self.dbcli.record_action(Action(Action.PROCESS, _prec.id, self.who, defmsg, {"name": "submit"})) - _prec.stat.set_state(status.SUBMITTED) - _prec.stat.act(self.STATUS_ACTION_SUBMIT, message or defmsg) + stat.set_state(status.SUBMITTED) + stat.act(self.STATUS_ACTION_SUBMIT, message or defmsg) _prec.save() + + return stat.clone() - def _submit(prec: ProjectRecord) -> str: + def _submit(self, prec: ProjectRecord) -> str: """ Actually 
send the given record to its post-editing destination and update its status accordingly. diff --git a/python/nistoar/midas/dbio/status.py b/python/nistoar/midas/dbio/status.py index 01c5529..5c2baba 100644 --- a/python/nistoar/midas/dbio/status.py +++ b/python/nistoar/midas/dbio/status.py @@ -192,6 +192,12 @@ def to_dict(self, with_id=True): out['@id'] = self.id return out + def clone(self): + """ + return a copy that will be detached from its respective ProjectRecord + """ + return RecordStatus(self.id, self.to_dict(False)) + def __str__(self): return str(self.to_dict()) diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 036494f..03feec0 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -94,7 +94,7 @@ def do_GET(self, path, ashead=False): return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", self._id, ashead=ashead) - return self.send_json(prec.to_dict()) + return self.send_json(prec.to_dict(), ashead=ashead) class ProjectInfoHandler(ProjectRecordHandler): @@ -195,7 +195,7 @@ def do_GET(self, path, ashead=False): return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", self._id, ashead=ashead) - return self.send_json(prec.name) + return self.send_json(prec.name, ashead=ashead) def do_PUT(self, path): try: @@ -269,7 +269,7 @@ def do_GET(self, path, ashead=False): "No data found at requested property", self._id, ashead=ashead) return self.send_error_resp(404, "ID not found", "Record with requested identifier not found", self._id, ashead=ashead) - return self.send_json(out) + return self.send_json(out, ashead=ashead) def do_PUT(self, path): try: @@ -289,6 +289,9 @@ def do_PUT(self, path): except dbio.PartNotAccessible as ex: return self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated") + except dbio.NotEditable as ex: + return 
self.send_error_resp(409, "Not in editable state", "Record is not in state=edit") + return self.send_json(data) @@ -311,6 +314,8 @@ def do_PATCH(self, path): except dbio.PartNotAccessible as ex: return self.send_error_resp(405, "Data part not updatable", "Requested part of data cannot be updated") + except dbio.NotEditable as ex: + return self.send_error_resp(409, "Not in editable state", "Record is not in state=edit") return self.send_json(data) @@ -454,20 +459,21 @@ def do_GET(self, path, ashead=False): recd = prec.to_dict() if not path: - return self.send_json(recd.get('acls', {})) + return self.send_json(recd.get('acls', {}), ashead=ashead) path = path.strip('/') parts = path.split('/', 1) acl = recd.get('acls', {}).get(parts[0]) if acl is None: if parts[0] not in [dbio.ACLs.READ, dbio.ACLs.WRITE, dbio.ACLs.ADMIN, dbio.ACLs.DELETE]: - return self.send_error_resp(404, "Unsupported ACL type", "Request for unsupported ACL type") + return self.send_error_resp(404, "Unsupported ACL type", + "Request for unsupported ACL type", ashead=ashead) acl = [] if len(parts) < 2: - return self.send_json(acl) + return self.send_json(acl, ashead=ashead) - return self.send_json(parts[1] in acl) + return self.send_json(parts[1] in acl, ashead=ashead) def do_POST(self, path): """ @@ -632,6 +638,130 @@ def do_DELETE(self, path): return self.send_error_resp(405, "DELETE not allowed on this permission type", "Updating specified permission is not allowed") + +class ProjectStatusHandler(ProjectRecordHandler): + """ + handle status requests and actions + """ + _requestable_actions = [ ProjectService.STATUS_ACTION_FINALIZE, ProjectService.STATUS_ACTION_SUBMIT ] + + def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start_resp: Callable, + who: PubAgent, id: str, datapath: str="", config: dict=None, log: Logger=None): + """ + Initialize this data request handler with the request particulars. 
This constructor is called
+        by the web service SubApp in charge of the project record interface.
+
+        :param ProjectService service:  the ProjectService instance to use to get and update
+                                   the project data.
+        :param SubApp subapp:     the web service SubApp receiving the request and calling this constructor
+        :param dict  wsgienv:     the WSGI request context dictionary
+        :param Callable start_resp:  the WSGI start-response function used to send the response
+        :param PubAgent who:      the authenticated user making the request.
+        :param str   id:          the ID of the project record being requested
+        :param str   datapath:    the subpath pointing to a particular status property to return;
+                                  it can be one of the status property names ("state", "action",
+                                  or "message"), selecting just that property of the status
+                                  object.  This will be an empty string if the full status
+                                  object is requested.
+        :param dict  config:      the handler's configuration; if not provided, the inherited constructor
+                                  will extract the configuration from `subapp`.  Normally, the constructor
+                                  is called without this parameter.
+        :param Logger log:        the logger to use within this handler; if not provided (typical), the
+                                  logger attached to the SubApp will be used. 
+        """
+        super(ProjectStatusHandler, self).__init__(service, subapp, wsgienv, start_resp, who, datapath,
+                                                   config, log)
+        self._id = id
+        if not id:
+            # programming error
+            raise ValueError("Missing ProjectRecord id")
+
+    def do_GET(self, path, ashead=False):
+        """
+        return the status object in response to a GET request
+        """
+        try:
+            out = self.svc.get_status(self._id)
+        except dbio.NotAuthorized as ex:
+            return self.send_unauthorized()
+        except dbio.ObjectNotFound as ex:
+            if ex.record_part:
+                return self.send_error_resp(404, "Data property not found",
+                                            "No data found at requested property", self._id, ashead=ashead)
+            return self.send_error_resp(404, "ID not found",
+                                        "Record with requested identifier not found", self._id, ashead=ashead)
+
+        if path == "state":
+            out = out.state
+        elif path == "action":
+            out = out.action
+        elif path == "message":
+            out = out.message
+        elif path:
+            return self.send_error_resp(404, "Status property not accessible",
+                                        "Requested status property is not accessible", self._id, ashead=ashead)
+
+        return self.send_json(out.to_dict() if not path else out, ashead=ashead)
+
+    def do_PUT(self, path):
+        """
+        request an action to be applied to the record
+        """
+        path = path.strip('/')
+        if path:
+            return self.send_error_resp(405, "PUT not allowed", "PUT is not allowed on a status property")
+
+        try:
+            req = self.get_json_body()
+        except self.FatalError as ex:
+            return self.send_fatal_error(ex)
+
+        if not req.get('action'):
+            return self.send_error_resp(400, "Invalid input: missing action property",
+                                        "Input record is missing required action property")
+        return self._apply_action(req['action'], req.get('message'))
+
+    def do_PATCH(self, path):
+        """
+        request an action to be applied to the record or just update the associated message
+        """
+        path = path.strip('/')
+        if path:
+            return self.send_error_resp(405, "PATCH not allowed", "PATCH is not allowed on a status property")
+
+        try:
+            req = self.get_json_body()
+        except self.FatalError as ex:
+            return 
self.send_fatal_error(ex) + + # if action is not set, the message will just get updated. + return self._apply_action(req.get('action'), req.get('message')) + + def _apply_action(self, action, message=None): + try: + if message and action is None: + stat = self.svc.update_status_message(self._id, message) + elif action == 'finalize': + stat = self.svc.finalize(self._id, message) + elif action == 'submit': + stat = self.svc.submit(self._id, message) + else: + return self.send_error_resp(400, "Unrecognized action", + "Unrecognized action requested") + except dbio.NotAuthorized as ex: + return self.send_unauthorized() + except dbio.ObjectNotFound as ex: + return self.send_error_resp(404, "ID not found", + "Record with requested identifier not found", self._id) + except dbio.InvalidUpdate as ex: + return self.send_error_resp(400, "Request creates an invalid record", ex.format_errors()) + except dbio.NotEditable as ex: + return self.send_error_resp(409, "Not in editable state", "Record is not in state=edit or ready") + except dbio.NotSubmitable as ex: + return self.send_error_resp(409, "Not in editable state", "Record is not in state=edit or ready") + + return self.send_json(stat.to_dict()) + class MIDASProjectApp(SubApp): @@ -644,6 +774,8 @@ class MIDASProjectApp(SubApp): _data_update_handler = ProjectDataHandler _acls_update_handler = ProjectACLsHandler _info_update_handler = ProjectInfoHandler + _status_handler = ProjectStatusHandler + # _history_handler = ProjectHistoryHandler def __init__(self, service_factory: ProjectServiceFactory, log: Logger, config: dict={}): super(MIDASProjectApp, self).__init__(service_factory._prjtype, log, config) @@ -683,6 +815,11 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge idattrpart.append("") return self._data_update_handler(service, self, env, start_resp, who, idattrpart[0], idattrpart[2]) + elif idattrpart[1] == "status": + # path=ID/status: get or act on the status of the record + if 
len(idattrpart) == 2: + idattrpart.append("") + return self._status_handler(service, self, env, start_resp, who, idattrpart[0], idattrpart[2]) elif idattrpart[1] == "acls": # path=ID/acls: get/update the access control on record ID if len(idattrpart) < 3: diff --git a/python/nistoar/pdr/publish/service/wsgi/base.py b/python/nistoar/pdr/publish/service/wsgi/base.py index 800a5fd..e773e80 100644 --- a/python/nistoar/pdr/publish/service/wsgi/base.py +++ b/python/nistoar/pdr/publish/service/wsgi/base.py @@ -6,6 +6,7 @@ from typing import Callable from functools import reduce from logging import Logger +from urllib.parse import parse_qs from wsgiref.headers import Headers # from urllib.parse import parse_qs @@ -27,9 +28,17 @@ class Unacceptable(Exception): class Handler(object): """ a default web request handler that also serves as a base class for the - handlers specialized for the supported resource paths. + handlers specialized for the supported resource paths. Key features built into this + class include: + * the ``who`` property that holds the identity of the remote user making the request + * support for an ``action`` query parameter to request an action to be applied to + the resource being requested (perhaps in addition to that implied by the HTTP + request method); see :py:meth:`get_action`. 
 """
     default_agent = PubAgent("public", PubAgent.UNKN, "anonymous")
+    ACTION_UPDATE = ''
+    ACTION_FINALIZE = "finalize"
+    ACTION_PUBLISH = "publish"
 
     def __init__(self, path: str, wsgienv: dict, start_resp: Callable, who=None,
                  config: dict={}, log: Logger=None):
@@ -47,6 +56,20 @@ def __init__(self, path: str, wsgienv: dict, start_resp: Callable, who=None,
 
         self._meth = self._env.get('REQUEST_METHOD', 'GET')
 
+    def get_action(self):
+        """
+        return the value of the action query parameter, or ACTION_UPDATE if it was not provided
+        """
+        qstr = self._env.get('QUERY_STRING')
+        if not qstr:
+            return self.ACTION_UPDATE
+
+        params = parse_qs(qstr)
+        action = params.get('action', [])
+        if len(action) > 0 and action[0] in [self.ACTION_FINALIZE, self.ACTION_PUBLISH]:
+            return action[0]
+        return self.ACTION_UPDATE
+
     def send_error(self, code, message, content=None, contenttype=None, ashead=None, encoding='utf-8'):
         """
         respond to the client with an error of a given code and reason
diff --git a/python/nistoar/pdr/publish/service/wsgi/pdp0.py b/python/nistoar/pdr/publish/service/wsgi/pdp0.py
index f1af205..e1b4522 100644
--- a/python/nistoar/pdr/publish/service/wsgi/pdp0.py
+++ b/python/nistoar/pdr/publish/service/wsgi/pdp0.py
@@ -45,9 +45,6 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge
 class _Handler(Handler):
 
     default_agent = None
-    ACTION_UPDATE = ''
-    ACTION_FINALIZE = "finalize"
-    ACTION_PUBLISH = "publish"
 
     def __init__(self, app, path: str, wsgienv: dict, start_resp: Callable, who=None, config: dict={}):
         self._app = app
@@ -56,20 +53,6 @@ def __init__(self, app, path: str, wsgienv: dict, start_resp: Callable, who=None
         if self._app._recorder:
             self._reqrec = self._app._recorder.from_wsgi(self._env)
 
-    def get_action(self):
-        """
-        return the value of the action query parameter of None if it was not provided
-        """
-        qstr = self._env.get('QUERY_STRING')
-        if not qstr:
-            return self.ACTION_UPDATE
-
-        params = parse_qs(qstr)
-        action = params.get('action')
-        if 
len(action) > 0 and action[0] in [self.ACTION_FINALIZE, self.ACTION_PUBLISH]: - return action[0] - return self.ACTION_UPDATE - def send_error_resp(self, code, reason, explain, sipid=None, pdrid=None, ashead=False): """ respond to client with a JSON-formated error response. diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index 2a489ef..e05bf9f 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -635,6 +635,154 @@ def test_get_info(self): body = hdlr.handle() self.assertIn("404 ", self.resp[0]) + def test_get_status(self): + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "big") + self.assertEqual(resp['owner'], "nstr1") + self.assertEqual(resp['id'], "mdm1:0003") + self.assertEqual(resp['status']['state'], "edit") + self.assertEqual(resp['status']['action'], "create") + + self.resp = [] + path = "mdm1:0003/status" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectStatusHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + + self.assertEqual(resp['state'], 'edit') + self.assertEqual(resp['action'], 'create') + self.assertIn('modified', resp) + self.assertIn('since', resp) + self.assertIn('message', resp) + + def test_update_status_message(self): + 
path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "big") + self.assertEqual(resp['owner'], "nstr1") + self.assertEqual(resp['id'], "mdm1:0003") + self.assertEqual(resp['status']['state'], "edit") + self.assertEqual(resp['status']['action'], "create") + self.assertTrue(resp['status'].get('message')) + self.assertNotEqual(resp['status']['message'], 'starting over') + + self.resp = [] + path = "mdm1:0003/status" + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"message": "starting over"})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectStatusHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + + self.assertEqual(resp['state'], 'edit') + self.assertEqual(resp['action'], 'create') + self.assertIn('modified', resp) + self.assertIn('since', resp) + self.assertEqual(resp['message'], 'starting over') + + def test_process(self): + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + 
self.assertEqual(resp['name'], "big") + self.assertEqual(resp['owner'], "nstr1") + self.assertEqual(resp['id'], "mdm1:0003") + self.assertEqual(resp['status']['state'], "edit") + self.assertEqual(resp['status']['action'], "create") + self.assertTrue(resp['status'].get('message')) + self.assertNotEqual(resp['status']['message'], 'starting over') + + self.resp = [] + path = "mdm1:0003/status" + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"message": "starting over", "action": "sleep"})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectStatusHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("400 ", self.resp[0]) + + self.resp = [] + req['wsgi.input'] = StringIO(json.dumps({"action": "finalize"})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectStatusHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + + self.assertEqual(resp['state'], 'ready') + self.assertEqual(resp['action'], 'finalize') + self.assertIn('modified', resp) + self.assertIn('since', resp) + self.assertIn('ready', resp['message']) + + self.resp = [] + req['wsgi.input'] = StringIO(json.dumps({"action": "submit", "message": "I'm done!"})) + req['REQUEST_METHOD'] = 'PUT' + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectStatusHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + + self.assertEqual(resp['state'], 'submitted') + self.assertEqual(resp['action'], 'submit') + self.assertIn('modified', resp) + self.assertIn('since', resp) + self.assertEqual(resp['message'], "I'm done!") + + if __name__ == '__main__': From ce4abb7647d48e8b4ff90ae64d9469bd5484d7d7 
Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 07:23:52 -0500 Subject: [PATCH 064/123] bagit.builder: may disconnect_logfile() more robust --- python/nistoar/pdr/preserve/bagit/builder.py | 4 +-- .../pdr/preserve/bagit/test_builder.py | 30 +++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/python/nistoar/pdr/preserve/bagit/builder.py b/python/nistoar/pdr/preserve/bagit/builder.py index 444b717..aa97b12 100644 --- a/python/nistoar/pdr/preserve/bagit/builder.py +++ b/python/nistoar/pdr/preserve/bagit/builder.py @@ -281,8 +281,8 @@ def logfile_is_connected(self, logfile=None): def _handles_logfile(self, handler, logfilepath): # return True if the handler is set to write to a file with the given # name - return hasattr(handler,'stream') and hasattr(handler.stream, 'name') \ - and os.path.abspath(handler.stream.name) == os.path.abspath(logfilepath) + return hasattr(handler,'stream') and hasattr(handler, 'baseFilename') \ + and os.path.abspath(handler.baseFilename) == os.path.abspath(logfilepath) def _get_log_handler(self, logfilepath): if logfilepath not in self._log_handlers: diff --git a/python/tests/nistoar/pdr/preserve/bagit/test_builder.py b/python/tests/nistoar/pdr/preserve/bagit/test_builder.py index 8e778f4..91fab53 100644 --- a/python/tests/nistoar/pdr/preserve/bagit/test_builder.py +++ b/python/tests/nistoar/pdr/preserve/bagit/test_builder.py @@ -66,18 +66,22 @@ def setUp(self): self.tf.track("testbag") self.tf.track("issued-ids.json") if len(self.bag.plog.handlers) > 0: - print("warning: found stray handlers attached:"+ + print("warning: setUp found stray handlers attached:"+ ("\n".join([str(h) for h in self.bag.plog.handlers]))) def tearDown(self): self.bag.disconnect_logfile() - plog = self.bag.plog - del self.bag + plog = None + if self.bag: + plog = self.bag.plog + self.bag.disconnect_logfile() self.bag = None self.tf.clean() - if len(plog.handlers) > 0: - print("ERROR: found stray handlers attached:"+ - 
("\n".join([str(h) for h in self.bag.plog.handlers]))) + if plog and len(plog.handlers) > 0: + print("ERROR: tearDown found stray handlers attached:"+ + ("\n".join([str(h) for h in plog.handlers]))) +# self.bag.disconnect_logfile() + def test_ctor(self): self.assertEqual(self.bag.bagname, "testbag") @@ -119,7 +123,7 @@ def test_ctor_on_existng_dir(self): bagdir = os.path.join(self.tf.root, "testbag") if not os.path.exists(bagdir): os.mkdir(bagdir) - del self.bag + self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertEqual(self.bag.bagname, "testbag") @@ -136,7 +140,7 @@ def test_ctor_on_existng_dir(self): self.assertFalse(self.bag._has_resmd()) def test_ctor_with_id(self): - del self.bag + self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg, id="edi00hw91c") @@ -171,7 +175,7 @@ def test_fix_id(self): self.bag._fix_id("ark:/88434/mds2-4193") self.cfg['validate_id'] = False - del self.bag + self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertEqual(self.bag._fix_id("ark:/88434/edi00hw91c"), "ark:/88434/edi00hw91c") @@ -183,7 +187,7 @@ def test_fix_id(self): self.bag._fix_id("ark:/goober/foo") self.cfg['validate_id'] = r'(edi\d)|(mds[01])' - del self.bag + self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) with self.assertRaises(ValueError): # validate this one @@ -200,7 +204,7 @@ def test_fix_id(self): self.cfg['validate_id'] = r'(edi\d)|(mds[01])' self.cfg['require_ark_id'] = False - del self.bag + self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertEqual(self.bag._fix_id("edi00hw91c"), "edi00hw91c") self.assertEqual(self.bag._fix_id("ark:/88434/edi00hw91c"), @@ -1189,7 +1193,7 @@ def test_update_ediid(self): def test_add_res_nerd(self): self.cfg['ensure_nerdm_type_on_add'] = bldr.NERDM_SCH_ID_BASE + "v0.4" - del self.bag + 
self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertIsNone(self.bag.ediid) with open(simplenerd) as fd: @@ -1244,7 +1248,7 @@ def test_add_ds_pod(self): def test_add_ds_pod_convert(self): self.cfg['ensure_nerdm_type_on_add'] = bldr.NERDM_SCH_ID_BASE + "v0.7" - del self.bag + self.bag.disconnect_logfile() self.bag = bldr.BagBuilder(self.tf.root, "testbag", self.cfg) self.assertIsNone(self.bag.ediid) From 7a690364e158032408601eb66c20c009b6322c9a Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 10:04:23 -0500 Subject: [PATCH 065/123] dbio.project: moved created,modified to status --- python/nistoar/midas/dbio/base.py | 23 +++--- python/nistoar/midas/dbio/status.py | 79 ++++++++++++++----- .../tests/nistoar/midas/dbio/test_record.py | 4 +- .../tests/nistoar/midas/dbio/test_status.py | 23 ++++++ 4 files changed, 98 insertions(+), 31 deletions(-) diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 1a9d68b..739f118 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -180,10 +180,6 @@ def _initialize(self, recdata: MutableMapping) -> MutableMapping: """ now = time.time() - if 'created' not in recdata: - recdata['created'] = now - if 'modified' not in recdata: - recdata['modified'] = recdata['created'] if not recdata.get('acls'): recdata['acls'] = {} if not recdata.get('owner'): @@ -192,7 +188,7 @@ def _initialize(self, recdata: MutableMapping) -> MutableMapping: # Should be None or a date recdata['deactivated'] = None if 'status' not in recdata: - recdata['status'] = RecordStatus(recdata['id'], {}).to_dict(False) + recdata['status'] = RecordStatus(recdata['id'], {'created': -1}).to_dict(False) for perm in ACLs.OWN: if perm not in recdata['acls']: recdata['acls'][perm] = [recdata['owner']] if recdata['owner'] else [] @@ -214,28 +210,31 @@ def created(self) -> float: """ the epoch timestamp indicating when this record was first 
corrected """ - return self._data.get('created', 0) + return self.status.created @property def created_date(self) -> str: """ the creation timestamp formatted as an ISO string """ - return datetime.fromtimestamp(math.floor(self.created)).isoformat() + return self.status.created_date @property def modified(self) -> float: """ the epoch timestamp indicating when this record was last updated """ - return self._data.get('modified', self._data.get('created', 0)) + out = self.status.modified + if out < 1: + out = self.status.created + return out @property def modified_date(self) -> str: """ the timestamp for the last modification, formatted as an ISO string """ - return datetime.fromtimestamp(math.floor(self.modified)).isoformat() + return self.status.modified_date @property def deactivated(self) -> bool: @@ -306,12 +305,12 @@ def save(self): """ if not self.authorized(ACLs.WRITE): raise NotAuthorized(self._cli.user_id, "update record") - oldmod = self.modified - self._data['modified'] = time.time() + olddates = (self.status.modified, self.status.created, self.status.since) + self.status.set_times() try: self._cli._upsert(self._coll, self._data) except Exception as ex: - self._data['modified'] = oldmod + (self._data['modified'], self._data['created'], self._data['since']) = olddates raise def authorized(self, perm: Permissions, who: str = None): diff --git a/python/nistoar/midas/dbio/status.py b/python/nistoar/midas/dbio/status.py index 5c2baba..d1a9b9b 100644 --- a/python/nistoar/midas/dbio/status.py +++ b/python/nistoar/midas/dbio/status.py @@ -29,6 +29,7 @@ _since_p = "since" _action_p = "action" _modified_p = "modified" +_created_p = "created" _message_p = "message" # Common record actions @@ -61,17 +62,23 @@ def __init__(self, id: str, status_data: Mapping): if not self._data.get(_action_p): self._data[_action_p] = self.CREATE_ACTION - # try to keep since <= modified by default - if _since_p not in self._data or not isinstance(self._data[_since_p], int): + # try to 
keep created,since <= modified by default + if _created_p not in self._data or not isinstance(self._data[_created_p], (int, float)): + self._data[_created_p] = self._data.get(_modified_p) \ + if isinstance(self._data.get(_modified_p), float) else 0 + if _since_p not in self._data or not isinstance(self._data[_since_p], (int, float)): self._data[_since_p] = self._data.get(_modified_p) \ - if isinstance(self._data.get(_modified_p), int) else 0 - if _modified_p not in self._data or not isinstance(self._data[_modified_p], int): + if isinstance(self._data.get(_modified_p), float) else 0 + if _modified_p not in self._data or not isinstance(self._data[_modified_p], (int, float)): self._data[_modified_p] = -1 if self._data[_since_p] < 0 else 0 - if self._data[_since_p] < 0: - self._data[_since_p] = time() + now = time() if self._data[_modified_p] < 0: - self._data[_modified_p] = time() + self._data[_modified_p] = now + if self._data[_since_p] < 0 or (self._data[_modified_p] > 0 and self._data[_since_p] < 1): + self._data[_since_p] = now + if self._data[_created_p] < 0 or (self._data[_modified_p] > 0 and self._data[_created_p] < 1): + self._data[_created_p] = now if _message_p not in self._data: self._data[_message_p] = "" @@ -93,7 +100,23 @@ def state(self) -> str: return self._data[_state_p] @property - def since(self) -> int: + def created(self) -> float: + """ + The epoch timestamp when the record entered the current state + """ + return self._data[_created_p] + + @property + def created_date(self) -> str: + """ + the timestamp for when the record entered the current state, formatted as an ISO string + """ + if self.created <= 0: + return "pending" + return datetime.fromtimestamp(math.floor(self.created)).isoformat() + + @property + def since(self) -> float: """ The epoch timestamp when the record entered the current state """ @@ -117,7 +140,7 @@ def action(self) -> str: return self._data[_action_p] @property - def modified(self) -> int: + def modified(self) -> float: 
""" The epoch timestamp when the latest action was applied to the record. """ @@ -145,12 +168,12 @@ def message(self) -> str: def message(self, val): self._data[_message_p] = val - def act(self, action: str, message: str="", when: int=0): + def act(self, action: str, message: str="", when: float=0): """ record the application of a particular action on the record - :param str action: the name of the action being applied + :param str action: the name of the action being applied :param str message: a statement indicating the reason or intent of the action - :param int when: the epoch timestamp for when the action was applied. A value of + :param float when: the epoch timestamp for when the action was applied. A value of zero (default) indicates that the timestamp should be set when the record is saved. A value less than zero will cause the current time to be set. @@ -165,23 +188,43 @@ def act(self, action: str, message: str="", when: int=0): self._data[_action_p] = action self._data[_message_p] = message self._data[_modified_p] = when + if self._data[_created_p] < 1: + self._data[_created_p] = when - def set_state(self, state, when: int=-1): + def set_state(self, state, when: float=-1): """ record a new state that the record has entered. :param str state: the name of the new state that the record has entered - :param int when: the epoch timestamp for when the state changed. A value of + :param float when: the epoch timestamp for when the state changed. A value of zero indicates that the timestamp should be set when the record is saved. A value less than zero (default) will cause the current time to be set. 
""" if not state: raise ValueError("State not specified") - if when < 0: - when = time() - self._data[_state_p] = state - self._data[_since_p] = when + if self._data[_state_p] != state: + if when < 0: + when = time() + self._data[_state_p] = state + self._data[_since_p] = when + if self._data[_created_p] < 1: + self._data[_created_p] = when + + def set_times(self, set_modified=True): + """ + update any dates that are waiting to be set. This will be called when the record is + saved. + :param bool set_modified: if True (default), the modified time will always be updated; + otherwise, it is only updated if it is non-positive. + """ + now = time() + if self._data[_created_p] < 1: + self._data[_created_p] = now + if self._data[_since_p] < 1: + self._data[_since_p] = now + if set_modified or self._data[_modified_p] < 1: + self._data[_modified_p] = now def to_dict(self, with_id=True): """ diff --git a/python/tests/nistoar/midas/dbio/test_record.py b/python/tests/nistoar/midas/dbio/test_record.py index 382be49..48a584b 100644 --- a/python/tests/nistoar/midas/dbio/test_record.py +++ b/python/tests/nistoar/midas/dbio/test_record.py @@ -15,6 +15,8 @@ def setUp(self): {"id": "pdr0:2222", "name": "brains", "owner": self.user}, self.cli) def test_ctor(self): + self.rec = base.ProjectRecord(base.DRAFT_PROJECTS, + {"id": "pdr0:2222", "name": "brains", "owner": self.user}, self.cli) self.assertIs(self.rec._cli, self.cli) self.assertEqual(self.rec.id, "pdr0:2222") self.assertEqual(self.rec.name, "brains") @@ -45,7 +47,7 @@ def test_save(self): self.assertNotIn("pdr0:2222", self.cli._db[base.DRAFT_PROJECTS]) self.rec.save() - self.assertGreater(self.rec.modified, self.rec.created) + self.assertGreaterEqual(self.rec.modified, self.rec.created) oldmod = self.rec.modified self.assertIn("pdr0:2222", self.cli._db[base.DRAFT_PROJECTS]) self.assertEqual(self.cli._db[base.DRAFT_PROJECTS]["pdr0:2222"]['name'], "brains") diff --git a/python/tests/nistoar/midas/dbio/test_status.py 
b/python/tests/nistoar/midas/dbio/test_status.py index 569420d..c634337 100644 --- a/python/tests/nistoar/midas/dbio/test_status.py +++ b/python/tests/nistoar/midas/dbio/test_status.py @@ -15,8 +15,10 @@ def test_ctor(self): self.assertEqual(stat.message, "") self.assertEqual(stat.since, 0) self.assertEqual(stat.modified, 0) + self.assertEqual(stat.created, 0) self.assertEqual(stat.since_date, "pending") self.assertEqual(stat.modified_date, "pending") + self.assertEqual(stat.created_date, "pending") def test_act(self): stat = status.RecordStatus("goob", {"state": status.EDIT, "since": -1}) @@ -26,16 +28,20 @@ def test_act(self): self.assertEqual(stat.message, "") self.assertGreater(stat.since, 0) self.assertGreater(stat.modified, 0) + self.assertGreater(stat.created, 0) self.assertNotEqual(stat.since_date, "pending") self.assertNotEqual(stat.modified_date, "pending") + self.assertNotEqual(stat.created_date, "pending") stat.act(Action.PATCH, "made updates") self.assertEqual(stat.state, status.EDIT) self.assertEqual(stat.action, Action.PATCH) self.assertEqual(stat.message, "made updates") self.assertEqual(stat.modified, 0) + self.assertGreater(stat.created, 0) self.assertNotEqual(stat.since_date, "pending") self.assertEqual(stat.modified_date, "pending") + self.assertNotEqual(stat.created_date, "pending") stat.act(Action.PUT) self.assertEqual(stat.state, status.EDIT) @@ -74,6 +80,23 @@ def test_set_state(self): self.assertLess(stat.modified, stat.since) self.assertNotEqual(stat.since_date, "pending") self.assertNotEqual(stat.modified_date, "pending") + + def test_set_times(self): + stat = status.RecordStatus("goob", {}) + self.assertEqual(stat.since, 0) + self.assertEqual(stat.modified, 0) + self.assertEqual(stat.created, 0) + self.assertEqual(stat.since_date, "pending") + self.assertEqual(stat.modified_date, "pending") + self.assertEqual(stat.created_date, "pending") + + stat.set_times() + self.assertGreater(stat.since, 0) + self.assertGreater(stat.modified, 0) + 
self.assertGreater(stat.created, 0) + self.assertNotEqual(stat.since_date, "pending") + self.assertNotEqual(stat.modified_date, "pending") + self.assertNotEqual(stat.created_date, "pending") if __name__ == '__main__': From ffc9f364ea2ecc0568d59430a2afceb5dbc8de32 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 16:36:04 -0500 Subject: [PATCH 066/123] prov.py: allow jsonpatch to appear as value of the Action's object dictionary --- python/nistoar/pdr/publish/prov.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/nistoar/pdr/publish/prov.py b/python/nistoar/pdr/publish/prov.py index 2b1bb5f..bd58591 100644 --- a/python/nistoar/pdr/publish/prov.py +++ b/python/nistoar/pdr/publish/prov.py @@ -250,7 +250,7 @@ def message(self) -> str: return self._msg @message.setter - def message(self, msg: str) -> None: + def message(self, message: str) -> None: self._msg = message @property @@ -352,6 +352,14 @@ def _object_to_dict(self): return json.loads(self.object.to_string()) if hasattr(self.object, 'to_dict'): return self.object.to_dict() + if isinstance(self.object, Mapping): + out = OrderedDict() + for k,v in self.object.items(): + if isinstance(v, JsonPatch): + out[k] = json.loads(v.to_string()) + else: + out[k] = v + return out return self.object def _serialize_subactions(self, indent=4) -> str: From a730bdff7004d2c53cbb9f0e3109b30fcf44f9b0 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 16:43:24 -0500 Subject: [PATCH 067/123] publish.service.wsgi: support setting global headers from config --- .../nistoar/pdr/publish/service/wsgi/base.py | 35 ++++++++++++++----- .../nistoar/pdr/publish/service/wsgi/pdp0.py | 3 +- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/python/nistoar/pdr/publish/service/wsgi/base.py b/python/nistoar/pdr/publish/service/wsgi/base.py index e773e80..cda2ea6 100644 --- a/python/nistoar/pdr/publish/service/wsgi/base.py +++ b/python/nistoar/pdr/publish/service/wsgi/base.py 
@@ -7,6 +7,8 @@ from functools import reduce from logging import Logger from urllib.parse import parse_qs +from collections import OrderedDict +from collections.abc import Mapping from wsgiref.headers import Headers # from urllib.parse import parse_qs @@ -41,7 +43,7 @@ class include: ACTION_PUBLISH = "publish" def __init__(self, path: str, wsgienv: dict, start_resp: Callable, who=None, - config: dict={}, log: Logger=None): + config: dict={}, log: Logger=None, app=None): self._path = path self._env = wsgienv self._start = start_resp @@ -54,6 +56,10 @@ def __init__(self, path: str, wsgienv: dict, start_resp: Callable, who=None, self.who = who self.log = log + self._app = app + if self._app and hasattr(app, 'include_headers'): + self._hdr = Headers(list(app.include_headers.items())) + self._meth = self._env.get('REQUEST_METHOD', 'GET') def get_action(self): @@ -141,7 +147,8 @@ def send_json(self, data, message="OK", code=200, ashead=False, encoding='utf-8' def _send(self, code, message, content, contenttype, ashead, encoding): if ashead is None: ashead = self._meth.upper() == "HEAD" - status = "{0} {1}".format(str(code), message) + # status = "{0} {1}".format(str(code), message) + self.set_response(code, message) if content: if not isinstance(content, list): @@ -156,15 +163,12 @@ def _send(self, code, message, content, contenttype, ashead, encoding): # convert to bytes content = [(isinstance(c, str) and c.encode(encoding)) or c for c in content] - hdrs = [] if contenttype: - hdrs = Headers([]) - hdrs.add_header("Content-Type", contenttype) - hdrs = hdrs.items() + self.add_header("Content-Type", contenttype) if len(content) > 0: - hdrs.append(("Content-Length", str(reduce(lambda x, t: x+len(t), content, 0)))) + self.add_header("Content-Length", str(reduce(lambda x, t: x+len(t), content, 0))) - self._start(status, hdrs, None) + self.end_headers() return (not ashead and content) or [] def add_header(self, name, value): @@ -202,7 +206,7 @@ def end_headers(self): return 
the body content (as an iterable). """ status = "{0} {1}".format(str(self._code), self._msg) - self._start(status, self._hdr.items()) + self._start(status, self._hdr.items(), None) def handle(self): """ @@ -285,6 +289,19 @@ def __init__(self, appname, log, config=None): wrlogf = os.path.join(cfgmod.global_logdir, wrlogf) self._recorder = WebRecorder(wrlogf, self._name) + self.include_headers = Headers() + if config.get("include_headers"): + try: + if isinstance(config.get("include_headers"), Mapping): + self.include_headers = Headers(list(config.get("include_headers").items())) + elif isinstance(config.get("include_headers"), list): + self._default_headers = Headers(list(config.get("include_headers"))) + else: + raise TypeError("Not a list of 2-tuples") + except TypeError as ex: + raise ConfigurationException("include_headers: must be either a dict or a list of "+ + "name-value pairs") + @abstractmethod def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAgent) -> Handler: """ diff --git a/python/nistoar/pdr/publish/service/wsgi/pdp0.py b/python/nistoar/pdr/publish/service/wsgi/pdp0.py index e1b4522..88547d6 100644 --- a/python/nistoar/pdr/publish/service/wsgi/pdp0.py +++ b/python/nistoar/pdr/publish/service/wsgi/pdp0.py @@ -47,8 +47,7 @@ class _Handler(Handler): default_agent = None def __init__(self, app, path: str, wsgienv: dict, start_resp: Callable, who=None, config: dict={}): - self._app = app - Handler.__init__(self, path, wsgienv, start_resp, who, config, self._app.log) + Handler.__init__(self, path, wsgienv, start_resp, who, config, app.log, app) self._reqrec = None if self._app._recorder: self._reqrec = self._app._recorder.from_wsgi(self._env) From ffa4194faf5c6520163e9c6ab70b38b45ef01b2b Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 16:46:08 -0500 Subject: [PATCH 068/123] Bookkeeping fixes: * move dates to status block * fix update action constant * ensure provenance recording * don't keep instantiating 
RecordStatus objects * don't record provenance actions if PartNotAccessible * allow editing under status.READY state * wsgi: make use of global header feature --- python/nistoar/midas/dbio/base.py | 8 +- python/nistoar/midas/dbio/project.py | 30 ++- python/nistoar/midas/dbio/wsgi/base.py | 11 +- .../tests/nistoar/midas/dbio/test_project.py | 9 + .../midas/dbio/test_project_fsbased.py | 171 ++++++++++++++++++ .../nistoar/midas/dbio/wsgi/test_project.py | 38 ++-- 6 files changed, 236 insertions(+), 31 deletions(-) create mode 100644 python/tests/nistoar/midas/dbio/test_project_fsbased.py diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 739f118..1af8984 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -167,6 +167,7 @@ def __init__(self, servicetype: str, recdata: Mapping, dbclient: DBClient=None): raise ValueError("Record data is missing its 'id' property") self._data = self._initialize(recdata) self._acls = ACLs(self, self._data.get("acls", {})) + self._status = RecordStatus(self.id, self._data['status']) def _initialize(self, recdata: MutableMapping) -> MutableMapping: """ @@ -287,7 +288,7 @@ def status(self) -> RecordStatus: return the status object that indicates the current state of the record and the last action applied to it. 
""" - return RecordStatus(self.id, self._data.get('status')) + return self._status @property def acls(self) -> ACLs: @@ -376,8 +377,9 @@ def to_dict(self): out = deepcopy(self._data) out['acls'] = self.acls._perms out['type'] = self._coll - out['createdDate'] = self.created_date - out['modifiedDate'] = self.modified_date + out['status']['createdDate'] = self.status.created_date + out['status']['modifiedDate'] = self.status.modified_date + out['status']['sinceDate'] = self.status.since_date return out class Group(ProtectedRecord): diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index c3209fa..5dcdd4b 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -24,7 +24,7 @@ from nistoar.pdr.publish.prov import PubAgent, Action _STATUS_ACTION_CREATE = RecordStatus.CREATE_ACTION -_STATUS_ACTION_UPDATE = RecordStatus.CREATE_ACTION +_STATUS_ACTION_UPDATE = RecordStatus.UPDATE_ACTION _STATUS_ACTION_CLEAR = "clear" _STATUS_ACTION_FINALIZE = "finalize" _STATUS_ACTION_SUBMIT = "submit" @@ -135,7 +135,7 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: else: prec.save() - self.dbcli.record_action(Action(Action.CREATE, prec.id, self.who, prec.status.message)) + self._record_action(Action(Action.CREATE, prec.id, self.who, prec.status.message)) return prec def _get_id_shoulder(self, user: PubAgent): @@ -232,8 +232,6 @@ def update_data(self, id, newdata, part=None, message="", _prec=None): the given ``newdata`` is a value that should be set to the property pointed to by ``part``. :param str message: an optional message that will be recorded as an explanation of the update. - :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to ``id``. - If this is not provided, the record will by fetched anew based on the ``id``. 
:raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record @@ -298,6 +296,10 @@ def update_data(self, id, newdata, part=None, message="", _prec=None): try: data = self._save_data(data, _prec, message, set_action and _STATUS_ACTION_UPDATE) + except InvalidUpdate as ex: + provact.message = "Failed to save update due to invalid data: " + ex.format_errors() + raise + except Exception as ex: self.log.error("Failed to save update for project, %s: %s", _prec.id, str(ex)) provact.message = "Failed to save update due to an internal error" @@ -432,12 +434,17 @@ def replace_data(self, id, newdata, part=None, message="", _prec=None): try: data = self._save_data(data, _prec, message, set_action and _STATUS_ACTION_UPDATE) + except PartNotAccessible as ex: + # client request error; don't record action + raise + except Exception as ex: self.log.error("Failed to save update to project, %s: %s", _prec.id, str(ex)) provact.message = "Failed to save update due to an internal error" + self._record_action(provact) raise - finally: + else: self._record_action(provact) return self._extract_data_part(data, part) @@ -498,9 +505,14 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): given by `id`. :raises PartNotAccessible: if clearing of the part of the data specified by `part` is not allowed. 
""" + set_state = False if not prec: + set_state = True prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + if _prec.status.state not in [status.EDIT, status.READY]: + raise NotEditable(id) + initdata = self._new_data_for(prec.id, prec.meta) if not part: # clearing everything: return record to its initial defaults @@ -542,8 +554,11 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): else: provact = Action(Action.DELETE, tgt, self.who, prec.status.message) + if set_state: + prec.status.set_state(status.EDIT) + try: - self.save() + prec.save() except Exception as ex: self.log.error("Failed to save cleared data for project, %s: %s", tgt, str(ex)) @@ -800,7 +815,7 @@ def __init__(self, message: str=None, recid: str=None, part: str=None, message = "Unknown validation errors encountered while updating data" errors = [] - super(InvalidUpdate, self).__init__(recid, message, sys) + super(InvalidRecord, self).__init__(recid, message, sys) self.record_part = part self.errors = errors @@ -850,6 +865,7 @@ def __init__(self, message: str=None, recid=None, part=None, errors: List[str]=N this parameter if the entire record was provided. :param [str] errors: a listing of the individual errors uncovered in the data """ + super(InvalidUpdate, self).__init__(message, recid, part, errors, sys) class PartNotAccessible(DBIORecordException): """ diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py index 276fe04..3b34026 100644 --- a/python/nistoar/midas/dbio/wsgi/base.py +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -33,12 +33,11 @@ def __init__(self, subapp: SubApp, dbclient: DBClient, wsgienv: dict, start_resp :param Logger log: the logger to use within this handler; if not provided (typical), the logger attached to the SubApp will be used. 
""" - self._app = subapp - if config is None: - config = self._app.cfg - if not log: - log = self._app.log - Handler.__init__(self, path, wsgienv, start_resp, who, config, log) + if config is None and hasattr(subapp, 'cfg'): + config = subapp.cfg + if not log and hasattr(subapp, 'log'): + log = subapp.log + Handler.__init__(self, path, wsgienv, start_resp, who, config, log, subapp) self._dbcli = dbclient self._reqrec = None if hasattr(self._app, "_recorder") and self._app._recorder: diff --git a/python/tests/nistoar/midas/dbio/test_project.py b/python/tests/nistoar/midas/dbio/test_project.py index 1c24be2..2a27723 100644 --- a/python/tests/nistoar/midas/dbio/test_project.py +++ b/python/tests/nistoar/midas/dbio/test_project.py @@ -58,6 +58,9 @@ def last_action_for(self, recid): return None return acts[-1] + def assertActionCount(self, recid, count): + self.assertEqual(len(self.project.dbcli._db.get(base.PROV_ACT_LOG, {}).get(recid,[])), count) + def test_ctor(self): self.create_service() self.assertTrue(self.project.dbcli) @@ -171,6 +174,11 @@ def test_update_replace_data(self): self.assertEqual(prec.id, "mdm1:0003") self.assertEqual(prec.data, {}) self.assertEqual(prec.meta, {}) + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.CREATE) + self.assertNotIn('subactions', lastact) +# self.assertEqual(len(lastact['subactions']), 1) data = self.project.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) self.assertEqual(data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) @@ -181,6 +189,7 @@ def test_update_replace_data(self): self.assertEqual(lastact['subject'], prec.id) self.assertEqual(lastact['type'], prov.Action.PATCH) self.assertNotIn('subactions', lastact) + self.assertActionCount(prec.id, 2) data = self.project.update_data(prec.id, {"y": 1, "z": 10, "grid": "B"}, "pos") self.assertEqual(data, {"x": 23, "y": 1, "z": 10, "grid": "B"}) diff 
--git a/python/tests/nistoar/midas/dbio/test_project_fsbased.py b/python/tests/nistoar/midas/dbio/test_project_fsbased.py new file mode 100644 index 0000000..12fb770 --- /dev/null +++ b/python/tests/nistoar/midas/dbio/test_project_fsbased.py @@ -0,0 +1,171 @@ +import os, json, pdb, logging, tempfile +from pathlib import Path +import unittest as test + +from nistoar.midas.dbio import fsbased, base +from nistoar.midas.dbio import project +from nistoar.pdr.publish import prov + +tmpdir = tempfile.TemporaryDirectory(prefix="_test_project.") +loghdlr = None +rootlog = None +def setUpModule(): + global loghdlr + global rootlog + rootlog = logging.getLogger() + loghdlr = logging.FileHandler(os.path.join(tmpdir.name,"test_pdp.log")) + loghdlr.setLevel(logging.DEBUG) + rootlog.addHandler(loghdlr) + +def tearDownModule(): + global loghdlr + if loghdlr: + if rootlog: + rootlog.removeHandler(loghdlr) + loghdlr.flush() + loghdlr.close() + loghdlr = None + tmpdir.cleanup() + +nistr = prov.PubAgent("midas", prov.PubAgent.USER, "nstr1") + +class TestInMemoryDBClientFactory(test.TestCase): + + def setUp(self): + self.outdir = tempfile.TemporaryDirectory(prefix="_test_dbclient.", dir=".") + self.cfg = { "goob": "gurn" } + self.fact = fsbased.FSBasedDBClientFactory(self.cfg, self.outdir.name) + + def tearDown(self): + self.outdir.cleanup() + +class TestProjectService(test.TestCase): + + def setUp(self): + self.cfg = { + "clients": { + "midas": { + "default_shoulder": "mdm1" + }, + "default": { + "default_shoulder": "mdm0" + } + }, + "dbio": { + "allowed_project_shoulders": ["mdm1", "spc1"], + "default_shoulder": "mdm0" + } + } + self.outdir = tempfile.TemporaryDirectory(prefix="_test_dbclient.", dir=".") + self.fact = fsbased.FSBasedDBClientFactory(self.cfg["dbio"], self.outdir.name) + + def tearDown(self): + self.outdir.cleanup() + + def create_service(self, request=None): + self.project = project.ProjectService(base.DMP_PROJECTS, self.fact, self.cfg, nistr, + 
rootlog.getChild("project")) + return self.project + + def last_action_for(self, recid): + recpath = os.path.join(self.fact._dbroot, base.PROV_ACT_LOG, (recid+".lis")) + line = None + with open(recpath) as fd: + for line in fd: + pass + return json.loads(line) + + def assertActionCount(self, recid, count): + recpath = os.path.join(self.fact._dbroot, base.PROV_ACT_LOG, (recid+".lis")) + with open(recpath) as fd: + i = 0 + for line in fd: + i += 1 + self.assertEqual(i, count) + + def test_ctor(self): + self.create_service() + self.assertTrue(self.project.dbcli) + self.assertEqual(self.project.cfg, self.cfg) + self.assertEqual(self.project.who.actor, "nstr1") + self.assertEqual(self.project.who.group, "midas") + self.assertTrue(self.project.log) + + def test_update_replace_data(self): + self.create_service() + self.assertTrue(not self.project.dbcli.name_exists("goob")) + + prec = self.project.create_record("goob") + self.assertEqual(prec.name, "goob") + self.assertEqual(prec.id, "mdm1:0001") + self.assertEqual(prec.data, {}) + self.assertEqual(prec.meta, {}) + self.assertEqual(prec.status.state, "edit") + self.assertEqual(prec.status.action, "create") + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.CREATE) + self.assertNotIn('subactions', lastact) +# self.assertEqual(len(lastact['subactions']), 1) + + data = self.project.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + self.assertEqual(data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + self.assertEqual(prec.status.state, "edit") + self.assertEqual(prec.status.action, "update") + + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.PATCH) + self.assertNotIn('subactions', 
lastact) + self.assertActionCount(prec.id, 2) + + data = self.project.update_data(prec.id, {"y": 1, "z": 10, "grid": "B"}, "pos") + self.assertEqual(data, {"x": 23, "y": 1, "z": 10, "grid": "B"}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "B"}}) + + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.PATCH) + self.assertEqual(len(lastact['subactions']), 1) + self.assertEqual(lastact['subactions'][0]['type'], prov.Action.PATCH) + self.assertEqual(lastact['subactions'][0]['subject'], prec.id+"#data.pos") + + data = self.project.update_data(prec.id, "C", "pos/grid") + self.assertEqual(data, "C") + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 1, "z": 10, "grid": "C"}}) + + # replace + data = self.project.replace_data(prec.id, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + self.assertEqual(data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"pos": {"vec": [15, 22, 1], "grid": "Z"}}) + + lastact = self.last_action_for(prec.id) + self.assertEqual(lastact['subject'], prec.id) + self.assertEqual(lastact['type'], prov.Action.PUT) + self.assertNotIn('subactions', lastact) + + # update again + data = self.project.update_data(prec.id, "blue", "color") + self.assertEqual(data, "blue") + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "blue", "pos": {"vec": [15, 22, 1], "grid": "Z"}}) + + with self.assertRaises(project.PartNotAccessible): + self.project.update_data(prec.id, 2, "pos/vec/x") + + self.assertActionCount(prec.id, 6) + + + + + + + + +if __name__ == '__main__': + test.main() diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index e05bf9f..9452008 100644 --- 
a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -57,12 +57,15 @@ def setUp(self): "superusers": [ "rlp" ], "allowed_project_shoulders": ["mdm1", "spc1"], "default_shoulder": "mdm0" + }, + "include_headers": { + "Access-Control-Allow-Origin": "*" } } self.dbfact = inmem.InMemoryDBClientFactory({}, { "nextnum": { "mdm1": 2 }}) self.svcfact = prj.ProjectServiceFactory(base.DMP_PROJECTS, self.dbfact, self.cfg, rootlog.getChild("midas.prj")) - self.app = prj.MIDASProjectApp(self.svcfact, rootlog.getChild("dmpapi")) + self.app = prj.MIDASProjectApp(self.svcfact, rootlog.getChild("dmpapi"), self.cfg) self.resp = [] self.rootpath = "/midas/dmp/" @@ -85,7 +88,7 @@ def test_create_handler_name(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectNameHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0001") @@ -112,6 +115,11 @@ def test_get_name(self): body = hdlr.handle() self.assertIn("404 ", self.resp[0]) + # Check for CORS header + cors = [h for h in self.resp if h.startswith("Access-Control-Allow-Origin")] + self.assertGreater(len(cors), 0) + self.assertTrue(cors[0].startswith("Access-Control-Allow-Origin: *")) + def test_put_name(self): path = "mdm1:0003/name" req = { @@ -178,7 +186,7 @@ def test_create_handler_full(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0001") @@ -189,7 +197,7 @@ def test_create_handler_full(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") 
self.assertEqual(hdlr._id, "mdm1:0001") @@ -258,7 +266,7 @@ def test_create(self): req['wsgi.input'] = StringIO(json.dumps({"data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("400 ", self.resp[0]) @@ -267,7 +275,7 @@ def test_create(self): req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("201 ", self.resp[0]) @@ -287,7 +295,7 @@ def test_search(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("200 ", self.resp[0]) @@ -340,7 +348,7 @@ def test_getput_data(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -411,7 +419,7 @@ def test_create_handler_datapart(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "authors") self.assertEqual(hdlr._id, "pdr0:0012") @@ -423,7 +431,7 @@ def test_create_handler_acls(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) 
self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -460,7 +468,7 @@ def test_getupd_aclsperm(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "read") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -528,7 +536,7 @@ def test_getdel_aclspermmem(self): } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectACLsHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "write/hank") self.assertEqual(hdlr._id, "mdm1:0003") body = hdlr.handle() @@ -644,7 +652,7 @@ def test_get_status(self): req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("201 ", self.resp[0]) @@ -683,7 +691,7 @@ def test_update_status_message(self): req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("201 ", self.resp[0]) @@ -725,7 +733,7 @@ def test_process(self): req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", "data": {"color": "red"}})) hdlr = self.app.create_handler(req, self.start, path, nistr) 
self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) - self.assertEqual(hdlr.cfg, {}) + self.assertNotEqual(hdlr.cfg, {}) self.assertEqual(hdlr._path, "") body = hdlr.handle() self.assertIn("201 ", self.resp[0]) From 9643a9742fee787b3097376fd9d7e839176fa828 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 18:46:32 -0500 Subject: [PATCH 069/123] update the DMP API doc for the new status property --- docs/dmpsvc-openapi.yml | 236 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 222 insertions(+), 14 deletions(-) diff --git a/docs/dmpsvc-openapi.yml b/docs/dmpsvc-openapi.yml index 4354d08..7302307 100644 --- a/docs/dmpsvc-openapi.yml +++ b/docs/dmpsvc-openapi.yml @@ -310,6 +310,140 @@ paths: schema: "$ref": "#/components/schemas/ErrorResponse" + /mdm1/{projid}/status: + summary: + information about the status of the record, including its current state and the last action + applied to the record. + parameters: + "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record + get: + summary: "the current status data" + description: + this returns the value of the "status" property that is returned by the "/mdm1/{projid}" + endpoint. The properties in this describe the current state of the record. + responses: + "200": + description: The DMP record was found and its status info was returned. + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + put: + summary: "request an action be applied to the record" + description: + This endpoint can be used to request special processing be applied to the record. 
The + "action" property in the input object indicates what action should be applied. The only + accepted values are "finalize" and "submit". The finalize action will cause the last of + the automated updates be applied + before the record can be submitted for publishing. For example, the version + that the record will be published as will be set as part of this action. Applying finalize + before the submit action allows the client to give the record one last examination before + submission. The submit action will apply the finalize action (again) and then submit the + record to be published. Note that all input properties besides "action" and "message" will + be ignored. + responses: + "200": + description: + The requested action was successfully applied to the record. If the + requested action was "finalize", the new state returned will be "ready". + If the requested action was "submit", the new state will be "submitted". + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "202": + description: + The requested action was initiated on the record and is still underway. + The new state returned will be "processing". The record must leave this + state before further edits or actions can be applied. If the processing + eventually fails, the "message" property will be set to an error message. + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "400": + description: + The inputs for the requested action were illegal in some way. In particular, an + unsupported "action" value will result in this error. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + patch: + summary: "update the status (synonymous to a PUT request)" + description: + This method is synonymous to a PUT request in that it can also be used to request + special processing. In addition, if the input includes only the "message" + property, the status message will just be updated with no other processing applied. + responses: + "200": + description: + The requested action was successfully applied to the record or the message was + updated (depending on whether an action was requested). + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "202": + description: + Special processing was requested via the "action" input property; the + processing was initiated on the record and is still underway. + The new state returned will be "processing". The record must leave this + state before further edits or actions can be applied. If the processing + eventually fails, the "message" property will be set to an error message. + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "400": + description: + The inputs for the requested action were illegal in some way. In particular, an + unsupported "action" value will result in this error. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DMP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + components: parameters: projid: @@ -365,20 +499,13 @@ components: type: string minimum: 1 required: true - "created": - description: the epoch date-time that this record was created - type: integer - required: true - "createdDate": - description: the ISO 8601-formatted data-time that this record was created - type: string - "lastModified": - description: the epoch date-time that this record was last updated via the API - type: integer - required: true - "lastModifiedDate": - description: the ISO 8601-formatted data-time that this record was last updated via the API - type: string + "status": + description: + information describing the current state of the record and the last change that was + applied to it. + "$ref": "#/components/schemas/RecordStatus" + minimum: 1 + maximum: 1 "curators": description: the list of IDs for people who have been assigned as curators for this record; it will be empty @@ -400,6 +527,87 @@ components: description: metadata associated with the client, managed by the service type: object + ActionRequest: + title: a form of a RecordStatus that is used to request an action be applied to the record + description: + Use this payload type to request that an action (e.g. "finalize" or "submit") be applied to + the record. The finalize action will cause the last of the automated updates be applied + before the record can be submitted for publishing. For example, the version + that the record will be published as will be set as part of this action. Applying finalize + before the submit action allows the client to give the record one last examination before + submission. The submit action will apply the finalize action (again) and then submit the + record to be published. + properties: + "action": + description: + the name of the action to apply. Currently, only "finalize" and "submit" are allowed + action names that can be applied. 
If not provided, only the status message will be + updated. + type: string + minimum: 0 + maximum: 1 + "message": + description: + an optional message to record as to the reason or intent for applying the action. If, + for example, the action is submit which would revise a previous publication, the client + can providea message describing what has changed. If not provided, a default message + will be recorded. + + RecordStatus: + title: a description of the current state of the record + description: + This object provides information about the current state of the record and the action that + was applied to it. It also includes the three timestamps--when it was created, last + modified, and when it entered its current state. + properties: + "state": + description: + a label indicating it stage in its life-cycle. Possible values are "edit", "processing", + "ready", "submitted", "published", and "unwell". When a DMP is created, it enters the + "edit" state. After being finalized, it will be in the "ready" state. Further updates + to the record are only allowed when it is in the "edit" or "ready" state. + After it is submitted, it will first be in the "submitted" state and then eventually the + "published" state. If it gets into an erroneous state that cannot be auto-corrected, + it may go into the "unwell" state. + type: string + minimum: 1 + enum: ["edit", "processing", "ready", "submitted", "published", "unwell" ] + "action": + description: + the name of the last action that was applied. Possible values include "create", + "update", "finalize", and "submit". + type: string + minimum: 1 + maximum: 1 + "message": + description: + a human-oriented message describing the last action applied to the record. In some + cases this can be client provided. 
+ type: string + minimum: 1 + maximum: 1 + "created": + description: the epoch timestamp in fractional seconds when the record was created + type: number + "createdDate": + description: the ISO 8601-formatted data-time that this record was created + type: string + "modified": + description: the epoch timestamp in fractional seconds when the record was last updated + type: number + "modifiedDate": + description: the ISO 8601-formatted data-time that this record was last updated + type: string + "since": + description: + the epoch timestamp in fractional seconds when the record was entered its current state. + For example, if the current state is "submitted", this is the date that the record was + submitted for publication. + type: number + "sinceDate": + description: the ISO 8601-formatted data-time that this record entered its current state + type: string + CreateRecordRequest: title: a form of a ProjectRecord that is used as a request to create a new one description: From 17b3c797944ccc15d54b7aeb64a5049be09b3cbf Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 18:48:15 -0500 Subject: [PATCH 070/123] midasserver: open up CORS permission --- docker/midasserver/midas-dmpdap_conf.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml index a853c98..8d88f14 100644 --- a/docker/midasserver/midas-dmpdap_conf.yml +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -25,6 +25,9 @@ services: allowed_project_shoulders: ["mdsx", "mds3", "mds0", "pdr0"] default_shoulder: mdsx + include_headers: + "Access-Control-Allow-Origin": "*" + default_convention: mdsx conventions: mdsx: @@ -54,6 +57,9 @@ services: allowed_project_shoulders: ["mdm0", "mdm1"] default_shoulder: mdm1 + include_headers: + "Access-Control-Allow-Origin": "*" + default_convention: mdm1 conventions: mdm1: From 82f23c19d86d843ad52ea84da8005c7b03a5ecb2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 10 Mar 2023 
18:48:47 -0500 Subject: [PATCH 071/123] scripts/install.sh: fix installation of python code --- scripts/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install.sh b/scripts/install.sh index 57829f2..6989898 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -22,6 +22,7 @@ oarmd_pkg=$base/metadata #install the PDR python library mkdir -p $PY_LIBDIR echo Installing python libraries into $PY_LIBDIR... +(cd $PY_LIBDIR && PY_LIBDIR=$PWD) (cd $SOURCE_DIR/python && python setup.py install --install-purelib=$PY_LIBDIR --install-scripts=$BINDIR) #install the JAVA jars From ad54b8917ef176255186a6b248658257ef2b458e Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 11 Mar 2023 17:03:47 -0500 Subject: [PATCH 072/123] dap mds3: integrate provenance tracking --- python/nistoar/midas/dap/nerdstore/base.py | 3 + python/nistoar/midas/dap/service/mds3.py | 527 ++++++++++++++------- 2 files changed, 358 insertions(+), 172 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/base.py b/python/nistoar/midas/dap/nerdstore/base.py index e0fd111..3c56e44 100644 --- a/python/nistoar/midas/dap/nerdstore/base.py +++ b/python/nistoar/midas/dap/nerdstore/base.py @@ -714,6 +714,9 @@ def path_exists(self, filepath: str) -> bool: """ raise NotImplementedError() + def __contains__(self, idorpath: str) -> bool: + return self.exists(idorpath) or self.path_exists(idorpath) + @abstractmethod def path_is_collection(self, filepath: str) -> bool: """ diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 24e48fc..f220984 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -28,11 +28,12 @@ InvalidUpdate, ObjectNotFound, PartNotAccessible, ProjectService, ProjectServiceFactory, DAP_PROJECTS) from ...dbio.wsgi.project import MIDASProjectApp, ProjectDataHandler, SubApp +from ...dbio import status from nistoar.base.config import ConfigurationException, merge_config from 
nistoar.nerdm import constants as nerdconst, utils as nerdutils from nistoar.pdr import def_schema_dir, def_etc_dir, constants as const from nistoar.pdr.utils import build_mime_type_map, read_json -from nistoar.pdr.publish.prov import PubAgent +from nistoar.pdr.publish.prov import PubAgent, Action from . import validate from .. import nerdstore @@ -270,6 +271,7 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: self.log.error("Error while cleaning up DAP record after create failure: %s", str(ex)) raise + self._record_action(Action(Action.CREATE, prec.id, self.who, prec.status.message)) return prec def _new_data_for(self, recid, meta=None, schemaid=None): @@ -445,7 +447,7 @@ def replace_data(self, id, newdata, part=None): """ return self._update_data(id, newdata, part, replace=True) - def update_data(self, id, newdata, part=None): + def update_data(self, id, newdata, part=None, message="", _prec=None): """ merge the given data into the currently save data content for the record with the given identifier. :param str id: the identifier for the record whose data should be updated. @@ -453,9 +455,7 @@ def update_data(self, id, newdata, part=None): :param stt part: the slash-delimited pointer to an internal data property. If provided, the given ``newdata`` is a value that should be set to the property pointed to by ``part``. - :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to - ``id``. If this is not provided, the record will by fetched anew based on - the ``id``. + :param str message: an optional message that will be recorded as an explanation of the update. 
:raises ObjectNotFound: if no record with the given ID exists or the ``part`` parameter points to an undefined or unrecognized part of the data :raises NotAuthorized: if the authenticated user does not have permission to read the record @@ -465,7 +465,7 @@ def update_data(self, id, newdata, part=None): :raises InvalidUpdate: if the provided ``newdata`` represents an illegal or forbidden update or would otherwise result in invalid data content. """ - return self._update_data(id, newdata, part, replace=False) + return self._update_data(id, newdata, part, replace=False, message="", prec=_prec) def clear_data(self, id, part=None, _prec=None): """ @@ -484,7 +484,9 @@ def clear_data(self, id, part=None, _prec=None): :raises PartNotAccessible: if clearing of the part of the data specified by ``part`` is not allowed. """ + set_state = False if not _prec: + set_state = True _prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized if not self._store.exists(id): @@ -493,37 +495,85 @@ def clear_data(self, id, part=None, _prec=None): self._store.load_from(nerd) nerd = self._store.open(id) - if part: - if part == "authors": + try: + if part: + what = part + if part == "authors": + nerd.authors.empty() + elif part == "references": + nerd.references.empty() + elif part == FILE_DELIM: + what = "files" + nerd.files.empty() + elif part == LINK_DELIM: + what = "links" + nerd.nonfiles.empty() + elif part == "components": + nerd.files.empty() + nerd.nonfiles.empty() + elif part in "title rights disclaimer description".split(): + resmd = nerd.get_res_data() + del resmd[part] + nerd.replace_res_data(resmd) + else: + raise PartNotAccessible(_prec.id, path, "Clearing %s not allowed" % path) + + provact = Action(Action.PATCH, _prec.id, self.who, "clearing "+what) + part = ("/"+part) if part.startswith("pdr:") else ("."+part) + provact.add_subaction(Action(Action.DELETE, _prec.id+"#data"+part, self.who, + "clearing "+what)) + 
prec.status.act(self.STATUS_ACTION_CLEAR, "cleared "+what) + + else: nerd.authors.empty() - elif part == "references": nerd.references.empty() - elif part == FILE_DELIM: nerd.files.empty() - elif part == LINK_DELIM: nerd.nonfiles.empty() - elif part == "components": - nerd.files.empty() - nerd.nonfiles.empty() - elif part in "title rights disclaimer description".split(): - resmd = nerd.get_res_data() - del resmd[part] - nerd.replace_res_data(resmd) - else: - raise PartNotAccessible(_prec.id, path, "Clearing %s not allowed" % path) + nerd.replace_res_data(self._new_data_for(_prec.id, prec.meta)) - else: - nerd.authors.empty() - nerd.references.empty() - nerd.files.empty() - nerd.nonfiles.empty() - nerd.replace_res_data(self._new_data_for(_prec.id, prec.meta)) + provact = Action(Action.PATCH, _prec.id, self.who, "clearing all NERDm data") + prec.status.act(self.STATUS_ACTION_CLEAR, "cleared all NERDm data") + + except PartNotAccessible: + # client request error; don't record action + raise + + except Exception as ex: + self.log.error("Failed to clear requested NERDm data, %s: %s", _prec.id, str(ex)) + self.log.warning("Partial update is possible") + provact.message = "Failed to clear requested NERDm data" + self._record_action(provact) + + prec.status.act(self.STATUS_ACTION_CLEAR, "Failed to clear NERDm data") + prec.set_state(status.EDIT) + prec.data = self._summarize(nerd) + self._try_save(prec) + raise + + prec.data = self._summarize(nerd) + if set_state: + prec.status.set_state(status.EDIT) + try: + prec.save() + + except Exception as ex: + self.log.error("Failed to saved DBIO record, %s: %s", prec.id, str(ex)) + raise - def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=False): + finally: + self._record_action(provact) + + + def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=False, message=""): + set_action = False if not prec: + set_action = True # setting the last action will NOT be the caller's 
responsibility prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized + if prec.status.state not in [status.EDIT, status.READY]: + raise NotEditable(id) + if not nerd: if not self._store.exists(id): self.log.warning("NERDm data for id=%s not found in metadata store", prec.id) @@ -534,10 +584,14 @@ def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=Fal nerd = self._store.open(id) + provact = Action(Action.PUT if not part and replace else Action.PATCH, + prec.id, self.who, message) + if not part: # this is a complete replacement; save updated NERDm data to the metadata store try: - data = self._update_all_nerd(prec, nerd, newdata, replace) + # prep the provenance record + data = self._update_all_nerd(prec, nerd, newdata, provact, replace) except InvalidUpdate as ex: ex.record_id = prec.id raise @@ -551,9 +605,7 @@ def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=Fal ex.record_part = part raise - prec.data = self._summarize(nerd) - prec.save() - + self._save_data(self._summarize(nerd), prec, message, set_action and self.STATUS_ACTION_UPDATE) return data def _summarize(self, nerd: NERDResource): @@ -575,7 +627,9 @@ def _summarize(self, nerd: NERDResource): "bureauCode programCode systemOfRecords primaryITInvestmentUII " + \ "doi ediid releaseHistory status theme").split() - def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, data: Mapping, replace=False): + def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, + data: Mapping, provact: Action, replace=False): + # filter out properties that the user is not allow to update newdata = OrderedDict() for prop in data: @@ -631,38 +685,86 @@ def _update_all_nerd(self, prec: ProjectRecord, nerd: NERDResource, data: Mappin raise InvalidUpdate("Input validation error: "+str(errors[0]), prec.id, errors=errors) # all data is merged and validated; now commit - nerd.replace_res_data(newdata) - if replace: - 
nerd.authors.empty() - if authors: - nerd.authors.replace_all_with(authors) - nerd.references.empty() - if refs: - nerd.references.replace_all_with(refs) - nerd.nonfiles.empty() - if nonfiles: - nerd.nonfiles.replace_all_with(nonfiles) - else: - def put_listitem_into(item, objlist): - if item.get("@id"): - objlist.set(item.get("@id"), item) + try: + old = nerd.get_res_data() + nerd.replace_res_data(newdata) + provact.add_subaction(Action(Action.PUT if replace else Action.PATCH, + prec.id+"#data/pdr.r", self.who, + "updating resource-level metadata", + self._jsondiff(old, newdata))) + + if replace: + old = nerd.authors.get_data() + nerd.authors.empty() + if authors: + provact.add_subaction(Action(Action.PUT, prec.id+"#data.authors", self.who, + "replacing authors", self._jsondiff(old, authors))) + nerd.authors.replace_all_with(authors) else: - objlist.append(item) - def put_each_into(data, objlist): - for item in data: - put_listitem_into(item, objlist) - - if authors: - put_each_into(authors, nerd.authors) - if refs: - put_each_into(refs, nerd.references) - if nonfiles: - put_each_into(nonfiles, nerd.nonfiles) + provact.add_subaction(Action(Action.DELETE, prec.id+"#data.authors", self.who, + "removing authors")) - if replace: - nerd.files.empty() - for fmd in files: - nerd.files.set_file_at(fmd) + old = nerd.references.get_data() + nerd.references.empty() + if refs: + provact.add_subaction(Action(Action.PUT, prec.id+"#data.references", self.who, + "replacing references", self._jsondiff(old, refs))) + nerd.references.replace_all_with(refs) + else: + provact.add_subaction(Action(Action.DELETE, prec.id+"#data.references", self.who, + "removing references")) + + old = nerd.nonfiles.get_data() + nerd.nonfiles.empty() + if nonfiles: + provact.add_subaction(Action(Action.PUT, prec.id+"#data/pdr:see", self.who, + "replacing non-file components", self._jsondiff(old, nonfiles))) + nerd.nonfiles.replace_all_with(nonfiles) + else: + provact.add_subaction(Action(Action.DELETE, 
prec.id+"#data/pdr:see", self.who, + "removing non-file components")) + + else: + def put_listitem_into(item, objlist): + if item.get("@id"): + objlist.set(item.get("@id"), item) + else: + objlist.append(item) + def put_each_into(data, objlist): + for item in data: + put_listitem_into(item, objlist) + + if authors: + provact.add_subaction(Action(Action.PATCH, prec.id+"#data.authors", self.who, + "updating authors", self._jsondiff(old, nonfiles))) + put_each_into(authors, nerd.authors) + if refs: + provact.add_subaction(Action(Action.PATCH, prec.id+"#data.references", self.who, + "updating references", self._jsondiff(old, refs))) + put_each_into(refs, nerd.references) + if nonfiles: + provact.add_subaction(Action(Action.PATCH, prec.id+"#data/pdr:see", self.who, + "updating non-file components", self._jsondiff(old, nonfiles))) + put_each_into(nonfiles, nerd.nonfiles) + + if replace: + provact.add_subaction(Action(Action.PUT, prec.id+"#data/pdr:f", self.who, + "replacing non-file components")) + nerd.files.empty() + else: + provact.add_subaction(Action(Action.PUT, prec.id+"#data/pdr:f", self.who, + "replacing non-file components")) + for fmd in files: + nerd.files.set_file_at(fmd) + + except Exception as ex: + provact.message = "Failed to save NERDm data update due to internal error" + self.log.error("Failed to save NERDm metadata: "+str(ex)) + self.log.warning("Failed NERDm save may have been partial") + raise + + finally: + self._record_action(provact) return nerd.get_data(True) @@ -675,123 +777,202 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, # respectively. 
schemabase = prec.data.get("_schema") or NERDMPUB_SCH_ID - - m = re.search(r'^([a-z]+s)\[([\w\d\.\/]+)\]$', path) - if m: - # path is of the form xxx[k] and refers to an item in a list - key = m.group(2) - try: - key = int(key) - except ValueError: - pass - - if m.group(1) == "authors": - data["_schema"] = schemabase+"/definitions/Person" - data = self._update_listitem(nerd.authors, self._moderate_author, data, key, replace, doval) - elif m.group(1) == "references": - data["_schema"] = schemabase+"/definitions/BibliographicReference" - data = self._update_listitem(nerd.references, self._moderate_reference, data, key, - replace, doval) - elif m.group(1) == LINK_DELIM: - data["_schema"] = schemabase+"/definitions/Component" - data = self._update_listitem(nerd.nonfiles, self._moderate_nonfile, data, key, - replace, doval) - elif m.group(1) == "components" or m.group(1) == FILE_DELIM: - data["_schema"] = schemabase+"/definitions/Component" - data = self._update_component(nerd, data, key, replace, doval=doval) - else: - raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + subacttype = Action.PUT if replace else Action.PATCH + provact = Action(Action.PATCH, prec.id, self.who, "updating NERDm part") - elif path == "authors": - if not isinstance(data, list): - err = "authors data is not a list" - raise InvalidUpdate(err, id, path, errors=[err]) - if replace: - data = self._replace_objlist(nerd.authors, self._moderate_author, data, doval) - else: - data = self._update_objlist(nerd.authors, self._moderate_author, data, doval) + try: + m = re.search(r'^([a-z]+s)\[([\w\d\.\/]+)\]$', path) + if m: + # path is of the form xxx[k] and refers to an item in a list + key = m.group(2) + try: + key = int(key) + except ValueError: + pass - elif path == "references": - if not isinstance(data, list): - err = "references data is not a list" - raise InvalidUpdate(err, id, path, errors=[err]) - if replace: - data = self._replace_objlist(nerd.references, 
self._moderate_reference, data, doval) - else: - data = self._update_objlist(nerd.references, self._moderate_reference, data, doval) + old = {} + if m.group(1) == "authors": + what = "adding author" + if key in nerd.authors: + old = nerd.authors.get(key) + what = "updating author" + data["_schema"] = schemabase+"/definitions/Person" + data = self._update_listitem(nerd.authors, self._moderate_author, data, key, + replace, doval) + provact.add_subaction(Action(subacttype, "%s#data.authors[%s]" % (prec.id, str(key)), + what, self._jsondiff(old, data))) + + elif m.group(1) == "references": + what = "adding author" + if key in nerd.authors: + old = nerd.authors.get(key) + what = "updating author" + data["_schema"] = schemabase+"/definitions/BibliographicReference" + data = self._update_listitem(nerd.references, self._moderate_reference, data, key, + replace, doval) + provact.add_subaction(Action(subacttype, "%s#data.references[%s]" % (prec.id, str(key)), + what, self._jsondiff(old, data))) + + elif m.group(1) == LINK_DELIM: + what = "adding link" + if key in nerd.nonfiles: + old = nerd.nonfiles.get(key) + what = "updating link" + data["_schema"] = schemabase+"/definitions/Component" + data = self._update_listitem(nerd.nonfiles, self._moderate_nonfile, data, key, + replace, doval) + provact.add_subaction(Action(subacttype, "%s#data/pdr:see[%s]" % (prec.id, str(key)), + what, self._jsondiff(old, data))) + + elif m.group(1) == "components" or m.group(1) == FILE_DELIM: + if ('filepath' not in data and key in nerd.nonfiles): + old = nerd.nonfiles.get(key) + what = "updating link" + elif key in nerd.files: + old = nerd.files.get(key) + what = "updating file" + else: + old = {} + what = "adding component" + data["_schema"] = schemabase+"/definitions/Component" + data = self._update_component(nerd, data, key, replace, doval=doval) + provact.add_subaction(Action(subacttype, "%s#data/pdr:f[%s]" % (prec.id, str(key)), + what, self._jsondiff(old, data))) + + else: + raise 
PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + + elif path == "authors": + if not isinstance(data, list): + err = "authors data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) + old = nerd.authors.get_data() + if replace: + data = self._replace_objlist(nerd.authors, self._moderate_author, data, doval) + else: + data = self._update_objlist(nerd.authors, self._moderate_author, data, doval) + provact.add_subaction(Action(subacttype, prec.id+"#data.authors", "updating authors", + self._jsondiff(old, data))) - elif path == LINK_DELIM: - if not isinstance(data, list): - err = "non-file (links) data is not a list" - raise InvalidUpdate(err, id, path, errors=[err]) - if replace: - data = self._replace_objlist(nerd.nonfies, self._moderate_nonfile, data, doval) - else: - data = self._update_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) - - # elif path == FILE_DELIM: - # if not isinstance(data, list): - # err = "components data is not a list" - # raise InvalidUpdate(err, id, path, errors=[err]) - - elif path == "components" or path == FILE_DELIM: - if not isinstance(data, list): - err = "components data is not a list" - raise InvalidUpdate(err, id, path, errors=[err]) - files, nonfiles = self._merge_comps_for_update(nerd, data, replace, doval) - if replace: + elif path == "references": + if not isinstance(data, list): + err = "references data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) + old = nerd.references.get_data() + if replace: + data = self._replace_objlist(nerd.references, self._moderate_reference, data, doval) + else: + data = self._update_objlist(nerd.references, self._moderate_reference, data, doval) + provact.add_subaction(Action(subacttype, prec.id+"#data.references", "updating references", + self._jsondiff(old, data))) + + elif path == LINK_DELIM: + if not isinstance(data, list): + err = "non-file (links) data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) + old = 
nerd.nonfiles.get_data() + if replace: + data = self._replace_objlist(nerd.nonfies, self._moderate_nonfile, data, doval) + else: + data = self._update_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) + provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", "updating link list", + self._jsondiff(old, data))) + + + # elif path == FILE_DELIM: + # if not isinstance(data, list): + # err = "components data is not a list" + # raise InvalidUpdate(err, id, path, errors=[err]) + + elif path == "components" or path == FILE_DELIM: + if not isinstance(data, list): + err = "components data is not a list" + raise InvalidUpdate(err, id, path, errors=[err]) + oldn = nerd.nonfiles.get_data() + oldf = nerd.files.get_files() + files, nonfiles = self._merge_comps_for_update(nerd, data, replace, doval) + if replace: + if path == "components": + nerd.nonfiles.empty() + nerd.files.empty() if path == "components": - nerd.nonfiles.empty() - nerd.files.empty() - if path == "components": - for cmp in nonfiles: - if cmp.get("@id"): - nerd.nonfiles.set(cmp['@id']) - else: - nerd.nonfiles.append(cmp) - for cmp in files: - nerd.files.set_file_at(cmp) + for cmp in nonfiles: + if cmp.get("@id"): + nerd.nonfiles.set(cmp['@id']) + else: + nerd.nonfiles.append(cmp) + + provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:f", "updating file list", + self._jsondiff(oldn, nerd.nonfiles.get_data()))) + for cmp in files: + nerd.files.set_file_at(cmp) - if path == "components": - data = nerd.nonfiles.get_data() + nerd.files.get_files() + if path == "components": + provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", "updating link list", + self._jsondiff(oldf, nerd.nonfiles.get_data()))) + data = nerd.nonfiles.get_data() + nerd.files.get_files() + else: + data = nerd.files.get_files() + + elif path == "contactPoint": + if not isinstance(data, Mapping): + raise InvalidUpdate("contactPoint data is not an object", sys=self) + res = nerd.get_res_data() + old = 
res['contactPoint'] + res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) + # may raise InvalidUpdate + provact.add_subaction(Action(subacttype, prec.id+"#data.contactPoint", + "updating contact point", self._jsondiff(old, res['contactPoint']))) + nerd.replace_res_data(res) + data = res[path] + + elif path == "@type": + if not isinstance(data, (list, str)): + raise InvalidUpdate("@type data is not a list of strings", sys=self) + res = nerd.get_res_data() + old = res['@type'] + res = self._moderate_restype(data, res, nerd, replace=replace, doval=doval) + provact.add_subaction(Action(subacttype, prec.id+"#data.@type", "updating resource types", + self._jsondiff(old, res['@type']))) + nerd.replace_res_data(res) + data = res[path] + + elif path == "description": + if not isinstance(data, (list, str)): + raise InvalidUpdate("description data is not a list of strings", sys=self) + res = nerd.get_res_data() + old = res['description'] + res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate + provact.add_subaction(Action(subacttype, prec.id+"#data.description", "updating description", + self._jsondiff(old, res['description']))) + nerd.replace_res_data(res) + data = res[path] + + elif path in "title rights disclaimer".split(): + if not isinstance(data, str): + raise InvalidUpdate("%s value is not a string" % path, sys=self) + res = nerd.get_res_data() + old = res[path] + res[path] = self._moderate_text(data, res, doval=doval) # may raise InvalidUpdate + provact.add_subaction(Action(subacttype, prec.id+"#data."+path, "updating "+path, + self._jsondiff(old, res[path]))) + nerd.replace_res_data(res) + data = res[path] + else: - data = nerd.files.get_files() - - elif path == "contactPoint": - if not isinstance(data, Mapping): - raise InvalidUpdate("contactPoint data is not an object", sys=self) - res = nerd.get_res_data() - res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) - 
# may raise InvalidUpdate - nerd.replace_res_data(res) - data = res[path] - - elif path == "@type": - if not isinstance(data, (list, str)): - raise InvalidUpdate("@type data is not a list of strings", sys=self) - res = nerd.get_res_data() - res = self._moderate_restype(data, res, nerd, replace=replace, doval=doval) - nerd.replace_res_data(res) - data = res[path] - - elif path == "description": - if not isinstance(data, (list, str)): - raise InvalidUpdate("description data is not a list of strings", sys=self) - res = nerd.get_res_data() - res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate - nerd.replace_res_data(res) - data = res[path] - - elif path in "title rights disclaimer".split(): - if not isinstance(data, str): - raise InvalidUpdate("%s value is not a string" % path, sys=self) - res = nerd.get_res_data() - res[path] = self._moderate_text(data, res, doval=doval) # may raise InvalidUpdate - nerd.replace_res_data(res) - data = res[path] - + raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + + except PartNotAccessible: + # client request error; don't record action + raise + except Exception as ex: + self.log.error("Failed to save update to NERDm data, %s: %s", prec.id, str(ex)) + self.log.warning("Partial update is possible") + provact.message = "Failed to update NERDm part" + self._record_action(provact) + raise else: - raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) + self._record_action(provact) return data @@ -1238,6 +1419,8 @@ def _moderate_text(self, val, resmd=None, doval=True): def _moderate_description(self, val, resmd=None, doval=True): if isinstance(val, str): val = val.split("\n\n") + if not isinstance(val, Sequence): + raise InvalidUpdate("Description value is not a string or array of strings", sys=self) return [self._moderate_text(t, resmd, doval=doval) for t in val if t != ""] _pfx_for_type = OrderedDict([ From 3913f0b103798d07487138055dc4f609edc685bc Mon Sep 17 
00:00:00 2001 From: RayPlante Date: Mon, 13 Mar 2023 06:35:27 -0400 Subject: [PATCH 073/123] dbio.project: make finalize() more extensible --- python/nistoar/midas/dbio/project.py | 86 +++++++++++++++++-- .../tests/nistoar/midas/dbio/test_project.py | 2 +- 2 files changed, 79 insertions(+), 9 deletions(-) diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 5dcdd4b..a08cde8 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -9,6 +9,7 @@ subclassed to handle the creation of the different types of projects and conventions, policies, and interaction models for manipulating them. """ +import re from logging import Logger, getLogger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence @@ -22,6 +23,8 @@ from . import status from .. import MIDASException, MIDASSystem from nistoar.pdr.publish.prov import PubAgent, Action +from nistoar.id.versions import OARVersion +from nistoar.pdr import ARK_NAAN _STATUS_ACTION_CREATE = RecordStatus.CREATE_ACTION _STATUS_ACTION_UPDATE = RecordStatus.UPDATE_ACTION @@ -630,7 +633,7 @@ def finalize(self, id, message=None, as_version=None, _prec=None) -> status.Reco {"name": "finalize", "errors": ex.errors})) stat.set_state(status.EDIT) stat.act(self.STATUS_ACTION_FINALIZE, ex.format_errors()) - self._try_save(prec) + self._try_save(_prec) raise except Exception as ex: @@ -640,7 +643,7 @@ def finalize(self, id, message=None, as_version=None, _prec=None) -> status.Reco {"name": "finalize", "errors": [emsg]})) stat.set_state(status.EDIT) stat.act(self.STATUS_ACTION_FINALIZE, emsg) - self._try_save(prec) + self._try_save(_prec) raise else: @@ -653,14 +656,81 @@ def finalize(self, id, message=None, as_version=None, _prec=None) -> status.Reco _prec.save() return stat.clone() - - def _apply_final_updates(self, prec): - # update the data - + MAJOR_VERSION_LEV = 0 + MINOR_VERSION_LEV = 1 + TRIVIAL_VERSION_LEV = 2 + + def 
_apply_final_updates(self, prec: ProjectRecord, vers_inc_lev: int=None): + # update the data content + self._finalize_data(prec) + + # ensure a finalized version + ver = self._finalize_version(prec, vers_inc_lev) + + # ensure a finalized data identifier + id = self._finalize_id(prec) + + self._validate_data(prec) + return "draft is ready for submission as %s, %s" % (id, ver) + + def _finalize_version(self, prec: ProjectRecord, vers_inc_lev: int=None): + """ + determine what the version string for the to-be-submitted document should be + and save it into the record. + + This implementation will increment the "minor" (i.e. second) field in the version + string by default and save as part of the data field as a "@version" property. To + be incremented, the current version must include a suffix the starts with "+"; after + being incremented, the suffix is dropped (to indicate that it should not be further + incremented). If a version is not yet assigned, it will be set as 1.0.0. + + :param ProjectRecord prec: the record to finalize + :param int vers_inc_lev: the field position in the version to increment if the + version needs incrmenting. + :returns: the version that the record will be published as + """ + # determine output version + if vers_inc_lev is None: + # assess the state of the revision to determine proper level + vers_inc_lev = self.MINOR_VERSION_LEV - self._validate_data(prec.data) - return "draft is ready for submission" + vers = OARVersion(prec.data.setdefault('@version', "1.0.0")) + if vers.is_draft(): + vers.drop_suffix().increment_field(vers_inc_lev) + prec.date['@version'] = str(vers) + + return prec.data['@version'] + + def _finalize_id(self, prec): + """ + finalize the identifier that will be attached to to-be-submitted document. Generally, + an identifier is assigned once the first time it is finalized and is normally not changed + subsequently; however, an implementations may alter this ID according to policy. 
+ """ + # determine output identifier + if not prec.data.get('@id'): + prec.data['@id'] = self._arkify_recid(prec.id) + + return prec.data['@id'] + + def _arkify_recid(self, recid): + """ + turn a standard project record identifier into an institutional ARK identifier. + If the the given identifier is not recognized as a project record identifier + (based on its form), it is returned unchanged. + """ + naan = self.cfg.get('ark_naan', ARK_NAAN) + m = re.match('^(\w+):([\w\-\/]+)$', recid) + if m: + return "ark:/%s/%s-%s" % (naan, m.group(1), m.group(2)) + return recid + + def _finalize_data(self, prec): + """ + update the data content for the record in preparation for submission. + """ + pass def submit(self, id: str, message: str=None, _prec=None) -> status.RecordStatus: """ diff --git a/python/tests/nistoar/midas/dbio/test_project.py b/python/tests/nistoar/midas/dbio/test_project.py index 2a27723..74d7455 100644 --- a/python/tests/nistoar/midas/dbio/test_project.py +++ b/python/tests/nistoar/midas/dbio/test_project.py @@ -239,7 +239,7 @@ def test_finalize(self): self.project.finalize(prec.id) stat = self.project.get_status(prec.id) self.assertEqual(stat.state, "ready") - self.assertEqual(stat.message, "draft is ready for submission") + self.assertTrue(stat.message.startswith("draft is ready for submission as ")) prec = self.project.get_record(prec.id) prec._data['status']['state'] = "ennui" From 89e495c5654832f2b5f16f960f0d92123e353077 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 13 Mar 2023 08:34:12 -0400 Subject: [PATCH 074/123] fix oar-metadata build issues due to changes in setuptools --- docker/pyenv/Dockerfile | 2 +- metadata | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/pyenv/Dockerfile b/docker/pyenv/Dockerfile index a2065a3..c438ea6 100644 --- a/docker/pyenv/Dockerfile +++ b/docker/pyenv/Dockerfile @@ -13,7 +13,7 @@ FROM oar-metadata/ejsonschema RUN apt-get update && apt-get install -y python-yaml curl wget less sudo 
zip \ p7zip-full ca-certificates git -RUN pip install --upgrade pip setuptools +# RUN pip install --upgrade pip setuptools RUN pip install funcsigs 'bagit>=1.6.3,<2.0' 'fs>=2.0.21' jsonpatch mako # install multibag from source diff --git a/metadata b/metadata index e6e2b3e..1e83bb6 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit e6e2b3e5f06d99153917b68c8894adeadcdb7041 +Subproject commit 1e83bb634ca5098033e3ea8b67817c8f74ecc853 From 615a71861424ddc84c6a23c196dc3e0fac517a73 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 13 Mar 2023 08:51:19 -0400 Subject: [PATCH 075/123] enable dap/mds3 in midasserver --- docker/midasserver/midas-dmpdap_conf.yml | 12 ++++++++++-- python/nistoar/midas/wsgi.py | 5 +++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml index 8d88f14..7932e68 100644 --- a/docker/midasserver/midas-dmpdap_conf.yml +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -28,16 +28,24 @@ services: include_headers: "Access-Control-Allow-Origin": "*" - default_convention: mdsx + default_convention: mds3 conventions: mdsx: about: - title: "Digital Asset Publication (DAP) Authoring API (mds3 convention)" + title: "Digital Asset Publication (DAP) Authoring API (experimental)" describedBy: "http://localhost:9091/docs/dapsvc-elements.html" href: "http://localhost:9091/midas/dap/mdsx" version: mdsx assign_doi: always doi_naan: "18434" + mdsx: + about: + title: "Digital Asset Publication (DAP) Authoring API (mds3 convention)" + describedBy: "http://localhost:9091/docs/dapsvc-elements.html" + href: "http://localhost:9091/midas/dap/mds3" + version: mds3 + assign_doi: always + doi_naan: "18434" dmp: about: diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index cd1b9fe..1fdab57 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -124,7 +124,7 @@ from . 
import system from .dbio.base import DBClientFactory from .dbio.wsgi import project as prj, SubApp, Handler, DBIOHandler -from .dap.service import mdsx +from .dap.service import mdsx, mds3 from .dbio.inmem import InMemoryDBClientFactory from .dbio.fsbased import FSBasedDBClientFactory from .dbio.mongo import MongoDBClientFactory @@ -442,7 +442,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: PubAge _MIDASSubApps = { # "dmp/mdm1": mdm1.DMPApp, "dmp/mdm1": prj.MIDASProjectApp.factory_for("dmp"), - "dap/mdsx": mdsx.DAPApp + "dap/mdsx": mdsx.DAPApp, + "dap/mds3": mds3.DAPApp } class MIDASApp: From 254bca8391d4ae46843c9329981fa8ef88d40e3c Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 13 Mar 2023 09:36:23 -0400 Subject: [PATCH 076/123] midasserver: need nerdstorage config --- docker/midasserver/midas-dmpdap_conf.yml | 5 ++++- python/nistoar/midas/dap/nerdstore/fsbased.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml index 7932e68..aee39d0 100644 --- a/docker/midasserver/midas-dmpdap_conf.yml +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -38,7 +38,7 @@ services: version: mdsx assign_doi: always doi_naan: "18434" - mdsx: + mds3: about: title: "Digital Asset Publication (DAP) Authoring API (mds3 convention)" describedBy: "http://localhost:9091/docs/dapsvc-elements.html" @@ -46,6 +46,9 @@ services: version: mds3 assign_doi: always doi_naan: "18434" + nerdstorage: + type: fsbased + store_dir: /data/midas/nerdm dmp: about: diff --git a/python/nistoar/midas/dap/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py index e73798c..2e126fe 100644 --- a/python/nistoar/midas/dap/nerdstore/fsbased.py +++ b/python/nistoar/midas/dap/nerdstore/fsbased.py @@ -964,6 +964,11 @@ def __init__(self, storeroot: str, newidprefix: str="nrd", logger: Logger=None): :param str newidprefix: a prefix to use when minting new identifiers """ 
self._dir = Path(storeroot) + pdir = self._dir.parents[0] + if not pdir.is_dir(): + raise StorageFormatException("%s: does not exist as a directory" % str(pdir)) + if not self._dir.exists(): + self._dir.mkdir() if not self._dir.is_dir(): raise StorageFormatException("%s: does not exist as a directory" % str(self._dir)) if not os.access(self._dir, os.R_OK|os.W_OK|os.X_OK): From e5c191e29b3c86d2c0a442c89541d220e63a49de Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 13 Mar 2023 12:07:09 -0400 Subject: [PATCH 077/123] github actions: fix testall (remove artifacts from oar-pdr origin) --- .github/workflows/testall.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/testall.yml b/.github/workflows/testall.yml index 2e34ec0..8ad3bb2 100644 --- a/.github/workflows/testall.yml +++ b/.github/workflows/testall.yml @@ -27,13 +27,5 @@ jobs: cd docker && bash ./dockbuild.sh - name: Build & Run Python Tests via Docker - run: cd docker && ./testall python + run: scripts/testall.docker - - name: Test Java via Docker - run: cd docker && ./testall java - - - name: Build Angular Code - run: cd docker && ./makedist angular - - - name: Run Angular Tests - run: cd docker && ./testall angular From c93a7184a68395b505bd66adef2113d6d1d21ed6 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 14 Mar 2023 09:26:52 -0400 Subject: [PATCH 078/123] tested/debugged integrated midasserver with its configuration bug fixes: * fixed doi_naan configuration param * fixed some dap service exception creation errors * fixed DAPApp creation (added missing log) * made sure service factories have common signature * scripts/install.sh: force use of python3 enhancements: * minor doc fixes * added midasserver unit tests * print uwsgi command line --- docker/midasserver/entrypoint.sh | 3 + docker/midasserver/midas-dmpdap_conf.yml | 8 +- python/nistoar/midas/dap/nerdstore/base.py | 2 +- python/nistoar/midas/dap/service/mds3.py | 9 +- 
python/nistoar/midas/dbio/base.py | 4 +- python/nistoar/midas/dbio/wsgi/project.py | 8 +- python/tests/nistoar/midas/test_wsgi.py | 110 ++++++++++++++++++++- scripts/install.sh | 2 +- 8 files changed, 129 insertions(+), 17 deletions(-) diff --git a/docker/midasserver/entrypoint.sh b/docker/midasserver/entrypoint.sh index 24d0417..99e50ee 100644 --- a/docker/midasserver/entrypoint.sh +++ b/docker/midasserver/entrypoint.sh @@ -21,6 +21,9 @@ opts= oar_midas_db_type=$1 [ -z "$oar_midas_db_type" ] || opts="--set-ph oar_midas_db_type=$oar_midas_db_type" +echo '++' uwsgi --plugin python3 --http-socket :$port --wsgi-file $script --static-map /docs=/docs \ + --set-ph oar_config_file=$OAR_MIDASSERVER_CONFIG \ + --set-ph oar_working_dir=$OAR_WORKING_DIR $opts uwsgi --plugin python3 --http-socket :$port --wsgi-file $script --static-map /docs=/docs \ --set-ph oar_config_file=$OAR_MIDASSERVER_CONFIG \ --set-ph oar_working_dir=$OAR_WORKING_DIR $opts diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml index aee39d0..4f300e0 100644 --- a/docker/midasserver/midas-dmpdap_conf.yml +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -16,9 +16,9 @@ services: clients: midas: - default_shoulder: mdsx + default_shoulder: mds3 default: - default_shoulder: mdsx + default_shoulder: mds3 dbio: superusers: [ "rlp3" ] @@ -37,7 +37,7 @@ services: href: "http://localhost:9091/midas/dap/mdsx" version: mdsx assign_doi: always - doi_naan: "18434" + doi_naan: "10.18434" mds3: about: title: "Digital Asset Publication (DAP) Authoring API (mds3 convention)" @@ -45,7 +45,7 @@ services: href: "http://localhost:9091/midas/dap/mds3" version: mds3 assign_doi: always - doi_naan: "18434" + doi_naan: "10.18434" nerdstorage: type: fsbased store_dir: /data/midas/nerdm diff --git a/python/nistoar/midas/dap/nerdstore/base.py b/python/nistoar/midas/dap/nerdstore/base.py index 3c56e44..c5baa2a 100644 --- a/python/nistoar/midas/dap/nerdstore/base.py +++ 
b/python/nistoar/midas/dap/nerdstore/base.py @@ -12,7 +12,7 @@ __all__ = [ "NERDResource", "NERDAuthorList", "NERDRefList", "NERDNonFileComps", "NERDFileComps", "NERDStorageException", "MismatchedIdentifier", "RecordDeleted", "ObjectNotFound", - "CollectionRemovalDissallowed", "NERDResourceStorage" ] + "StorageFormatException", "CollectionRemovalDissallowed", "NERDResourceStorage" ] NERDResource = NewType("NERDResource", ABC) NERDAuthorList = NewType("NERDAuthorList", NERDResource) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index f220984..0e6aafd 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -414,11 +414,13 @@ def get_nerdm_data(self, id: str, part: str=None): try: out = nerd.files.get_file_by_path(fprts[1]) except nerdstore.ObjectNotFound as ex: - raise ObjectNotFound(id, part, str(ex)) + raise ObjectNotFound(prec.id, part, str(ex)) else: out = nerd.get_res_data() if part in out: out = out[part] + elif part in "description @type contactPoint title rights disclaimer".split(): + raise ObjectNotFound(prec.id, part, "%s property not set yet" % part) else: raise PartNotAccessible(prec.id, part, "Accessing %s not supported" % part) @@ -1882,9 +1884,10 @@ def __init__(self, dbcli_factory: DBClientFactory, log: Logger, config: dict={}, service_factory: ProjectServiceFactory=None, project_coll: str=None): if not project_coll: project_coll = DAP_PROJECTS + uselog = log.getChild(project_coll) if not service_factory: - service_factory = DAPServiceFactory(dbcli_factory, config, project_coll) - super(DAPApp, self).__init__(service_factory, log.getChild(project_coll), config) + service_factory = DAPServiceFactory(dbcli_factory, config, uselog, project_coll=project_coll) + super(DAPApp, self).__init__(service_factory, uselog, config) self._data_update_handler = DAPProjectDataHandler class DAPProjectDataHandler(ProjectDataHandler): diff --git 
a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 1af8984..082da9a 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -872,8 +872,8 @@ def _next_recnum(self, shoulder): def _new_record_data(self, id): """ - return a new ProjectRecord instance with the given identifier assigned to it. Generally, - this record should not be committed yet. + return a dictionary containing data that will constitue a new ProjectRecord with the given + identifier assigned to it. Generally, this record should not be committed yet. """ return {"id": id} diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 03feec0..79605e5 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -837,10 +837,10 @@ class _factory: def __init__(self, project_coll): self._prjcoll = project_coll def __call__(self, dbcli_factory: dbio.DBClientFactory, log: Logger, config: dict={}, - prjcoll: str=None): - if not prjcoll: - prjcoll = self._prjcoll - service_factory = ProjectServiceFactory(prjcoll, dbcli_factory, config, log) + project_coll: str=None): + if not project_coll: + project_coll = self._prjcoll + service_factory = ProjectServiceFactory(project_coll, dbcli_factory, config, log) return MIDASProjectApp(service_factory, log, config) @classmethod diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 9342342..502ec52 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -1,9 +1,11 @@ -import os, json, pdb, logging, tempfile +import os, json, pdb, logging, tempfile, shutil from collections import OrderedDict from io import StringIO +from pathlib import Path import unittest as test +import yaml -from nistoar.midas.dbio import inmem, base +from nistoar.midas.dbio import inmem, fsbased, base from nistoar.midas import wsgi as app from nistoar.pdr.publish 
import prov @@ -608,6 +610,110 @@ def test_dap(self): self.assertEqual(data["owner"], "anonymous") self.assertEqual(data["type"], "drafts") +midasserverdir = Path(__file__).parents[4] / 'docker' / 'midasserver' +midasserverconf = midasserverdir / 'midas-dmpdap_conf.yml' + +class TestMIDASServer(test.TestCase): + # This tests midas wsgi app with the configuration provided in docker/midasserver + # In particular it tests the examples given in the README + + def start(self, status, headers=None, extup=None): + self.resp.append(status) + for head in headers: + self.resp.append("{0}: {1}".format(head[0], head[1])) + + def body2dict(self, body): + return json.loads("\n".join(self.tostr(body)), object_pairs_hook=OrderedDict) + + def tostr(self, resplist): + return [e.decode() for e in resplist] + + def setUp(self): + self.resp = [] + self.workdir = os.path.join(tmpdir.name, 'midasdata') + self.dbdir = os.path.join(self.workdir, 'dbfiles') + if not os.path.exists(self.dbdir): + if not os.path.exists(self.workdir): + os.mkdir(self.workdir) + os.mkdir(self.dbdir) + with open(midasserverconf) as fd: + self.config = yaml.safe_load(fd) + self.config['working_dir'] = self.workdir + self.config['services']['dap']['conventions']['mds3']['nerdstorage']['store_dir'] = \ + os.path.join(self.workdir, 'nerdm') + self.clifact = fsbased.FSBasedDBClientFactory({}, self.dbdir) + self.app = app.MIDASApp(self.config, self.clifact) + + def tearDown(self): + if os.path.exists(self.workdir): + shutil.rmtree(self.workdir) + + def test_set_up(self): + self.assertTrue(self.app.subapps) + self.assertIn("dmp/mdm1", self.app.subapps) + self.assertIn("dap/mdsx", self.app.subapps) + self.assertIn("dap/mds3", self.app.subapps) + + self.assertEqual(self.app.subapps["dmp/mdm1"].svcfact._prjtype, "dmp") + self.assertEqual(self.app.subapps["dap/mdsx"].svcfact._prjtype, "dap") + self.assertEqual(self.app.subapps["dap/mds3"].svcfact._prjtype, "dap") + + self.assertTrue(os.path.isdir(self.workdir)) + 
self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'dbfiles'))) + self.assertTrue(not os.path.exists(os.path.join(self.workdir, 'dbfiles', 'nextnum'))) + + def test_create_dmp(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dmp/mdm1', + 'wsgi.input': StringIO('{"name": "CoTEM", "data": {"title": "Microscopy of Cobalt Samples"}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mdm1:0001') + self.assertEqual(data['name'], "CoTEM") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + + self.assertTrue(os.path.isdir(self.workdir)) + self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'dbfiles'))) + self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'dbfiles', 'dmp'))) + self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'dbfiles', 'dmp', 'mdm1:0001.json'))) + self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'dbfiles', 'nextnum', 'mdm1.json'))) + + self.resp = [] + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dmp/mdm1/mdm1:0001/data', + 'wsgi.input': StringIO('{"expectedDataSize": "2 TB"}') + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertTrue(data['title'].startswith("Microscopy of ")) + self.assertEqual(data['expectedDataSize'], "2 TB") + + def test_create_dap3(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': StringIO('{"name": "first", "data": {"title": "Microscopy of Cobalt Samples"}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + + self.assertTrue(os.path.isdir(self.workdir)) + self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'dbfiles'))) + 
self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'dbfiles', 'dap'))) + self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'dbfiles', 'dap', 'mds3:0001.json'))) + self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'dbfiles', 'nextnum', 'mds3.json'))) + self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'nerdm'))) + self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'nerdm', '_seq.json'))) + self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'nerdm', 'mds3:0001'))) diff --git a/scripts/install.sh b/scripts/install.sh index 6989898..1aec65d 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -23,7 +23,7 @@ oarmd_pkg=$base/metadata mkdir -p $PY_LIBDIR echo Installing python libraries into $PY_LIBDIR... (cd $PY_LIBDIR && PY_LIBDIR=$PWD) -(cd $SOURCE_DIR/python && python setup.py install --install-purelib=$PY_LIBDIR --install-scripts=$BINDIR) +(cd $SOURCE_DIR/python && python3 setup.py install --install-purelib=$PY_LIBDIR --install-scripts=$BINDIR) #install the JAVA jars # None at this time From 5e45387a2c3ff314e6a65e08fbbfb96c8ebcd432 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 14 Mar 2023 12:46:00 -0400 Subject: [PATCH 079/123] dbio.wsgi: add support for CORS preflight checks --- python/nistoar/midas/dbio/wsgi/project.py | 21 ++++++++++++++ .../nistoar/pdr/publish/service/wsgi/base.py | 29 ++++++++++++++++++- python/tests/nistoar/midas/test_wsgi.py | 11 +++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 79605e5..4489ac8 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -85,6 +85,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start # programming error raise ValueError("Missing ProjectRecord id") + def do_OPTIONS(self, path): + return self.send_options(["GET"]) + def do_GET(self, path, ashead=False): try: prec = 
self.svc.get_record(self._id) @@ -130,6 +133,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start # programming error raise ValueError("Missing ProjectRecord id") + def do_OPTIONS(self, path): + return self.send_options(["GET"]) + def do_GET(self, path, ashead=False): if not path: # programming error @@ -186,6 +192,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start # programming error raise ValueError("Missing ProjectRecord id") + def do_OPTIONS(self, path): + return self.send_options(["GET", "PUT"]) + def do_GET(self, path, ashead=False): try: prec = self.svc.get_record(self._id) @@ -251,6 +260,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start # programming error raise ValueError("Missing ProjectRecord id") + def do_OPTIONS(self, path): + return self.send_options(["GET", "PUT", "PATCH"]) + def do_GET(self, path, ashead=False): """ respond to a GET request @@ -344,6 +356,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start super(ProjectSelectionHandler, self).__init__(service, subapp, wsgienv, start_resp, who, "", config, log) + def do_OPTIONS(self, path): + return self.send_options(["GET", "POST"]) + def do_GET(self, path, ashead=False): """ respond to a GET request, interpreted as a search for records accessible by the user @@ -447,6 +462,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start raise ValueError("Missing ProjectRecord id") + def do_OPTIONS(self, path): + return self.send_options(["GET", "POST", "PUT", "PATCH", "DELETE"]) + def do_GET(self, path, ashead=False): try: prec = self.svc.get_record(self._id) @@ -676,6 +694,9 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start # programming error raise ValueError("Missing ProjectRecord id") + def do_OPTIONS(self, path): + return self.send_options(["GET", "PUT", "PATCH"]) + def do_GET(self, path, ashead=False): 
""" return the status object in response to a GET request diff --git a/python/nistoar/pdr/publish/service/wsgi/base.py b/python/nistoar/pdr/publish/service/wsgi/base.py index cda2ea6..aae8270 100644 --- a/python/nistoar/pdr/publish/service/wsgi/base.py +++ b/python/nistoar/pdr/publish/service/wsgi/base.py @@ -3,7 +3,7 @@ """ import sys, re, json from abc import ABCMeta, abstractmethod, abstractproperty -from typing import Callable +from typing import Callable, List from functools import reduce from logging import Logger from urllib.parse import parse_qs @@ -144,6 +144,33 @@ def send_json(self, data, message="OK", code=200, ashead=False, encoding='utf-8' """ return self._send(code, message, json.dumps(data, indent=2), "application/json", ashead, encoding) + def send_options(self, allowed_methods: List[str]=None, origin: str=None, extra=None, + forcors: bool=True): + """ + send a response to a OPTIONS request. This implememtation is primarily for CORS preflight requests + :param List[str] allowed_methods: a list of the HTTP methods that are allowed for request + :param str origin: + :param dict|Headers extra: extra headers to include in the output. This is either a + dictionary-like object or a list of 2-tuples (like + wsgiref.header.Headers). 
+ """ + meths = list(allowed_methods) + if 'OPTIONS' not in meths: + meths.append('OPTIONS') + if forcors: + self.add_header('Access-Control-Allow-Methods', ", ".join(meths)) + if origin: + self.add_header('Access-Control-Allow-Origin', origin) + self.add_header('Access-Control-Allow-Headers', "Content-Type") + if isinstance(extra, Mapping): + for k,v in extra.items(): + self.add_header(k, v) + elif isinstance(extra, (list, tuple)): + for k,v in extra: + self.add_header(k, v) + + return self.send_ok(message="No Content") + def _send(self, code, message, content, contenttype, ashead, encoding): if ashead is None: ashead = self._meth.upper() == "HEAD" diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 502ec52..0fe4fc6 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -693,6 +693,17 @@ def test_create_dmp(self): self.assertTrue(data['title'].startswith("Microscopy of ")) self.assertEqual(data['expectedDataSize'], "2 TB") + def test_cors_preflight(self): + req = { + 'REQUEST_METHOD': 'OPTIONS', + 'PATH_INFO': '/midas/dmp/mdm1', + 'HTTP_ACCESS-CONTROL-REQUEST-METHOD': 'POST' + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + self.assertIn("Access-Control-Allow-Methods: GET, POST, OPTIONS", self.resp) + + def test_create_dap3(self): req = { 'REQUEST_METHOD': 'POST', From 69a976b4d2ca15381d6f65df8bc16563b507d2c5 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 14 Mar 2023 23:58:39 -0400 Subject: [PATCH 080/123] create DAP openapi docs (and fix DMP doc) --- docs/dapsvc-elements.html | 11 + docs/dapsvc-openapi.yml | 792 ++++++++++++++++++++++++++++++++++++++ docs/dmpsvc-elements.html | 2 +- docs/dmpsvc-openapi.yml | 20 +- 4 files changed, 814 insertions(+), 11 deletions(-) create mode 100644 docs/dapsvc-elements.html create mode 100644 docs/dapsvc-openapi.yml diff --git a/docs/dapsvc-elements.html b/docs/dapsvc-elements.html new file mode 
100644 index 0000000..a2c6271 --- /dev/null +++ b/docs/dapsvc-elements.html @@ -0,0 +1,11 @@ + +MIDAS Data Asset Publication Service (DAPS) + + + + + + + + + diff --git a/docs/dapsvc-openapi.yml b/docs/dapsvc-openapi.yml new file mode 100644 index 0000000..e94dd5d --- /dev/null +++ b/docs/dapsvc-openapi.yml @@ -0,0 +1,792 @@ +openapi: 3.1.0 +info: + title: MIDAS Data Asset Publication Service (DAPS) Interface, Convention MDS3 + summary: A service allows the creation, updating, and publishing of Data Asset Publications (DAPs). + description: |- + A client uses the DAPS interface to create and edit metadata that describe a draft Data Asset + Pulication. Clients are expected to be either automated systems or user-driven, interactive + tools. After completing the draft publication via this API, the client can submit it for review + and final publication. It also possible to revise previously published DAPs through this API. + + A typical life cycle of a DAP managed by the service looks like this: + 1. Create a new draft via a [POST to `/mds3`](/paths/mds3/post). The **`id`** property in the + returned record is used to retrieve and update the recrd (via the `{draftid}` parameter). + 2. Update the draft publication via a [PUT](/paths/mds3-draftid--data/put) or + [PATCH](/paths/mds3-draftid--data/patch) to `/mds3/{draftid}/data + 3. Inspect the draft publication document by retrieving it via a + [GET to `/mds3/{draftid}/data`](/paths/mds3-draftid--data/get). + 4. When updates are complete, finalize the document with a + [PUT to `/mds3/{draftid}/status`](/paths/mds3-draftid--status/put) (setting the `action` + property to "**finalize**". Review the result via + [GET to `/mds3/{draftid}/data`](/paths/mds3-draftid--data/get); further updates are still + possible if needed. + 5. Submit the record for review with a + [PUT to `/mds3/{draftid}/status`](/paths/mds3-draftid--status/put) (setting the `action` + property to "**submit**". 
+ + contact: + name: MIDAS support office + email: datasupport@nist.gov + + version: 0.1 + +servers: + - url: https://localhost/midas/dap + +paths: + /mds3: + summary: the resource representing the full collection of publications created under the MDS3 convention + get: + summary: search for DAP records + description: return all or a subset of DAP records that the requesting user is authorized to read + parameters: [ ] # parameters for selecting matching records. + responses: + "200": + description: >- + A list of the matched DAP records. See the DAP record description for details. + content: + "application/json": + schema: + description: a list of matched DAP records. + type: array + items: { "type": { "$ref": "#/components/schemas/ProjectRecord" } } + examples: + "empty": + summary: no matching DAPs found + value: '[]' + post: + summary: create a new DAP record + description: >- + The client provides a name and initial data to be used to create the new record. The + server will assign a new identifier to the newly created record, and returns the actual + ProjectRecord saved as a result of the request. + + requestBody: + summary: the data that should be used to initialize the newly created record + description: >- + This body contains the initial data for the record. It must include the human-oriented + name to assign to the record. The data field is optional, but if given, it must only + include properties that are part of the NERDm schema for a Resource object. Properties + provide within the optional meta field are advisory, only. + content: + "application/json": + schema: + "$ref": "#/components/schemas/CreateRecordRequest" + responses: + "201": + description: >- + The request was accepted and a DAP record was created. The response is DAP record that + was actually created. 
The `data` property provides a digest of the draft publication + document with selected properties and summary information; to see the full draft + publication document, access the + [/mds3/{draftid}/data endpoint](../mds3-draftid--data/get). + content: + "applicaiton/json": + schema: + "$ref": "#/components/schemas/ProjectRecord" + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as JSON + * The JSON document in the request body was not compliant with the CreateRecordRequest + schema, such as not including the name field, or including unrecognized properties as + part of the data or meta fields. + * The requested name is already applied to another record owned by the user + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: >- + The client did not submit recognized credentials and thus is not authorized + to create DAP records. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mds3/{draftid}: + summary: access to a specific DAP record + parameters: + - "$ref": "#/components/parameters/draftid" + summary: the identifier assigned to the desired DAP record + get: + summary: Retrieve a DAP record + description: >- + The returns the DAP project record with the specified identifier. The DAP record is an + administrative wrapper around the draft publication document that contains metadata about + the draft, such as its identifier, owner, status and history. The `data` property + provides a digest of the draft publication document with selected properties and summary + information; to see the full draft publication document, access the + [/mds3/{draftid}/data endpoint](../mds3-draftid--data/get). 
+ responses: + "200": + description: The DAP draft record with the given identifier was found and returned + content: + "application/json": + schema: + "$ref": "#/components/schemas/ProjectRecord" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: >- + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mds3/{draftid}/data: + summary: the draft publication document + parameters: + - "$ref": "#/components/parameters/draftid" + summary: the identifier assigned to the desired DAP record + get: + summary: return the draft publication + description: >- + This returns the complete draft publication document--the document that the client wishes + to eventually publish. It is encoded as a NERDm Resource JSON object. + responses: + "200": + description: The DAP record with the given identifier was found and it data content was returned + content: + "application/json": + schema: + "type": object + "description": a NERDm Resource object + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + put: + summary: "replace the document data" + description: >- + This replaces the contents of the draft publication document with the given JSON object. + requestBody: + summary: the data that should replace the currently stored publication document data + description: >- + The request message body is a NERDm Resource JSON object whose content represents the + draft publication. 
This JSON object will completely replace the data object previously + saved for this record. Note that some normal NERDm compliance requirements are relax + to allow incomplete documents to be submitted. Some properties (e.g. those that are + considered read-only and thus not updatable) will be ignored and not affect the content + that is actually saved. + content: + "application/json": + schema: + type: object + responses: + "200": + description: >- + The DAP record with the given identifier was found and it document content was updated and + returned. The returned data may modified from what was submitted as required by + policy or convention. + content: + "application/json": + schema: + "type": object + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON object + * The JSON document in the request body was not a compliant NERDm Resource document. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: >- + The authenticated user is not authorized to update this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + patch: + summary: update selected document data + description: >- + This merges the input data representing a partial update to the draft publication document + that is already stored. Any sub-properties that match those in the input object will + updated with the given values; any previously stored properties not provided in the input will + remain unchanged. Input subproperty object values can also be partially complete; these will be + correspondingly merged hierarchically. 
+ requestBody: + summary: the data that will be merged into the NERDm publication document + description: >- + The request message body is a JSON object whose properties are taken from the NERDm + Resource Schema. This JSON object will merged in with the publicatoin data previously + saved for this record. + content: + "application/json": + schema: + type: object + responses: + "200": + description: The DAP record with the given identifier was found and it data content was updated and returned + content: + "application/json": + schema: + "type": object + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON object + * The JSON object in the request body contains unrecognized or invalid properties as + expected by the DAP service + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to update this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mds3/{draftid}/data/{nerdmprop}: + summary: a property from the NERDm description of the publication + description: >- + This endpoint allows one to access and update selected top-level NERDm properties + describing the draft publication. This can be more efficient for getting at particular + pieces of information when the NERDm document is large (e.g. due to many files or authors). + Note that some properties (namely, `authors`, `references`, and others) support an expanded + interface; see the documentation for those endpoints specifically. 
+ parameters: + - "$ref": "#/components/parameters/draftid" + summary: the identifier assigned to the desired DAP record + - "$ref": "#/components/parameters/nerdmprop" + summary: a top-level property name from the NERDm Resource description of the draft publication + + get: + summary: "return a NERDm property value" + description: >- + This endpoint allows one to retrieve selected top-level NERDm properties + describing the draft publication. This can be more efficient for getting at particular + pieces of information when the NERDm document is large (e.g. due to many files or authors). + Any top-level property can be retrieved from this endpoint. + + In addition to standard NERDm resource properties, a few _pseudo-property_ names are + supported: + * **`pdr:f`**: this will return a list of NERDm components representing files and folders + * **`pdr:see`**: this return the list of NERDM components that are _not_ files or + folders. Generally these are links to other web sites. + responses: + "200": + description: >- + The DAP record with the given identifier was found and the requested property value was + returned. The type of the output is the type defined for the property in the NERDm schema. + content: + "application/json": + description: the type of the output is the type defined for the property in the NERDm schema + schema: + "oneOf": + - "type": object + - "type": string + "404": + description: >- + The DAP draft record with the given identifier was not found, or the particular property + is either not a supported property or the property has not been set yet. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: The authenticated user is not authorized to read this record. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + put: + summary: "replace a NERDm property value" + description: >- + This replaces the value of a particular, top-level NERDm property. + Note that not all properties that can be retrieved via GET are updateable via PUT. + This may be because the property is read-only (i.e. set only by the server). + requestBody: + summary: the data that should replace the currently stored data content + description: >- + The request message body is a JSON object whose properties represent the content of the + draft publication. This JSON object will completely replace the data object previously + saved for this record. + content: + "application/json": + description: the type of the input is the type defined for the property in the NERDm schema + schema: + oneOf: + - type: object + - type: string + responses: + "200": + description: >- + The DAP record with the given identifier was found, the requested property value was + updated, and the updated value was retuned. The value may modified from what was + submitted as required by policy or convention. + content: + "application/json": + description: the type of the output is the type defined for the property in the NERDm schema + schema: + oneOf: + - type: object + - type: string + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON object + * The JSON document in the request body was not compliant with that required + for the requested property (i.e. its type and/or format). 
+ * The value would otherwise result in an invalid description document + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: >- + The authenticated user is not authorized to update this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "405": + description: >- + The requested property cannot be updated because it is a read-only property or + because an independent update of the property is not supported. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mds3/{draftid}/name: + summary: the mnemonic name for the DAP record with the given identifier + parameters: + - "$ref": "#/components/parameters/draftid" + summary: the identifier assigned to the desired DAP record + get: + summary: "get the DAP record's name" + description: >- + this returns the value of the `name` property that is returned by the `/mds3/{draftid}` + endpoint. + responses: + "200": + description: The DAP record was found and its name was returned. + content: + "application/json": + schema: + "type": string + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + put: + summary: Change the DAP record's name + description: This endpoint method is used to change the mnemonic name assigned to the record. 
+ requestBody: + summary: the new name to assign to the DAP + content: + "application/json": + schema: + type: string + responses: + "200": + description: the record was found, the name was successfully changed and the new name returned + content: + "application": + "type": string + "400": + description: |- + The client sent bad or unusabe input of one of the following forms: + * The input request body is not parsable as a JSON string + * The string represents a name that is already attached to another record owned by the + user. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to change its name. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + + /mds3/{draftid}/status: + summary: >- + information about the status of the record, including its current state and the last action + applied to the record. + parameters: + "$ref": "#/components/parameters/draftid" + summary: the identifier assigned to the desired DAP record + get: + summary: "return the record's current status" + description: >- + this returns the value of the `status` property that is returned by the `/mds3/{draftid}` + endpoint. The properties in this object describe the current state of the record. + responses: + "200": + description: The DAP record was found and its status info was returned. + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + put: + summary: "apply an action to the record" + description: >- + This endpoint can be used to request special processing be applied to the record. The + `action` property in the input object indicates what action should be applied. The only + accepted values are "**finalize**" and "**submit**". The finalize action will cause the last of + the automated updates be applied + before the record can be submitted for publishing. For example, the version + that the record will be published as will be set as part of this action. Applying finalize + before the submit action allows the client to give the record one last examination before + submission. The submit action will apply the finalize action (again) and then submit the + record to be published. Note that all input properties besides `action` and `message` will + be ignored. + responses: + "200": + description: >- + The requested action was successfully applied to the record. If the + requested action was "**finalize**", the new state returned will be "**ready**". + If the requested action was "**submit**", the new state will be "**submitted**". + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "202": + description: >- + The requested action was initiated on the record and is still underway. + The new state returned will be "**processing**". The record must leave this + state before further edits or actions can be applied. If the processing + eventually fails, the `message` property will be set to an error message. + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "400": + description: >- + The inputs for the requested action were illegal in some way. In particular, an + unsupported `action` value will result in this error. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + patch: + summary: "update the status message" + description: >- + This method is synonymous to a PUT request in that it can also be used to request + special processing. In addition, if the input includes only the `message` + property, the status message will just be updated with no other processing applied. + responses: + "200": + description: >- + The requested action was successfully applied to the record or the message was + updated (depending on whether an action was requested). + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "202": + description: >- + Special processing was requested via the `action` input property; the + processing was initiated on the record and is still underway. + The new state returned will be "**processing**". The record must leave this + state before further edits or actions can be applied. If the processing + eventually fails, the `message` property will be set to an error message. + content: + "application/json": + schema: + "$ref": "#/components/schemas/RecordStatus" + "400": + description: >- + The inputs for the requested action were illegal in some way. In particular, an + unsupported `action` value will result in this error. 
+ content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "404": + description: The DAP draft record with the given identifier was not found + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + "401": + description: >- + The authenticated user is not authorized to read this record. + content: + "application/json": + schema: + "$ref": "#/components/schemas/ErrorResponse" + +components: + parameters: + draftid: + name: draftid + in: path + description: The identifer used to refer to a DAP + required: true + schema: + type: string + + nerdmprop: + name: nerdmprop + in: path + description: >- + The name of a top-level NERDm Resource object property (e.g. "**title**", "**description**", etc.) + required: true + schema: + type: string + + schemas: + ErrorResponse: + title: a JSON-encoded error response + description: + This is a JSON-encoded description of an error detected by the service while processing + a service request. + properties: + "http:code": + description: the HTTP status code response + type: integer + minimum: 400 + exclusiveMaximum: 600 + "http:reason": + description: the (brief) HTTP status message associated with the code + type: string + "pdr:message": + description: + a more detailed explanation of the error detected. This message may be quite lengthy. + type: string + "pdr:sipid": + description: + the identifier for the SIP being accessed, if known and exists. 
+ required: [ 'http:code', 'http:reason', 'pdr:message' ] + + ProjectRecord: + title: a JSON-encoded DBIO project record + description: + This record describes a project record being drafted by a client + properties: + "name": + description: the mnemonic, user-chosen name for the record + type: string + minimum: 1 + required: true + "id": + description: the unique identifier assigned to the record by the system at its creation + type: string + minimum: 3 + required: true + "owner": + description: the identifier of the user that is primarily responsible for maintaining this record + type: string + minimum: 1 + required: true + "status": + description: + information describing the current state of the record and the last change that was + applied to it. + "$ref": "#/components/schemas/RecordStatus" + minimum: 1 + maximum: 1 + "curators": + description: + the list of IDs for people who have been assigned as curators for this record; it will be empty + if no curators are currently assigned. + type: array + items: { type: string } + "deactivated": + description: + a boolean set to True if this record has been deactivated, preventing any further editing or + listing + type: boolean + "acl": + description: the access control lists associated with this record + type: { $ref: ACLs } + "data": + description: a digest of the draft publication document, featuring selected and summary properties + type: object + "meta": + description: metadata associated with the client, managed by the service + type: object + + ActionRequest: + title: a form of a RecordStatus that is used to request an action be applied to the record + description: + Use this payload type to request that an action (e.g. "finalize" or "submit") be applied to + the record. The finalize action will cause the last of the automated updates be applied + before the record can be submitted for publishing. For example, the version + that the record will be published as will be set as part of this action. 
Applying finalize + before the submit action allows the client to give the record one last examination before + submission. The submit action will apply the finalize action (again) and then submit the + record to be published. + properties: + "action": + description: + the name of the action to apply. Currently, only "finalize" and "submit" are allowed + action names that can be applied. If not provided, only the status message will be + updated. + type: string + minimum: 0 + maximum: 1 + "message": + description: + an optional message to record as to the reason or intent for applying the action. If, + for example, the action is submit which would revise a previous publication, the client + can providea message describing what has changed. If not provided, a default message + will be recorded. + + RecordStatus: + title: a description of the current state of the record + description: + This object provides information about the current state of the record and the action that + was applied to it. It also includes the three timestamps--when it was created, last + modified, and when it entered its current state. + properties: + "state": + description: + a label indicating it stage in its life-cycle. Possible values are "edit", "processing", + "ready", "submitted", "published", and "unwell". When a DAP is created, it enters the + "edit" state. After being finalized, it will be in the "ready" state. Further updates + to the record are only allowed when it is in the "edit" or "ready" state. + After it is submitted, it will first be in the "submitted" state and then eventually the + "published" state. If it gets into an erroneous state that cannot be auto-corrected, + it may go into the "unwell" state. + type: string + minimum: 1 + enum: ["edit", "processing", "ready", "submitted", "published", "unwell" ] + "action": + description: + the name of the last action that was applied. Possible values include "create", + "update", "finalize", and "submit". 
+ type: string + minimum: 1 + maximum: 1 + "message": + description: + a human-oriented message describing the last action applied to the record. In some + cases this can be client provided. + type: string + minimum: 1 + maximum: 1 + "created": + description: the epoch timestamp in fractional seconds when the record was created + type: number + "createdDate": + description: the ISO 8601-formatted data-time that this record was created + type: string + "modified": + description: the epoch timestamp in fractional seconds when the record was last updated + type: number + "modifiedDate": + description: the ISO 8601-formatted data-time that this record was last updated + type: string + "since": + description: + the epoch timestamp in fractional seconds when the record was entered its current state. + For example, if the current state is "submitted", this is the date that the record was + submitted for publication. + type: number + "sinceDate": + description: the ISO 8601-formatted data-time that this record entered its current state + type: string + + CreateRecordRequest: + title: a form of a ProjectRecord that is used as a request to create a new one + description: + This record describes a project record being drafted by a client + properties: + "name": + description: the mnemonic, user-chosen name to give to the record + type: string + minimum: 1 + required: true + "data": + description: the initial DAP record data to set + type: object + "meta": + description: + initial meta-information to associate with the record. This will be considered + advisory only; the server may override some or all of this data based on policy. 
+ type: object + + + + + + + diff --git a/docs/dmpsvc-elements.html b/docs/dmpsvc-elements.html index 8a6e19c..271ad29 100644 --- a/docs/dmpsvc-elements.html +++ b/docs/dmpsvc-elements.html @@ -6,6 +6,6 @@ - + diff --git a/docs/dmpsvc-openapi.yml b/docs/dmpsvc-openapi.yml index 7302307..259ceed 100644 --- a/docs/dmpsvc-openapi.yml +++ b/docs/dmpsvc-openapi.yml @@ -20,6 +20,7 @@ paths: summary: the resource representing the full collection of DMPs get: summary: return all or a subset of DMP records that the requesting user is authorized to read + parameters: [ ] # parameters for selecting matching records. responses: "200": description: @@ -40,13 +41,12 @@ paths: The client provides a name and initial data to be used to create the new record. The server will assign a new identifier to the newly created record, and returns the actual ProjectRecord saved as a result of the request. - parameters: [ ] # parameters for selecting matching records. requestBody: summary: the data that should be used to initialize the newly created record description: This body contains the initial data for the record. It must include the human-oriented - name to assign to the record. The data field is optionaly, but if given, it must only + name to assign to the record. The data field is optional, but if given, it must only include properties that are part of the DMP schema. Properties provide within the optional meta field are advisory, only. content: @@ -86,8 +86,8 @@ paths: /mdm1/{projid}: summary: access to a specific DMP record parameters: - "$ref": "#/components/parameters/projid" - summary: the identifier assigned to the desired DMP record + - "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record get: summary: Return the DMP record with the specified identifier. 
description: @@ -117,8 +117,8 @@ paths: /mdm1/{projid}/data: summary: the data describing the contents of a Data Management Plan (DMP) parameters: - "$ref": "#/components/parameters/projid" - summary: the identifier assigned to the desired DMP record + - "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record get: summary: return the DMP record contents description: @@ -243,8 +243,8 @@ paths: /mdm1/{projid}/name: summary: the mnemonic name for the DMP record with the given identifier parameters: - "$ref": "#/components/parameters/projid" - summary: the identifier assigned to the desired DMP record + - "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record get: summary: "return the DMP record's mnemonic name" description: @@ -315,8 +315,8 @@ paths: information about the status of the record, including its current state and the last action applied to the record. parameters: - "$ref": "#/components/parameters/projid" - summary: the identifier assigned to the desired DMP record + - "$ref": "#/components/parameters/projid" + summary: the identifier assigned to the desired DMP record get: summary: "the current status data" description: From a8d35dae581cab341b8c5da29ff675b3cf4ff594 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 15 Mar 2023 00:00:14 -0400 Subject: [PATCH 081/123] partial support contact on create (without a people service) --- python/nistoar/midas/dap/service/mds3.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 0e6aafd..e882e0e 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -304,6 +304,11 @@ def _new_data_for(self, recid, meta=None, schemaid=None): out['components'] = [swcomp] + out['components'] # contact info + # if meta.get("creatorIsContact"): + # # base contact on the currently logged in user + # 
elif meta.get("contactName"): + if meta.get("contactName"): + out['contactPoint'] = self._moderate_contact({"fn": meta["contactName"]}, doval=False) return out @@ -619,6 +624,8 @@ def _summarize(self, nerd: NERDResource): out["@type"] = resmd.get("@type", ["nrd:Resource"]) if 'doi' in resmd: out["doi"] = resmd["doi"] + if 'contactPoint' in resmd: + out["contactPoint"] = resmd["contactPoint"] out["author_count"] = nerd.authors.count out["file_count"] = nerd.files.count out["nonfile_count"] = nerd.nonfiles.count From 513524cdb4d0b81d0dd18e99abfaa84baed671ba Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 15 Mar 2023 00:01:55 -0400 Subject: [PATCH 082/123] midas-uwsgi.py: fix configuration name --- scripts/midas-uwsgi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/midas-uwsgi.py b/scripts/midas-uwsgi.py index d6cc018..1e6077f 100644 --- a/scripts/midas-uwsgi.py +++ b/scripts/midas-uwsgi.py @@ -69,12 +69,12 @@ def _dec(obj): _dec(uwsgi.opt.get('oar_config_env'))) srvc.wait_until_up(int(_dec(uwsgi.opt.get('oar_config_timeout', 10))), True, sys.stderr) - cfg = srvc.get(_dec(uwsgi.opt.get('oar_config_appname', 'pdr-pdp'))) + cfg = srvc.get(_dec(uwsgi.opt.get('oar_config_appname', 'midas-dbio'))) elif config.service: config.service.wait_until_up(int(os.environ.get('OAR_CONFIG_TIMEOUT', 10)), True, sys.stderr) - cfg = config.service.get(os.environ.get('OAR_CONFIG_APP', 'pdr-resolve')) + cfg = config.service.get(os.environ.get('OAR_CONFIG_APP', 'midas-dbio')) else: raise config.ConfigurationException("resolver: nist-oar configuration not provided") From 1c72711ece88b7ad7f3a650a0b1cf2a579b2af43 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 16 Mar 2023 07:05:20 -0400 Subject: [PATCH 083/123] dapsvc-openapi.yml: minor edits --- docs/dapsvc-openapi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/dapsvc-openapi.yml b/docs/dapsvc-openapi.yml index e94dd5d..bd8bcdb 100644 --- a/docs/dapsvc-openapi.yml +++ 
b/docs/dapsvc-openapi.yml @@ -17,12 +17,12 @@ info: [GET to `/mds3/{draftid}/data`](/paths/mds3-draftid--data/get). 4. When updates are complete, finalize the document with a [PUT to `/mds3/{draftid}/status`](/paths/mds3-draftid--status/put) (setting the `action` - property to "**finalize**". Review the result via + property to "**finalize**"). Review the result via [GET to `/mds3/{draftid}/data`](/paths/mds3-draftid--data/get); further updates are still possible if needed. 5. Submit the record for review with a [PUT to `/mds3/{draftid}/status`](/paths/mds3-draftid--status/put) (setting the `action` - property to "**submit**". + property to "**submit**"). contact: name: MIDAS support office From cdd0db4632355c9662f3c391ebac6287d774958a Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 17 Mar 2023 12:36:14 -0400 Subject: [PATCH 084/123] dap.mds3 bug fixes: * fix provenance recording for partial updates * return result of updates to lists --- python/nistoar/midas/dap/service/mds3.py | 64 ++++++++++++++------ python/nistoar/midas/dbio/project.py | 4 +- python/tests/nistoar/midas/test_wsgi.py | 75 ++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 19 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index e882e0e..76a41cc 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -809,7 +809,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data = self._update_listitem(nerd.authors, self._moderate_author, data, key, replace, doval) provact.add_subaction(Action(subacttype, "%s#data.authors[%s]" % (prec.id, str(key)), - what, self._jsondiff(old, data))) + self.who, what, self._jsondiff(old, data))) elif m.group(1) == "references": what = "adding author" @@ -820,7 +820,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data = self._update_listitem(nerd.references, self._moderate_reference, 
data, key, replace, doval) provact.add_subaction(Action(subacttype, "%s#data.references[%s]" % (prec.id, str(key)), - what, self._jsondiff(old, data))) + self.who, what, self._jsondiff(old, data))) elif m.group(1) == LINK_DELIM: what = "adding link" @@ -831,7 +831,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data = self._update_listitem(nerd.nonfiles, self._moderate_nonfile, data, key, replace, doval) provact.add_subaction(Action(subacttype, "%s#data/pdr:see[%s]" % (prec.id, str(key)), - what, self._jsondiff(old, data))) + self.who, what, self._jsondiff(old, data))) elif m.group(1) == "components" or m.group(1) == FILE_DELIM: if ('filepath' not in data and key in nerd.nonfiles): @@ -846,7 +846,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data["_schema"] = schemabase+"/definitions/Component" data = self._update_component(nerd, data, key, replace, doval=doval) provact.add_subaction(Action(subacttype, "%s#data/pdr:f[%s]" % (prec.id, str(key)), - what, self._jsondiff(old, data))) + self.who, what, self._jsondiff(old, data))) else: raise PartNotAccessible(prec.id, path, "Updating %s not allowed" % path) @@ -860,8 +860,8 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data = self._replace_objlist(nerd.authors, self._moderate_author, data, doval) else: data = self._update_objlist(nerd.authors, self._moderate_author, data, doval) - provact.add_subaction(Action(subacttype, prec.id+"#data.authors", "updating authors", - self._jsondiff(old, data))) + provact.add_subaction(Action(subacttype, prec.id+"#data.authors", self.who, + "updating authors", self._jsondiff(old, data))) elif path == "references": if not isinstance(data, list): @@ -872,8 +872,8 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data = self._replace_objlist(nerd.references, self._moderate_reference, data, doval) else: data = self._update_objlist(nerd.references, 
self._moderate_reference, data, doval) - provact.add_subaction(Action(subacttype, prec.id+"#data.references", "updating references", - self._jsondiff(old, data))) + provact.add_subaction(Action(subacttype, prec.id+"#data.references", self.who, + "updating references", self._jsondiff(old, data))) elif path == LINK_DELIM: if not isinstance(data, list): @@ -884,8 +884,8 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, data = self._replace_objlist(nerd.nonfies, self._moderate_nonfile, data, doval) else: data = self._update_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) - provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", "updating link list", - self._jsondiff(old, data))) + provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", self.who, + "updating link list", self._jsondiff(old, data))) # elif path == FILE_DELIM: @@ -911,13 +911,15 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, else: nerd.nonfiles.append(cmp) - provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:f", "updating file list", + provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:f", self.who, + "updating file list", self._jsondiff(oldn, nerd.nonfiles.get_data()))) for cmp in files: nerd.files.set_file_at(cmp) if path == "components": - provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", "updating link list", + provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", self.who, + "updating link list", self._jsondiff(oldf, nerd.nonfiles.get_data()))) data = nerd.nonfiles.get_data() + nerd.files.get_files() else: @@ -930,7 +932,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, old = res['contactPoint'] res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) # may raise InvalidUpdate - provact.add_subaction(Action(subacttype, prec.id+"#data.contactPoint", + provact.add_subaction(Action(subacttype, 
prec.id+"#data.contactPoint", self.who, "updating contact point", self._jsondiff(old, res['contactPoint']))) nerd.replace_res_data(res) data = res[path] @@ -941,8 +943,8 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res = nerd.get_res_data() old = res['@type'] res = self._moderate_restype(data, res, nerd, replace=replace, doval=doval) - provact.add_subaction(Action(subacttype, prec.id+"#data.@type", "updating resource types", - self._jsondiff(old, res['@type']))) + provact.add_subaction(Action(subacttype, prec.id+"#data.@type", self.who, + "updating resource types", self._jsondiff(old, res['@type']))) nerd.replace_res_data(res) data = res[path] @@ -952,10 +954,23 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res = nerd.get_res_data() old = res['description'] res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate - provact.add_subaction(Action(subacttype, prec.id+"#data.description", "updating description", + provact.add_subaction(Action(Action.PUT, prec.id+"#data.description", self.who, + "updating description", self._jsondiff(old, res['description']))) nerd.replace_res_data(res) data = res[path] + + elif path == "landingPage": + if not isinstance(data, str): + raise InvalidUpdate("description data is not a string", sys=self) + res = nerd.get_res_data() + old = res['landingPage'] + res[path] = self._moderate_landingPage(data, res, doval) # may raise InvalidUpdate + provact.add_subaction(Action(Action.PUT, prec.id+"#data.landingPage", self.who, + "updating landingPage", + self._jsondiff(old, res['landingPage']))) + nerd.replace_res_data(res) + data = res[path] elif path in "title rights disclaimer".split(): if not isinstance(data, str): @@ -963,7 +978,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, res = nerd.get_res_data() old = res[path] res[path] = self._moderate_text(data, res, doval=doval) # may raise InvalidUpdate - 
provact.add_subaction(Action(subacttype, prec.id+"#data."+path, "updating "+path, + provact.add_subaction(Action(subacttype, prec.id+"#data."+path, self.who, "updating "+path, self._jsondiff(old, res[path]))) nerd.replace_res_data(res) data = res[path] @@ -1340,6 +1355,7 @@ def _replace_objlist(self, objlist, moderate_func, data: List[Mapping], doval: b objlist.empty() for item in data: objlist.append(item) + return objlist.get_data() def _update_objlist(self, objlist, moderate_func, data: List[Mapping], doval: bool=False): # match the items in the given list to existing items currently store by their ids; for each @@ -1369,6 +1385,7 @@ def _update_objlist(self, objlist, moderate_func, data: List[Mapping], doval: bo objlist.set(item["@id"], item) else: objlist.append(item) + return objlist.get_data() ## This is an implementation based on position rather than id # curcount = len(objlist) @@ -1432,6 +1449,19 @@ def _moderate_description(self, val, resmd=None, doval=True): raise InvalidUpdate("Description value is not a string or array of strings", sys=self) return [self._moderate_text(t, resmd, doval=doval) for t in val if t != ""] + def _moderate_landingPage(self, val, resmd=None, doval=True): + try: + url = urlparse(val) + if url.scheme not in "https http".split() or not url.netloc: + raise InvalidInput("landingPage: Not a complete HTTP URL") + except ValueError as ex: + raise InvalidInput("landingPage: Not a legal URL: "+str(ex)) + if resmd and doval: + resmd['landingPage'] = val + self.validate_json(resmd) + return val + + _pfx_for_type = OrderedDict([ ("ScienceTheme", NERDMAGG_PRE), ("ExperimentalData", NERDMEXP_PRE), diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index a08cde8..487c909 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -216,8 +216,8 @@ def _record_action(self, act: Action): try: self.dbcli.record_action(act) except Exception as ex: - self.log.error("Failed to 
record provenance action for %s: %s: %s", - act.subject, act.type, act.message) + self.log.error("Failed to record provenance action for %s (%s: %s): %s", + act.subject, act.type, act.message, str(ex)) def _try_save(self, prec): # this is tolerant of recording errors diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 0fe4fc6..399e9d9 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -726,6 +726,81 @@ def test_create_dap3(self): self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'nerdm', '_seq.json'))) self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'nerdm', 'mds3:0001'))) + def test_put_authors(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': StringIO('{"name": "first", "data": {"title": "Microscopy of Cobalt Samples"}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + self.assertEqual(data['data']['author_count'], 0) + + self.resp = [] + authors = [ + {"familyName": "Cranston", "givenName": "Gurn" }, + {"familyName": "Howard", "givenName": "Dr."} + ] + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/authors', + 'wsgi.input': StringIO(json.dumps(authors)) + } + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertTrue(isinstance(data, list)) + self.assertEqual(len(data), 2) + self.assertEqual(data[0]['familyName'], "Cranston") + self.assertEqual(data[1]['familyName'], "Howard") + self.assertEqual(data[1]['givenName'], "Dr.") + + hold = data[0] + data[0] = data[1] + data[1] = hold + data[0]['givenName'] = "Doctor" + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': 
'/midas/dap/mds3/mds3:0001/data/authors', + 'wsgi.input': StringIO(json.dumps(data)) + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertTrue(isinstance(data, list)) + self.assertEqual(len(data), 2) + self.assertEqual(data[0]['familyName'], "Cranston") + self.assertEqual(data[1]['familyName'], "Howard") + self.assertEqual(data[1]['givenName'], "Doctor") + + # change order + hold = data[0] + data[0] = data[1] + data[1] = hold + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/authors', + 'wsgi.input': StringIO(json.dumps(data)) + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertTrue(isinstance(data, list)) + self.assertEqual(len(data), 2) + self.assertEqual(data[0]['familyName'], "Howard") + self.assertEqual(data[0]['givenName'], "Doctor") + self.assertEqual(data[1]['familyName'], "Cranston") + + From 348ad042086344484ea47c466b73a41842f74312 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 17 Mar 2023 13:40:25 -0400 Subject: [PATCH 085/123] dap.mds3 bug fix: careful getting old value for prov diff recording --- python/nistoar/midas/dap/service/mds3.py | 19 ++++---- python/tests/nistoar/midas/test_wsgi.py | 59 ++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 76a41cc..cb1aaf9 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -23,6 +23,7 @@ from collections.abc import Mapping, MutableMapping, Sequence, Callable from typing import List, Union from copy import deepcopy +from urllib.parse import urlparse from ...dbio import (DBClient, DBClientFactory, ProjectRecord, AlreadyExists, NotAuthorized, ACLs, InvalidUpdate, ObjectNotFound, PartNotAccessible, @@ -626,6 +627,8 @@ def 
_summarize(self, nerd: NERDResource): out["doi"] = resmd["doi"] if 'contactPoint' in resmd: out["contactPoint"] = resmd["contactPoint"] + if 'landingPage' in resmd: + out["landingPage"] = resmd["landingPage"] out["author_count"] = nerd.authors.count out["file_count"] = nerd.files.count out["nonfile_count"] = nerd.nonfiles.count @@ -929,7 +932,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, if not isinstance(data, Mapping): raise InvalidUpdate("contactPoint data is not an object", sys=self) res = nerd.get_res_data() - old = res['contactPoint'] + old = res.get('contactPoint') res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) # may raise InvalidUpdate provact.add_subaction(Action(subacttype, prec.id+"#data.contactPoint", self.who, @@ -941,18 +944,18 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, if not isinstance(data, (list, str)): raise InvalidUpdate("@type data is not a list of strings", sys=self) res = nerd.get_res_data() - old = res['@type'] + old = res.get('@type') res = self._moderate_restype(data, res, nerd, replace=replace, doval=doval) provact.add_subaction(Action(subacttype, prec.id+"#data.@type", self.who, "updating resource types", self._jsondiff(old, res['@type']))) nerd.replace_res_data(res) - data = res[path] + data = res.get(path) elif path == "description": if not isinstance(data, (list, str)): raise InvalidUpdate("description data is not a list of strings", sys=self) res = nerd.get_res_data() - old = res['description'] + old = res.get('description') res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate provact.add_subaction(Action(Action.PUT, prec.id+"#data.description", self.who, "updating description", @@ -964,7 +967,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, if not isinstance(data, str): raise InvalidUpdate("description data is not a string", sys=self) res = 
nerd.get_res_data() - old = res['landingPage'] + old = res.get('landingPage') res[path] = self._moderate_landingPage(data, res, doval) # may raise InvalidUpdate provact.add_subaction(Action(Action.PUT, prec.id+"#data.landingPage", self.who, "updating landingPage", @@ -976,7 +979,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, if not isinstance(data, str): raise InvalidUpdate("%s value is not a string" % path, sys=self) res = nerd.get_res_data() - old = res[path] + old = res.get(path) res[path] = self._moderate_text(data, res, doval=doval) # may raise InvalidUpdate provact.add_subaction(Action(subacttype, prec.id+"#data."+path, self.who, "updating "+path, self._jsondiff(old, res[path]))) @@ -1453,9 +1456,9 @@ def _moderate_landingPage(self, val, resmd=None, doval=True): try: url = urlparse(val) if url.scheme not in "https http".split() or not url.netloc: - raise InvalidInput("landingPage: Not a complete HTTP URL") + raise InvalidUpdate("landingPage: Not a complete HTTP URL") except ValueError as ex: - raise InvalidInput("landingPage: Not a legal URL: "+str(ex)) + raise InvalidUpdate("landingPage: Not a legal URL: "+str(ex)) if resmd and doval: resmd['landingPage'] = val self.validate_json(resmd) diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 399e9d9..2adc116 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -799,6 +799,65 @@ def test_put_authors(self): self.assertEqual(data[0]['familyName'], "Howard") self.assertEqual(data[0]['givenName'], "Doctor") self.assertEqual(data[1]['familyName'], "Cranston") + + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001' + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + 
self.assertEqual(data['data']['author_count'], 2) + self.assertNotIn('authors', data['data']) # not included in summary + + def test_put_landingpage(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': StringIO('{"name": "first", "data": {"title": "Microscopy of Cobalt Samples"}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + self.assertNotIn('landingPage', data['data']) + + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/landingPage', + 'wsgi.input': StringIO('"ftp://goob.gov/data/index.html"') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("400 ", self.resp[0]) + + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/landingPage', + 'wsgi.input': StringIO('"https://nist.gov/"') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, 'https://nist.gov/') + + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001' + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertEqual(data['data']['landingPage'], 'https://nist.gov/') # in summary From 11cfd24a6e580821081afa7a59835d46a7ce2e86 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 17 Mar 2023 17:19:21 -0400 Subject: [PATCH 086/123] dap.mds3: fix setting via property name; properly handle complex values --- python/nistoar/midas/dap/service/mds3.py | 97 ++++++++++++++++-------- python/tests/nistoar/midas/test_wsgi.py | 60 +++++++++++++++ 2 files changed, 125 insertions(+), 32 deletions(-) diff 
--git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index cb1aaf9..813eb4f 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -309,7 +309,7 @@ def _new_data_for(self, recid, meta=None, schemaid=None): # # base contact on the currently logged in user # elif meta.get("contactName"): if meta.get("contactName"): - out['contactPoint'] = self._moderate_contact({"fn": meta["contactName"]}, doval=False) + out['contactPoint'] = self._moderate_contactPoint({"fn": meta["contactName"]}, doval=False) return out @@ -933,7 +933,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, raise InvalidUpdate("contactPoint data is not an object", sys=self) res = nerd.get_res_data() old = res.get('contactPoint') - res['contactPoint'] = self._moderate_contact(data, res, replace=replace, doval=doval) + res['contactPoint'] = self._moderate_contactPoint(data, res, replace=replace, doval=doval) # may raise InvalidUpdate provact.add_subaction(Action(subacttype, prec.id+"#data.contactPoint", self.who, "updating contact point", self._jsondiff(old, res['contactPoint']))) @@ -953,13 +953,26 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, elif path == "description": if not isinstance(data, (list, str)): - raise InvalidUpdate("description data is not a list of strings", sys=self) + raise InvalidUpdate(part+" data is not a list of strings", sys=self) res = nerd.get_res_data() - old = res.get('description') + old = res.get(path) + res[path] = self._moderate_description(data, res, doval=doval) # may raise InvalidUpdate - provact.add_subaction(Action(Action.PUT, prec.id+"#data.description", self.who, - "updating description", - self._jsondiff(old, res['description']))) + provact.add_subaction(Action(Action.PUT, prec.id+"#data."+path, self.who, "updating "+path, + self._jsondiff(old, res[path]))) + nerd.replace_res_data(res) + data = res[path] + + 
elif path == "keywords": + if not isinstance(data, (list, str)): + raise InvalidUpdate(part+" data is not a list of strings", sys=self) + res = nerd.get_res_data() + old = res.get(path) + + res[path] = self._moderate_keywords(data, res, doval=doval, replace=replace) # InvalidUpdate + provact.add_subaction(Action(Action.PUT if replace else Action.PATCH, + prec.id+"#data."+path, self.who, "updating "+path, + self._jsondiff(old, res[path]))) nerd.replace_res_data(res) data = res[path] @@ -1445,14 +1458,36 @@ def _moderate_text(self, val, resmd=None, doval=True): raise InvalidUpdate("Text value is not a string", sys=self) return val - def _moderate_description(self, val, resmd=None, doval=True): + def _moderate_description(self, val, resmd=None, doval=True, replace=True): + # replace is ignored + if val is None: + val = [] if isinstance(val, str): - val = val.split("\n\n") + val = re.split(r'\n\n+', val) if not isinstance(val, Sequence): - raise InvalidUpdate("Description value is not a string or array of strings", sys=self) - return [self._moderate_text(t, resmd, doval=doval) for t in val if t != ""] + raise InvalidUpdate("description value is not a string or array of strings", sys=self) + return [self._moderate_text(t, resmd, doval=doval) for t in val if t] - def _moderate_landingPage(self, val, resmd=None, doval=True): + def _moderate_keywords(self, val, resmd=None, doval=True, replace=True): + if val is None: + val = [] + if isinstance(val, str): + val = re.split(r'\n+', val) + if not isinstance(val, Sequence): + raise InvalidUpdate("keywords value is not a string or array of strings", sys=self) + + # uniquify list + out = resmd.get('keywords', []) if resmd and not replace else [] + for v in val: + if v not in out: + out.append(self._moderate_text(v, resmd, doval=doval)) + + return out + + def _moderate_landingPage(self, val, resmd=None, doval=True, replace=True): + # replace is ignored + if val is None: + val = "" try: url = urlparse(val) if url.scheme not in 
"https http".split() or not url.netloc: @@ -1482,6 +1517,8 @@ def _moderate_landingPage(self, val, resmd=None, doval=True): } def _moderate_restype(self, types, resmd, nerd=None, replace=True, doval=True): + if types is None: + types = [] if not isinstance(types, list): types = [types] if any([not isinstance(t, str) for t in types]): @@ -1550,7 +1587,9 @@ def _has_exp_prop(self, md): return False _contact_props = set("fn hasEmail postalAddress phoneNumber timezone proxyFor".split()) - def _moderate_contact(self, info, resmd=None, replace=False, doval=True): + def _moderate_contactPoint(self, info, resmd=None, replace=False, doval=True): + if info is None: + info = OrderedDict() if not isinstance(info, Mapping): raise InvalidUpdate("contactPoint data is not an object", sys=self) info = OrderedDict([(k,v) for k,v in info.items() if k in self._contact_props]) @@ -1845,25 +1884,19 @@ def _moderate_res_data(self, resmd, basemd, nerd, replace=False, doval=True): resmd["@type"] = restypes errors = [] - if 'contactPoint' in resmd: - if "contactPoint" not in resmd and not resmd.get("contactPoint"): - del resmd["contactPoint"] - else: - try: - resmd["contactPoint"] = self._moderate_contact(resmd["contactPoint"], resmd, - replace=True, doval=False) - except InvalidUpdate as ex: - errors.extend(ex.errors) - - if 'description' in resmd: - if "description" not in resmd and not resmd.get("description"): - del resmd["description"] - else: - try: - resmd["description"] = self._moderate_description(resmd["description"], resmd, - doval=False) - except InvalidUpdate as ex: - errors.extend(ex.errors) + for prop in "contactPoint description keywords landingPage".split(): + if prop in resmd: + if resmd.get(prop) is None: + del resmd[prop] + else: + try: + moderate = '_moderate_' + prop + if hasattr(self, moderate): + moderate = getattr(self, moderate) + resmd[prop] = moderate(resmd[prop], resmd, replace=True, doval=False) + + except InvalidUpdate as ex: + errors.extend(ex.errors) 
resmd.setdefault("@type", []) try: diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 2adc116..aa04b63 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -859,6 +859,66 @@ def test_put_landingpage(self): self.assertEqual(data['name'], "first") self.assertEqual(data['data']['landingPage'], 'https://nist.gov/') # in summary + def test_put_keywords(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': StringIO('{"name": "first", "data": {"title": "Microscopy of Cobalt Samples"}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + self.assertNotIn('landingPage', data['data']) + + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/keywords', + 'wsgi.input': StringIO('["CICD", "testing"]') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, "CICD testing".split()) + + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/keywords', + 'wsgi.input': StringIO('["frameworks", "testing"]') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, "CICD testing frameworks".split()) + + updates = { + "title": "a draft", + "description": "read me, please.\n\nPlease", + "keywords": "testing frameworks".split(), + "landingPage": "https://data.nist.gov/" + } + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data', + 'wsgi.input': StringIO(json.dumps(updates)) + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + 
data = self.body2dict(body) + self.assertEqual(data['title'], "a draft") + self.assertEqual(data['description'], ["read me, please.", "Please"]) + self.assertEqual(data['keywords'], ["testing", "frameworks"]) + self.assertEqual(data['landingPage'], "https://data.nist.gov/") + + + + + From c79d47a0d80e475f68d0e1454088506d6d73fabf Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 20 Mar 2023 06:42:46 -0400 Subject: [PATCH 087/123] dap.mds3: fix contactPoint moderate func. --- python/nistoar/midas/dap/service/mds3.py | 6 ++-- python/tests/nistoar/midas/test_wsgi.py | 37 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 813eb4f..4c56ba5 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -1593,9 +1593,11 @@ def _moderate_contactPoint(self, info, resmd=None, replace=False, doval=True): if not isinstance(info, Mapping): raise InvalidUpdate("contactPoint data is not an object", sys=self) info = OrderedDict([(k,v) for k,v in info.items() if k in self._contact_props]) + if info.get('hasEmail') and not info['hasEmail'].startswith("mailto:"): + info['hasEmail'] = "mailto:"+info['hasEmail'].strip() - if not replace and resmd and resmd.get('contactInfo'): - info = self._merge_into(info, resmd['contactInfo']) + if not replace and resmd and resmd.get('contactPoint'): + info = self._merge_into(info, resmd['contactPoint']) info['@type'] = "vcard:Contact" if doval: diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index aa04b63..ccc68ce 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -915,6 +915,43 @@ def test_put_keywords(self): self.assertEqual(data['keywords'], ["testing", "frameworks"]) self.assertEqual(data['landingPage'], "https://data.nist.gov/") + def test_patch_contact(self): + req = { + 
'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': + StringIO('{"name": "first", "data": {"contactPoint": {"hasEmail": "mailto:who@where.com"}}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertEqual(data['data']['contactPoint']['hasEmail'], "mailto:who@where.com") + + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/contactPoint', + 'wsgi.input': StringIO('{"fn": "The Doctor", "phoneNumber": "555-1212"}') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, {"fn": "The Doctor", "phoneNumber": "555-1212", + "hasEmail": "mailto:who@where.com", "@type": "vcard:Contact"}) + + data['hasEmail'] = "drwho@where.com" + req['REQUEST_METHOD'] = 'PUT' + req['wsgi.input'] = StringIO(json.dumps(data)) + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, {"fn": "The Doctor", "phoneNumber": "555-1212", + "hasEmail": "mailto:drwho@where.com", "@type": "vcard:Contact"}) + + From 551fc1d479205c17f0fa52acd0d775dbaf4c9ed2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 20 Mar 2023 06:50:25 -0400 Subject: [PATCH 088/123] dap.service.mds3: update test_mds3.py for changes in _moderate_contactPoint() --- .../nistoar/midas/dap/service/test_mds3.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index ba3d87c..6c9f4d8 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -256,24 +256,25 @@ def test_moderate_contact(self): self.create_service() try: - contact = 
self.svc._moderate_contact({"fn": "Gurn Cranston", "hasEmail": "gurn.cranston@gmail.com", - "foo": "bar", "phoneNumber": "Penn6-5000"}) + contact = self.svc._moderate_contactPoint({"fn": "Gurn Cranston", + "hasEmail": "gurn.cranston@gmail.com", + "foo": "bar", "phoneNumber": "Penn6-5000"}) except InvalidUpdate as ex: self.fail("Validation Error: "+ex.format_errors()) self.assertEqual(contact['fn'], "Gurn Cranston") - self.assertEqual(contact['hasEmail'], "gurn.cranston@gmail.com") + self.assertEqual(contact['hasEmail'], "mailto:gurn.cranston@gmail.com") self.assertEqual(contact['phoneNumber'], "Penn6-5000") self.assertNotIn("foo", contact) self.assertEqual(contact["@type"], "vcard:Contact") self.assertEqual(len(contact), 4) try: - contact = self.svc._moderate_contact({"fn": "Gurn J. Cranston", "goob": "gurn"}, - {"contactInfo": contact}) + contact = self.svc._moderate_contactPoint({"fn": "Gurn J. Cranston", "goob": "gurn"}, + {"contactPoint": contact}) except InvalidUpdate as ex: self.fail("Validation Error: "+ex.format_errors()) self.assertEqual(contact['fn'], "Gurn J. 
Cranston") - self.assertEqual(contact['hasEmail'], "gurn.cranston@gmail.com") + self.assertEqual(contact['hasEmail'], "mailto:gurn.cranston@gmail.com") self.assertEqual(contact['phoneNumber'], "Penn6-5000") self.assertNotIn("foo", contact) self.assertEqual(contact["@type"], "vcard:Contact") @@ -285,8 +286,8 @@ def test_moderate_contact(self): try: - contact = self.svc._moderate_contact({"fn": "Gurn Cranston", "goob": "gurn"}, - {"contactInfo": contact}, True) + contact = self.svc._moderate_contactPoint({"fn": "Gurn Cranston", "goob": "gurn"}, + {"contactPoint": contact}, True) except InvalidUpdate as ex: self.fail("Validation Error: "+ex.format_errors()) self.assertEqual(contact['fn'], "Gurn Cranston") @@ -329,7 +330,7 @@ def test_moderate_res_data(self): self.assertEqual(res.get("_extensionSchemas"), [ mds3.NERDMPUB_DEF+"PublicDataResource" ]) self.assertEqual(res.get("description"), ["This is it."]) self.assertIn("contactPoint", res) - self.assertEqual(res.get("contactPoint",{}).get("hasEmail"), "eap@dead.com") + self.assertEqual(res.get("contactPoint",{}).get("hasEmail"), "mailto:eap@dead.com") self.assertEqual(res.get("contactPoint",{}).get("@type"), "vcard:Contact") From 5106ee210d5f0267ce24a6876b8aa13087e9aaf1 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 20 Mar 2023 08:17:57 -0400 Subject: [PATCH 089/123] dap.service.mds3: change way to access list items --- python/nistoar/midas/dap/service/mds3.py | 68 +++++++++++++++--------- python/tests/nistoar/midas/test_wsgi.py | 25 +++++++++ 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 4c56ba5..eea45ad 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -374,24 +374,31 @@ def get_nerdm_data(self, id: str, part: str=None): out = nerd.get_data() else: - m = re.search(r'^([a-z]+s)\[([\w\d/#\.]+)\]$', part) - if m: - # part is of the form xxx[k] and refers 
to an item in a list - key = m.group(2) - try: - key = int(key) - except ValueError: - pass + steps = part.split('/') + if len(steps) > 1: + if len(steps) > 2: + raise ObjectNotFound(id, part) + + # part is of the form ppp/kkk and refers to an item in list, ppp, where + # kkk is either an element identifier or an element index of the form, + # [N]. + key = steps[1] + m = re.search(r'^\[(\d+)\]$', key) + if m: + try: + key = int(m.group(1)) + except ValueError as ex: + raise PartNotAccessible(id, part, "Accessing %s not supported" % part) - if m.group(1) == "authors": + if steps[0] == "authors": out = nerd.authors.get(key) - elif m.group(1) == "references": + elif steps[0] == "references": out = nerd.reference.get(key) - elif m.group(1) == LINK_DELIM: + elif steps[0] == LINK_DELIM: out = nerd.nonfiles.get(key) - elif m.group(1) == FILE_DELIM: + elif steps[0] == FILE_DELIM: out = nerd.files.get(key) - elif m.group(1) == "components": + elif steps[0] == "components": out = None try: out = nerd.nonfiles.get(key) @@ -402,6 +409,8 @@ def get_nerdm_data(self, id: str, part: str=None): out = nerd.files.get_file_by_id(key) except nerdstore.ObjectNotFound as ex: raise ObjectNotFound(id, part, str(ex)) + else: + raise PartNotAccessible(id, part, "Accessing %s not supported" % part) elif part == "authors": out = nerd.authors.get_data() @@ -793,28 +802,35 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, provact = Action(Action.PATCH, prec.id, self.who, "updating NERDm part") try: - m = re.search(r'^([a-z]+s)\[([\w\d\.\/]+)\]$', path) - if m: - # path is of the form xxx[k] and refers to an item in a list - key = m.group(2) - try: - key = int(key) - except ValueError: - pass + steps = path.split('/') + if len(steps) > 1: + if len(steps) > 2: + raise ObjectNotFound(id, path) + + # path is of the form ppp/kkk and refers to an item in list, ppp, where + # kkk is either an element identifier or an element index of the form, + # [N]. 
+ key = steps[1] + m = re.search(r'^\[(\d+)\]$', key) + if m: + try: + key = int(m.group(1)) + except ValueError as ex: + raise PartNotAccessible(id, path, "Accessing %s not supported" % path) old = {} - if m.group(1) == "authors": + if steps[0] == "authors": what = "adding author" if key in nerd.authors: old = nerd.authors.get(key) what = "updating author" - data["_schema"] = schemabase+"/definitions/Person" + # data["_schema"] = schemabase+"/definitions/Person" data = self._update_listitem(nerd.authors, self._moderate_author, data, key, replace, doval) provact.add_subaction(Action(subacttype, "%s#data.authors[%s]" % (prec.id, str(key)), self.who, what, self._jsondiff(old, data))) - elif m.group(1) == "references": + elif steps[0] == "references": what = "adding author" if key in nerd.authors: old = nerd.authors.get(key) @@ -825,7 +841,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, provact.add_subaction(Action(subacttype, "%s#data.references[%s]" % (prec.id, str(key)), self.who, what, self._jsondiff(old, data))) - elif m.group(1) == LINK_DELIM: + elif steps[0] == LINK_DELIM: what = "adding link" if key in nerd.nonfiles: old = nerd.nonfiles.get(key) @@ -836,7 +852,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, provact.add_subaction(Action(subacttype, "%s#data/pdr:see[%s]" % (prec.id, str(key)), self.who, what, self._jsondiff(old, data))) - elif m.group(1) == "components" or m.group(1) == FILE_DELIM: + elif steps[0] == "components" or steps[0] == FILE_DELIM: if ('filepath' not in data and key in nerd.nonfiles): old = nerd.nonfiles.get(key) what = "updating link" diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index ccc68ce..c702d4c 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -800,6 +800,31 @@ def test_put_authors(self): self.assertEqual(data[0]['givenName'], "Doctor") 
self.assertEqual(data[1]['familyName'], "Cranston") + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/authors/[0]', + 'wsgi.input': StringIO('{"givenName": "The Doctor"}') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['familyName'], "Howard") + self.assertEqual(data['givenName'], "The Doctor") + + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/authors/' + data['@id'], + 'wsgi.input': StringIO('{"givenName": "Doctor", "fn": "The Doctor"}') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['familyName'], "Howard") + self.assertEqual(data['fn'], "The Doctor") + self.assertEqual(data['givenName'], "Doctor") + req = { 'REQUEST_METHOD': 'GET', 'PATH_INFO': '/midas/dap/mds3/mds3:0001' From 6153a37f38018d9d12cfffc0ece74414b0905fb3 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 20 Mar 2023 13:37:23 -0400 Subject: [PATCH 090/123] dap.service.mds3: fix access to links (non-files) --- python/nistoar/midas/dap/service/mds3.py | 17 +++----- python/tests/nistoar/midas/test_wsgi.py | 51 +++++++++++++++++++++++- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index eea45ad..9afd0dc 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -314,11 +314,10 @@ def _new_data_for(self, recid, meta=None, schemaid=None): return out def _get_sw_desc_for(self, link): - id = link.rsplit('/', 1)[-1] - id = "%s/repo:%s" % (const.LINKCMP_EXTENSION.lstrip('/'), id) + # id = link.rsplit('/', 1)[-1] + # id = "%s/repo:%s" % (const.LINKCMP_EXTENSION.lstrip('/'), id) # let moderate handle this out = OrderedDict([ - ("@id", id), - ("@type", ["nrd:AccessPage", "dcat:Distribution"]), + 
("@type", ["nrdp:AccessPage"]), ("title", "Software Repository"), ("accessURL", link) ]) @@ -374,11 +373,8 @@ def get_nerdm_data(self, id: str, part: str=None): out = nerd.get_data() else: - steps = part.split('/') + steps = part.split('/', 1) if len(steps) > 1: - if len(steps) > 2: - raise ObjectNotFound(id, part) - # part is of the form ppp/kkk and refers to an item in list, ppp, where # kkk is either an element identifier or an element index of the form, # [N]. @@ -802,11 +798,8 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, provact = Action(Action.PATCH, prec.id, self.who, "updating NERDm part") try: - steps = path.split('/') + steps = path.split('/', 1) if len(steps) > 1: - if len(steps) > 2: - raise ObjectNotFound(id, path) - # path is of the form ppp/kkk and refers to an item in list, ppp, where # kkk is either an element identifier or an element index of the form, # [N]. diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index c702d4c..17d8922 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -726,7 +726,7 @@ def test_create_dap3(self): self.assertTrue(os.path.isfile(os.path.join(self.workdir, 'nerdm', '_seq.json'))) self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'nerdm', 'mds3:0001'))) - def test_put_authors(self): + def test_upd_authors(self): req = { 'REQUEST_METHOD': 'POST', 'PATH_INFO': '/midas/dap/mds3', @@ -976,6 +976,55 @@ def test_patch_contact(self): self.assertEqual(data, {"fn": "The Doctor", "phoneNumber": "555-1212", "hasEmail": "mailto:drwho@where.com", "@type": "vcard:Contact"}) + def test_upd_links(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': StringIO('{"name": "first", "data": {"title": "Microscopy of Cobalt Samples"},' + ' "meta": {"softwareLink": "https://github.com/usnistgov/oar-pdr-py"}}') + } + body = self.app(req, self.start) + self.assertIn("201 
", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + self.assertEqual(data['data']['nonfile_count'], 1) + + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see/[0]' + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data['accessURL'], "https://github.com/usnistgov/oar-pdr-py") + self.assertEqual(data['title'], "Software Repository in GitHub") + self.assertEqual(data['@type'], ["nrdp:AccessPage"]) + self.assertEqual(data['@id'], "cmp_0") + self.assertNotIn('description', data) + + req = { + 'REQUEST_METHOD': 'PATCH', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see/'+data['@id'], + 'wsgi.input': StringIO('{"description": "fork me!",' + ' "title": "OAR Software repository"}') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data['accessURL'], "https://github.com/usnistgov/oar-pdr-py") + self.assertEqual(data['title'], "OAR Software repository") + self.assertEqual(data['@type'], ["nrdp:AccessPage"]) + self.assertEqual(data['description'], "fork me!") + self.assertIn('@id', data) + + + From 60511f942cf30f48b1b48c09847fb0d8c9aea248 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 20 Mar 2023 16:56:11 -0400 Subject: [PATCH 091/123] dap.service.mds3: fix access to refenences, keywords, links --- python/nistoar/midas/dap/service/mds3.py | 24 +++++++++---------- .../nistoar/midas/dap/service/test_mds3.py | 22 ++++++++--------- python/tests/nistoar/midas/test_wsgi.py | 8 +++---- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 9afd0dc..7792832 100644 --- 
a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -389,7 +389,7 @@ def get_nerdm_data(self, id: str, part: str=None): if steps[0] == "authors": out = nerd.authors.get(key) elif steps[0] == "references": - out = nerd.reference.get(key) + out = nerd.references.get(key) elif steps[0] == LINK_DELIM: out = nerd.nonfiles.get(key) elif steps[0] == FILE_DELIM: @@ -804,7 +804,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, # kkk is either an element identifier or an element index of the form, # [N]. key = steps[1] - m = re.search(r'^\[(\d+)\]$', key) + m = re.search(r'^\[([\+\-]?\d+)\]$', key) if m: try: key = int(m.group(1)) @@ -824,14 +824,14 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, self.who, what, self._jsondiff(old, data))) elif steps[0] == "references": - what = "adding author" - if key in nerd.authors: - old = nerd.authors.get(key) - what = "updating author" + what = "adding reference" + if key in nerd.references: + old = nerd.references.get(key) + what = "updating reference" data["_schema"] = schemabase+"/definitions/BibliographicReference" data = self._update_listitem(nerd.references, self._moderate_reference, data, key, replace, doval) - provact.add_subaction(Action(subacttype, "%s#data.references[%s]" % (prec.id, str(key)), + provact.add_subaction(Action(subacttype, "%s#data.references/%s" % (prec.id, str(key)), self.who, what, self._jsondiff(old, data))) elif steps[0] == LINK_DELIM: @@ -972,13 +972,13 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, nerd.replace_res_data(res) data = res[path] - elif path == "keywords": + elif path == "keyword": if not isinstance(data, (list, str)): raise InvalidUpdate(part+" data is not a list of strings", sys=self) res = nerd.get_res_data() old = res.get(path) - res[path] = self._moderate_keywords(data, res, doval=doval, replace=replace) # InvalidUpdate + res[path] = 
self._moderate_keyword(data, res, doval=doval, replace=replace) # InvalidUpdate provact.add_subaction(Action(Action.PUT if replace else Action.PATCH, prec.id+"#data."+path, self.who, "updating "+path, self._jsondiff(old, res[path]))) @@ -1477,7 +1477,7 @@ def _moderate_description(self, val, resmd=None, doval=True, replace=True): raise InvalidUpdate("description value is not a string or array of strings", sys=self) return [self._moderate_text(t, resmd, doval=doval) for t in val if t] - def _moderate_keywords(self, val, resmd=None, doval=True, replace=True): + def _moderate_keyword(self, val, resmd=None, doval=True, replace=True): if val is None: val = [] if isinstance(val, str): @@ -1486,7 +1486,7 @@ def _moderate_keywords(self, val, resmd=None, doval=True, replace=True): raise InvalidUpdate("keywords value is not a string or array of strings", sys=self) # uniquify list - out = resmd.get('keywords', []) if resmd and not replace else [] + out = resmd.get('keyword', []) if resmd and not replace else [] for v in val: if v not in out: out.append(self._moderate_text(v, resmd, doval=doval)) @@ -1895,7 +1895,7 @@ def _moderate_res_data(self, resmd, basemd, nerd, replace=False, doval=True): resmd["@type"] = restypes errors = [] - for prop in "contactPoint description keywords landingPage".split(): + for prop in "contactPoint description keyword landingPage".split(): if prop in resmd: if resmd.get(prop) is None: del resmd[prop] diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index 6c9f4d8..8d40c55 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -801,16 +801,14 @@ def test_get_sw_desc_for(self): self.create_service() cmp = self.svc._get_sw_desc_for("https://github.com/foo/bar") self.assertEqual(cmp, { - "@id": "pdr:see/repo:bar", - "@type": ["nrd:AccessPage", "dcat:Distribution"], + "@type": ["nrdp:AccessPage"], "title": 
"Software Repository in GitHub", "accessURL": "https://github.com/foo/bar" }) cmp = self.svc._get_sw_desc_for("https://bitbucket.com/foo/bar") self.assertEqual(cmp, { - "@id": "pdr:see/repo:bar", - "@type": ["nrd:AccessPage", "dcat:Distribution"], + "@type": ["nrdp:AccessPage"], "title": "Software Repository", "accessURL": "https://bitbucket.com/foo/bar" }) @@ -868,7 +866,7 @@ def test_update(self): self.assertNotIn("authors", nerd) self.assertEqual(nerd["references"][0]["refType"], "IsReferencedBy") - result = self.svc.update_data(prec.id, {"refType": "References"}, "references[0]") + result = self.svc.update_data(prec.id, {"refType": "References"}, "references/[0]") self.assertEqual(result["location"], "https://doi.org/10.1364/OE.24.014100") self.assertEqual(result["refType"], "References") nerd = self.svc.get_nerdm_data(prec.id) @@ -880,16 +878,16 @@ def test_update(self): self.assertEqual(nerd["references"][0]["refType"], "References") with self.assertRaises(ObjectNotFound): - self.svc.update_data(prec.id, {"refType": "References"}, "references[1]") - with self.assertRaises(PartNotAccessible): - self.svc.update_data(prec.id, {"refType": "References"}, "references[-1]") + self.svc.update_data(prec.id, {"refType": "References"}, "references/[1]") with self.assertRaises(ObjectNotFound): - self.svc.update_data(prec.id, {"refType": "References"}, "references[goober]") + self.svc.update_data(prec.id, {"refType": "References"}, "references/[-2]") + with self.assertRaises(ObjectNotFound): + self.svc.update_data(prec.id, {"refType": "References"}, "references/goober") with self.assertRaises(InvalidUpdate): - self.svc.update_data(prec.id, {"refType": "IsGurnTo"}, "references[0]") + self.svc.update_data(prec.id, {"refType": "IsGurnTo"}, "references/[0]") try: - result = self.svc.update_data(prec.id, {"refType": "IsSourceOf"}, "references[ref_0]") + result = self.svc.update_data(prec.id, {"refType": "IsSourceOf"}, "references/ref_0") except InvalidUpdate as ex: 
self.fail(str(ex) + ":\n" + "\n".join([str(e) for e in ex.errors])) self.assertEqual(result["location"], "https://doi.org/10.1364/OE.24.014100") @@ -905,7 +903,7 @@ def test_update(self): self.assertEqual(filemd["size"], 69) filemd["size"] = 70 try: - result = self.svc.update_data(prec.id, filemd, "components[trial1.json]") + result = self.svc.update_data(prec.id, filemd, "components/trial1.json") except InvalidUpdate as ex: self.fail(str(ex) + ":\n" + "\n".join([str(e) for e in ex.errors])) self.assertEqual(result["filepath"], "trial1.json") diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 17d8922..47f1591 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -900,7 +900,7 @@ def test_put_keywords(self): req = { 'REQUEST_METHOD': 'PUT', - 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/keywords', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/keyword', 'wsgi.input': StringIO('["CICD", "testing"]') } self.resp = [] @@ -911,7 +911,7 @@ def test_put_keywords(self): req = { 'REQUEST_METHOD': 'PATCH', - 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/keywords', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/keyword', 'wsgi.input': StringIO('["frameworks", "testing"]') } self.resp = [] @@ -923,7 +923,7 @@ def test_put_keywords(self): updates = { "title": "a draft", "description": "read me, please.\n\nPlease", - "keywords": "testing frameworks".split(), + "keyword": "testing frameworks".split(), "landingPage": "https://data.nist.gov/" } req = { @@ -937,7 +937,7 @@ def test_put_keywords(self): data = self.body2dict(body) self.assertEqual(data['title'], "a draft") self.assertEqual(data['description'], ["read me, please.", "Please"]) - self.assertEqual(data['keywords'], ["testing", "frameworks"]) + self.assertEqual(data['keyword'], ["testing", "frameworks"]) self.assertEqual(data['landingPage'], "https://data.nist.gov/") def test_patch_contact(self): From 
43412f683e5b311f90350d8fc97a150227332e28 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 21 Mar 2023 15:29:57 -0400 Subject: [PATCH 092/123] dap.service.mds3: fix bug replacing links (with PUT) --- python/nistoar/midas/dap/service/mds3.py | 3 +- python/tests/nistoar/midas/test_wsgi.py | 88 ++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 7792832..9a4f123 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -893,7 +893,7 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, raise InvalidUpdate(err, id, path, errors=[err]) old = nerd.nonfiles.get_data() if replace: - data = self._replace_objlist(nerd.nonfies, self._moderate_nonfile, data, doval) + data = self._replace_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) else: data = self._update_objlist(nerd.nonfiles, self._moderate_nonfile, data, doval) provact.add_subaction(Action(subacttype, prec.id+"#data/pdr:see", self.who, @@ -1508,7 +1508,6 @@ def _moderate_landingPage(self, val, resmd=None, doval=True, replace=True): self.validate_json(resmd) return val - _pfx_for_type = OrderedDict([ ("ScienceTheme", NERDMAGG_PRE), ("ExperimentalData", NERDMEXP_PRE), diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 47f1591..a84ff4b 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -1023,6 +1023,94 @@ def test_upd_links(self): self.assertEqual(data['description'], "fork me!") self.assertIn('@id', data) + def test_upd_links2(self): + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3', + 'wsgi.input': StringIO('{"name": "first", "data": {"title": "Microscopy of Cobalt Samples"}}') + } + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + 
self.assertEqual(data['id'], 'mds3:0001') + self.assertEqual(data['name'], "first") + self.assertTrue(data['data']['title'].startswith("Microscopy of ")) + self.assertEqual(data['data']['nonfile_count'], 0) + + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see', + 'wsgi.input': StringIO('{"accessURL": "https://data.nist.gov", "description": "test",' + ' "@id": "pdr:see/repo:data.nist.gov" }') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("201 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data['accessURL'], "https://data.nist.gov") + self.assertEqual(data['description'], "test") + self.assertEqual(data['@type'], ["nrdp:AccessPage"]) + self.assertEqual(data['@id'], "pdr:see/repo:data.nist.gov") + + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see/[0]' + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data['accessURL'], "https://data.nist.gov") + self.assertEqual(data['description'], "test") + self.assertEqual(data['@type'], ["nrdp:AccessPage"]) + self.assertEqual(data['@id'], "pdr:see/repo:data.nist.gov") + self.assertNotIn("title", data) + + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see/'+data['@id'], + 'wsgi.input': StringIO('{"accessURL": "https://data.nist.gov", "description": "test",' + ' "@id": "pdr:see/repo:data.nist.gov", "title": "PDR",' + ' "@type": ["nrdp:AccessPage", "dcat:Distribution"]}') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + + self.assertEqual(data['accessURL'], "https://data.nist.gov") + self.assertEqual(data['description'], "test") + self.assertEqual(data['@type'], ["nrdp:AccessPage", "dcat:Distribution"]) + self.assertEqual(data['@id'], "pdr:see/repo:data.nist.gov") + self.assertEqual(data['title'], 
"PDR") + + req = { + 'REQUEST_METHOD': 'PUT', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see', + 'wsgi.input': StringIO('[]') + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, []) + + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data/pdr:see' + } + self.resp = [] + body = self.app(req, self.start) + self.assertIn("200 ", self.resp[0]) + data = self.body2dict(body) + self.assertEqual(data, []) + + + + + From 4b8af9fa6a480b229384a8a6c92279d6ebb36bf2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 21 Mar 2023 17:19:38 -0400 Subject: [PATCH 093/123] dap.service.mds3: temporarily protected non-std reference properties --- python/nistoar/midas/dap/service/mds3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 9a4f123..0d425d9 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -1693,7 +1693,7 @@ def _moderate_author(self, auth, doval=True): return auth _refprops = set(("@id _schema _extensionSchemas title abbrev proxyFor location label "+ - "description citation refType doi inprep").split()) + "description citation refType doi inPreparation vol volNumber pages publishYear").split()) _reftypes = set(("IsDocumentedBy IsSupplementTo IsSupplementedBy IsCitedBy Cites IsReviewedBy "+ "IsReferencedBy References IsSourceOf IsDerivedFrom "+ "IsNewVersionOf IsPreviousVersionOf").split()) From 657f30a144919d45ce7902f530976dfe1af297f2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 22 Mar 2023 06:07:21 -0400 Subject: [PATCH 094/123] dap.nerdstore.fsbased: keep authors,refs,comps out of res-level md --- python/nistoar/midas/dap/nerdstore/fsbased.py | 5 +++-- python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py | 9 +++++++++ python/tests/nistoar/midas/test_wsgi.py | 4 ++-- 3 
files changed, 14 insertions(+), 4 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py index 2e126fe..4a8fa74 100644 --- a/python/nistoar/midas/dap/nerdstore/fsbased.py +++ b/python/nistoar/midas/dap/nerdstore/fsbased.py @@ -807,7 +807,7 @@ class FSBasedResource(NERDResource): files on disk. """ - _subprops = "authors references components @id".split() + _subprops = "authors references components".split() def __init__(self, id: str, storeroot: str, create: bool=True, parentlog: Logger=None): super(FSBasedResource, self).__init__(id, parentlog) @@ -899,7 +899,8 @@ def _cache_res_md(self, md): raise StorageFormatException("%s: Failed to write file metadata: %s" % (str(self._seqp), str(ex))) - def replace_res_data(self, md): + def replace_res_data(self, md): + md = OrderedDict(p for p in md.items() if p[0] not in self._subprops) self._cache_res_md(md) def get_res_data(self) -> Mapping: diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py index 3282814..9911f25 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py @@ -874,6 +874,15 @@ def test_ctor(self): with self.assertRaises(fsbased.RecordDeleted): self.res.authors + def test_replace_res_data(self): + nerd = load_simple() + self.res.replace_res_data(nerd) + resmd = self.res.get_res_data() + self.assertNotIn('authors', resmd) + self.assertNotIn('references', resmd) + self.assertNotIn('components', resmd) + + class TestFSBasedResourceStorage(test.TestCase): def setUp(self): diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index a84ff4b..6396bbc 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -1099,13 +1099,13 @@ def test_upd_links2(self): req = { 'REQUEST_METHOD': 'GET', - 'PATH_INFO': 
'/midas/dap/mds3/mds3:0001/data/pdr:see' + 'PATH_INFO': '/midas/dap/mds3/mds3:0001/data' } self.resp = [] body = self.app(req, self.start) self.assertIn("200 ", self.resp[0]) data = self.body2dict(body) - self.assertEqual(data, []) + self.assertEqual(data.get('components',[]), []) From eaeea3338543db00e47388409d961806e396fdef Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 22 Mar 2023 06:14:55 -0400 Subject: [PATCH 095/123] dap.nerdstore.fsbased: tweak unit test --- python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py index 9911f25..25eafa0 100644 --- a/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py +++ b/python/tests/nistoar/midas/dap/nerdstore/test_fsbased.py @@ -878,6 +878,9 @@ def test_replace_res_data(self): nerd = load_simple() self.res.replace_res_data(nerd) resmd = self.res.get_res_data() + self.assertEqual(resmd['title'], nerd.get('title')) + self.assertEqual(resmd['description'], nerd.get('description')) + self.assertEqual(resmd['contactPoint'], nerd.get('contactPoint')) self.assertNotIn('authors', resmd) self.assertNotIn('references', resmd) self.assertNotIn('components', resmd) From fddd012f130759face538e726555f6953ae5382d Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 27 Mar 2023 12:27:21 -0400 Subject: [PATCH 096/123] midas-uwsgi.py: be prepared for working directory --- scripts/midas-uwsgi.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/scripts/midas-uwsgi.py b/scripts/midas-uwsgi.py index 1e6077f..fc5bb70 100644 --- a/scripts/midas-uwsgi.py +++ b/scripts/midas-uwsgi.py @@ -93,7 +93,21 @@ def _dec(obj): dbtype = DEF_MIDAS_DB_TYPE if dbtype == "fsbased": - dbdir = os.path.join(cfg.get('working_dir','.'), "dbfiles") + # determine the DB's root directory + wdir = cfg.get('working_dir','.') + dbdir = cfg.get("dbio", 
{}).get('db_root_dir') + if not dbdir: + # use a default under the working directory + dbdir = os.path.join(wdir, "dbfiles") + if not os.path.exists(wdir): + os.mkdir(wdir) + elif not os.path.isabs(dbdir): + # if relative, make it relative to the work directory + dbdir = os.path.join(wdir, dbdir) + if not os.path.exists(wdir): + os.mkdir(wdir) + if not os.path.exists(dbdir): + os.makedirs(dbdir) if not os.path.exists(dbdir): os.mkdir(dbdir) factory = FSBasedDBClientFactory(cfg.get("dbio", {}), dbdir) From 5a17852524ddebeb991d2ef1717f92a2e7538901 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sun, 9 Apr 2023 13:21:34 -0400 Subject: [PATCH 097/123] install.sh: install api docs --- scripts/install.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/install.sh b/scripts/install.sh index 1aec65d..f69e9e7 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -33,3 +33,6 @@ $oarmd_pkg/scripts/install_extras.sh --install-dir=$INSTALL_DIR mkdir -p $INSTALL_DIR/var/logs echo cp -r $SOURCE_DIR/etc $INSTALL_DIR cp -r $SOURCE_DIR/etc $INSTALL_DIR + +mkdir -p $INSTALL_DIR/docs +cp $SOURCE_DIR/docs/*-openapi.yml $SOURCE_DIR/docs/*-elements.html $INSTALL_DIR/docs From 85771612db5ec18fc20382c679edd46470871b2d Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sun, 9 Apr 2023 13:29:24 -0400 Subject: [PATCH 098/123] dbio/SubApp: generalize Accepts handling --- python/nistoar/midas/dbio/wsgi/base.py | 15 ++++++++++++- .../nistoar/pdr/publish/service/wsgi/base.py | 16 ++++++++++---- .../nistoar/midas/dbio/wsgi/test_project.py | 21 ++++++++++++++++++- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/python/nistoar/midas/dbio/wsgi/base.py b/python/nistoar/midas/dbio/wsgi/base.py index 3b34026..7aee4f0 100644 --- a/python/nistoar/midas/dbio/wsgi/base.py +++ b/python/nistoar/midas/dbio/wsgi/base.py @@ -1,7 +1,7 @@ """ Some common code for implementing the WSGI front end to dbio """ -import logging, json +import logging, json, re from collections import 
OrderedDict from collections.abc import Callable @@ -43,6 +43,19 @@ def __init__(self, subapp: SubApp, dbclient: DBClient, wsgienv: dict, start_resp if hasattr(self._app, "_recorder") and self._app._recorder: self._reqrec = self._app._recorder.from_wsgi(self._env) + def acceptable(self): + """ + return True if the client's Accept request is compatible with this handler. + + This implementation will return True if "*/*" or "application/json" is included in the + Accept request or if the Accept header is not specified. + """ + accepts = self.get_accepts() + if not accepts: + return True; + jsonre = re.compile(r"/json$") + return "*/*" in accepts or any(jsonre.search(a) for a in accepts); + class FatalError(Exception): def __init__(self, code, reason, explain=None, id=None): if not explain: diff --git a/python/nistoar/pdr/publish/service/wsgi/base.py b/python/nistoar/pdr/publish/service/wsgi/base.py index aae8270..5eea47e 100644 --- a/python/nistoar/pdr/publish/service/wsgi/base.py +++ b/python/nistoar/pdr/publish/service/wsgi/base.py @@ -282,6 +282,16 @@ def authorize(self): """ return bool(self.who) + def get_accepts(self): + """ + return the requested content types as a list ordered by their q-values. An empty list + is returned if no types were specified. + """ + accepts = self._env.get('HTTP_ACCEPT') + if not accepts: + return []; + return order_accepts(accepts) + def acceptable(self): """ return True if the client's Accept request is compatible with this handler. @@ -289,10 +299,8 @@ def acceptable(self): This default implementation will return True if "*/*" is included in the Accept request or if the Accept header is not specified. 
""" - accepts = self._env.get('HTTP_ACCEPT') - if not accepts: - return True; - return "*/*" in order_accepts(accepts) + accepts = self.get_accepts() + return not accepts or "*/*" in order_accepts(accepts) class SubApp(metaclass=ABCMeta): diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index 9452008..221c314 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -84,7 +84,8 @@ def test_create_handler_name(self): path = "mdm1:0001/name" req = { 'REQUEST_METHOD': 'GET', - 'PATH_INFO': self.rootpath + path + 'PATH_INFO': self.rootpath + path, + 'HTTP_ACCEPT': "*/*" } hdlr = self.app.create_handler(req, self.start, path, nistr) self.assertTrue(isinstance(hdlr, prj.ProjectNameHandler)) @@ -92,6 +93,24 @@ def test_create_handler_name(self): self.assertEqual(hdlr._path, "") self.assertEqual(hdlr._id, "mdm1:0001") + # throw in tests for acceptable + self.assertTrue(hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "application/json" + self.assertTrue(hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "text/json" + self.assertTrue(hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "*/json" + self.assertTrue(hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "goob/json" + self.assertTrue(hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "text/html" + self.assertTrue(not hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "text/html,text/json" + self.assertTrue(hdlr.acceptable()) + hdlr._env['HTTP_ACCEPT'] = "text/html,*/*" + self.assertTrue(hdlr.acceptable()) + + def test_get_name(self): path = "mdm1:0003/name" req = { From 0ac3bab44211cbcfbc8a92f38d1ce42e5d567bb2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 22 Apr 2023 17:05:02 -0400 Subject: [PATCH 099/123] switch oar-metadata to integration branch --- metadata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata b/metadata index 1e83bb6..2190bfc 160000 --- a/metadata 
+++ b/metadata @@ -1 +1 @@ -Subproject commit 1e83bb634ca5098033e3ea8b67817c8f74ecc853 +Subproject commit 2190bfc79d97f81d52dd24df0d4e9dc844065b67 From 3cc47835a1c54d1e7d3efe83e2c2e13e743393c9 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 28 Apr 2023 13:17:33 -0400 Subject: [PATCH 100/123] support JWT-based authentication --- python/nistoar/midas/wsgi.py | 34 ++++++++++++++++++++++--- python/tests/nistoar/midas/test_wsgi.py | 30 +++++++++++++++++++++- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index 1fdab57..f15ef87 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -121,6 +121,8 @@ from collections.abc import Mapping, MutableMapping, Callable from copy import deepcopy +import jwt + from . import system from .dbio.base import DBClientFactory from .dbio.wsgi import project as prj, SubApp, Handler, DBIOHandler @@ -517,12 +519,36 @@ def authenticate(self, env) -> PubAgent: :return: a representation of the requesting user :rtype: PubAgent """ - # TODO: support JWT cookie for authentication - - # TODO: support optional client + # get client id, if present + client_id = env.get('HTTP_OAR_CLIENT_ID','(unknown)') + agents = self.cfg.get('client_agents', {}).get(client_id, [client_id]) + allowed = self.cfg.get('allowed_clients') + if allowed is not None and client_id not in allowed: + return PubAgent("invalid", PubAgent.UNKN, "anonymous", agents) + + # ensure an authenticated identity + auth = env.get('HTTP_AUTHORIZATION', "x").split() + jwtcfg = self.cfg.get('jwt_auth') + if jwtcfg and auth[0] == "Bearer": + try: + userinfo = jwt.decode(auth[1], jwtcfg.get("key", ""), + algorithms=[jwtcfg.get("algorithm", "HS256")]) + except jwt.InvalidTokenError as ex: + log.warning("Invalid token can not be decoded: %s", str(ex)) + return PubAgent("invalid", PubAgent.UNKN, "anonymous", agents) + + return self._agent_from_claimset(userinfo, agents) # anonymous user - return 
PubAgent("public", PubAgent.UNKN, "anonymous") + return PubAgent("public", PubAgent.UNKN, "anonymous", agents) + + def _agent_from_claimset(self, userinfo: dict, agents=None): + subj = userinfo.get('subject') + group = "public" + if subj.endswith("@nist.gov"): + group = "nist" + subj = subj[:-1*len("@nist.gov")] + return PubAgent(group, PubAgent.USER, subj, agents) def handle_request(self, env, start_resp): path = env.get('PATH_INFO', '/').strip('/').split('/') diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 6396bbc..dd963a9 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -3,7 +3,7 @@ from io import StringIO from pathlib import Path import unittest as test -import yaml +import yaml, jwt from nistoar.midas.dbio import inmem, fsbased, base from nistoar.midas import wsgi as app @@ -641,6 +641,9 @@ def setUp(self): self.config['working_dir'] = self.workdir self.config['services']['dap']['conventions']['mds3']['nerdstorage']['store_dir'] = \ os.path.join(self.workdir, 'nerdm') + self.config['jwt_auth'] = { "key": "XXXXX", "algorithm": "HS256" } + self.config['client_agents'] = {'ark:/88434/tl0-0001': ["Unit testing agent"]} + self.clifact = fsbased.FSBasedDBClientFactory({}, self.dbdir) self.app = app.MIDASApp(self.config, self.clifact) @@ -662,6 +665,31 @@ def test_set_up(self): self.assertTrue(os.path.isdir(os.path.join(self.workdir, 'dbfiles'))) self.assertTrue(not os.path.exists(os.path.join(self.workdir, 'dbfiles', 'nextnum'))) + def test_authenticate(self): + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': '/midas/dmp' + } + who = self.app.authenticate(req) + self.assertEqual(who.group, "public") + self.assertEqual(who.actor, "anonymous") + self.assertEqual(who.agents, ['(unknown)']) + + req['HTTP_AUTHORIZATION'] = "Bearer goober" # bad token + req['HTTP_OAR_CLIENT_ID'] = 'ark:/88434/tl0-0001' + who = self.app.authenticate(req) + self.assertEqual(who.group, 
"invalid") + self.assertEqual(who.actor, "anonymous") + self.assertEqual(who.agents, ["Unit testing agent"]) + + token = jwt.encode({"subject": "fed@nist.gov"}, self.config['jwt_auth']['key'], algorithm="HS256") + req['HTTP_AUTHORIZATION'] = "Bearer "+token + who = self.app.authenticate(req) + self.assertEqual(who.group, "nist") + self.assertEqual(who.actor, "fed") + self.assertEqual(who.agents, ["Unit testing agent"]) + + def test_create_dmp(self): req = { 'REQUEST_METHOD': 'POST', From 7577c6c613d440d3348e6ca630e383aa0065d29a Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 26 Jul 2023 09:06:13 -0400 Subject: [PATCH 101/123] dbio: add more robustness to the JWT handling --- docker/pyenv/Dockerfile | 2 +- python/nistoar/midas/wsgi.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/pyenv/Dockerfile b/docker/pyenv/Dockerfile index c438ea6..167ce0f 100644 --- a/docker/pyenv/Dockerfile +++ b/docker/pyenv/Dockerfile @@ -14,7 +14,7 @@ FROM oar-metadata/ejsonschema RUN apt-get update && apt-get install -y python-yaml curl wget less sudo zip \ p7zip-full ca-certificates git # RUN pip install --upgrade pip setuptools -RUN pip install funcsigs 'bagit>=1.6.3,<2.0' 'fs>=2.0.21' jsonpatch mako +RUN pip install funcsigs 'bagit>=1.6.3,<2.0' 'fs>=2.0.21' jsonpatch mako pyjwt # install multibag from source RUN multibag_ver=0.3 && \ diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index f15ef87..cba7aab 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -505,6 +505,9 @@ def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, factory = SubAppFactory(self.cfg, subapp_factory_funcs) self.subapps = factory.create_suite(log, dbio_client_factory) + if not self.cfg.get('jwt_auth'): + log.warning("JWT Authentication is not configured") + # Add the groups endpoint # TODO @@ -545,7 +548,9 @@ def authenticate(self, env) -> PubAgent: def _agent_from_claimset(self, userinfo: dict, 
agents=None): subj = userinfo.get('subject') group = "public" - if subj.endswith("@nist.gov"): + if not subj: + subject = "anonymous" + elif subj.endswith("@nist.gov"): group = "nist" subj = subj[:-1*len("@nist.gov")] return PubAgent(group, PubAgent.USER, subj, agents) From dec5006079ddd8847f9cbc3ebdd6966c8efdd7be Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 26 Jul 2023 09:51:23 -0400 Subject: [PATCH 102/123] dbio authentication bug fix: use std name for subject --- python/nistoar/midas/wsgi.py | 2 +- python/tests/nistoar/midas/test_wsgi.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index cba7aab..cd895a1 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -546,7 +546,7 @@ def authenticate(self, env) -> PubAgent: return PubAgent("public", PubAgent.UNKN, "anonymous", agents) def _agent_from_claimset(self, userinfo: dict, agents=None): - subj = userinfo.get('subject') + subj = userinfo.get('sub') group = "public" if not subj: subject = "anonymous" diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index dd963a9..5bbcc2f 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -682,7 +682,7 @@ def test_authenticate(self): self.assertEqual(who.actor, "anonymous") self.assertEqual(who.agents, ["Unit testing agent"]) - token = jwt.encode({"subject": "fed@nist.gov"}, self.config['jwt_auth']['key'], algorithm="HS256") + token = jwt.encode({"sub": "fed@nist.gov"}, self.config['jwt_auth']['key'], algorithm="HS256") req['HTTP_AUTHORIZATION'] = "Bearer "+token who = self.app.authenticate(req) self.assertEqual(who.group, "nist") From 9bff0b3f56cd5f20025fcec0f1c4c9ec6c1fc224 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 18 Oct 2023 14:46:41 -0400 Subject: [PATCH 103/123] dbio: log record creation and updates --- python/nistoar/midas/dap/service/mds3.py | 6 +++++- 
python/nistoar/midas/dbio/base.py | 7 +++++++ python/nistoar/midas/dbio/project.py | 13 ++++++++++++- python/nistoar/midas/wsgi.py | 5 ++++- .../tests/nistoar/midas/dap/service/test_mds3.py | 4 ++-- .../nistoar/midas/dap/service/test_mds3_app.py | 16 ++++++++++------ 6 files changed, 40 insertions(+), 11 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 0d425d9..8d5ce9e 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -273,6 +273,7 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: raise self._record_action(Action(Action.CREATE, prec.id, self.who, prec.status.message)) + self.log.info("Created %s record %s (%s) for %s", self.dbcli.project, prec.id, prec.name, self.who) return prec def _new_data_for(self, recid, meta=None, schemaid=None): @@ -619,6 +620,8 @@ def _update_data(self, id, newdata, part=None, prec=None, nerd=None, replace=Fal raise self._save_data(self._summarize(nerd), prec, message, set_action and self.STATUS_ACTION_UPDATE) + self.log.info("Updated data for %s record %s (%s) for %s", + self.dbcli.project, prec.id, prec.name, self.who) return data def _summarize(self, nerd: NERDResource): @@ -1637,6 +1640,7 @@ def _update_component(self, nerd: NERDResource, data: Mapping, key=None, replace return data def _filter_props(self, obj, props): + # remove all properties from obj that are not listed in props delprops = [k for k in obj if k not in props or (not obj.get(k) and obj.get(k) is not False)] for k in delprops: del obj[k] @@ -2041,7 +2045,7 @@ def do_POST(self, path): return self.send_error_resp(405, "POST not allowed", "POST not supported on path") - except dbio.NotAuthorized as ex: + except NotAuthorized as ex: return self.send_unauthorized() except ObjectNotFound as ex: return send.send_error_resp(404, "Path not found", diff --git a/python/nistoar/midas/dbio/base.py b/python/nistoar/midas/dbio/base.py index 
082da9a..8631f97 100644 --- a/python/nistoar/midas/dbio/base.py +++ b/python/nistoar/midas/dbio/base.py @@ -760,6 +760,13 @@ def __init__(self, config: Mapping, projcoll: str, nativeclient=None, foruser: s self._dbgroups = DBGroups(self) + @property + def project(self) -> str: + """ + return the name of the project collection/type that this client handles records for + """ + return self._projcoll + @property def user_id(self) -> str: """ diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index 487c909..b7da9bf 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -139,6 +139,7 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: prec.save() self._record_action(Action(Action.CREATE, prec.id, self.who, prec.status.message)) + self.log.info("Created %s record %s (%s) for %s", self.dbcli.project, prec.id, prec.name, self.who) return prec def _get_id_shoulder(self, user: PubAgent): @@ -310,7 +311,9 @@ def update_data(self, id, newdata, part=None, message="", _prec=None): finally: self._record_action(provact) - + + self.log.info("Updated data for %s record %s (%s) for %s", + self.dbcli.project, _prec.id, _prec.name, self.who) return self._extract_data_part(data, part) def _jsondiff(self, old, new): @@ -450,6 +453,8 @@ def replace_data(self, id, newdata, part=None, message="", _prec=None): else: self._record_action(provact) + self.log.info("Replaced data for %s record %s (%s) for %s", + self.dbcli.project, _prec.id, _prec.name, self.who) return self._extract_data_part(data, part) def _save_data(self, indata: Mapping, prec: ProjectRecord, @@ -570,6 +575,8 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): finally: self._record_action(provact) + self.log.info("Cleared out data for %s record %s (%s) for %s", + self.dbcli.project, _prec.id, _prec.name, self.who) def update_status_message(self, id: str, message: str, _prec=None) -> 
status.RecordStatus: @@ -655,6 +662,8 @@ def finalize(self, id, message=None, as_version=None, _prec=None) -> status.Reco stat.act(self.STATUS_ACTION_FINALIZE, message or defmsg) _prec.save() + self.log.info("Finalized %s record %s (%s) for %s", + self.dbcli.project, _prec.id, _prec.name, self.who) return stat.clone() MAJOR_VERSION_LEV = 0 @@ -789,6 +798,8 @@ def submit(self, id: str, message: str=None, _prec=None) -> status.RecordStatus: stat.act(self.STATUS_ACTION_SUBMIT, message or defmsg) _prec.save() + self.log.info("Submitted %s record %s (%s) for %s", + self.dbcli.project, _prec.id, _prec.name, self.who) return stat.clone() diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index cd895a1..9d398ae 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -547,12 +547,15 @@ def authenticate(self, env) -> PubAgent: def _agent_from_claimset(self, userinfo: dict, agents=None): subj = userinfo.get('sub') + email = userinfo.get('userEmail','') group = "public" if not subj: - subject = "anonymous" + subj = "anonymous" elif subj.endswith("@nist.gov"): group = "nist" subj = subj[:-1*len("@nist.gov")] + elif email.endswith("@nist.gov"): + group = "nist" return PubAgent(group, PubAgent.USER, subj, agents) def handle_request(self, env, start_resp): diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index 8d40c55..affb56b 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -484,7 +484,7 @@ def test_moderate_reference(self): ref = self.svc._moderate_reference({"location": "doi:10.18434/example", "refType": "myown", "title": "A Resource", "@id": "#doi:ex", "abbrev": ["SRB-400"], "citation": "C", - "label": "drink me", "inprep": False, + "label": "drink me", "inPreparation": False, "goob": "gurn"}) except InvalidUpdate as ex: self.fail("Validation Error: "+ex.format_errors()) @@ -492,7 +492,7 
@@ def test_moderate_reference(self): "title": "A Resource", "@id": "#doi:ex", "@type": ['deo:BibliographicReference'], "abbrev": ["SRB-400"], "citation": "C", - "label": "drink me", "inprep": False}) + "label": "drink me", "inPreparation": False}) def test_replace_update_references(self): self.create_service() diff --git a/python/tests/nistoar/midas/dap/service/test_mds3_app.py b/python/tests/nistoar/midas/dap/service/test_mds3_app.py index 71b2e28..bd2a3bf 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3_app.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3_app.py @@ -170,7 +170,8 @@ def test_create(self): self.resp = [] - req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": {"fn": "Gurn Cranston"} }, + req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": {"fn": "Gurn Cranston"}, + "keyword": [ "testing" ] }, "meta": { "creatorisContact": "false", "softwareLink": "https://sw.ex/gurn" }, "name": "Gurn's Penultimate" })) @@ -189,7 +190,8 @@ def test_create(self): self.assertIs(resp['meta']["creatorisContact"], False) self.assertEqual(resp['data']['@id'], 'ark:/88434/mds3-0002') self.assertEqual(resp['data']['doi'], 'doi:10.88888/mds3-0002') - self.assertNotIn('contactPoint', resp['data']) # because ['data'] is just a summary + self.assertNotIn('keyword', resp['data']) # because ['data'] is just a summary + self.assertIn('contactPoint', resp['data']) # this is included in ['data'] summary self.resp = [] path = resp['id'] + '/data' @@ -213,7 +215,7 @@ def test_create(self): self.assertIn('_extensionSchemas', resp) self.assertEqual(len(resp.get('components',[])), 1) self.assertEqual(resp['components'][0]['accessURL'], "https://sw.ex/gurn") - self.assertEqual(len(resp), 8) + self.assertEqual(len(resp), 9) def test_put_patch(self): testnerd = read_nerd(pdr2210) @@ -229,7 +231,8 @@ def test_put_patch(self): 'REQUEST_METHOD': 'POST', 'PATH_INFO': self.rootpath + path } - req['wsgi.input'] = StringIO(json.dumps({"data": { 
"contactPoint": res['contactPoint'] }, + req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": res['contactPoint'], + "keyword": [ "testing" ] }, "meta": { "creatorisContact": "false" }, "name": "OptSortSph" })) hdlr = self.app.create_handler(req, self.start, path, nistr) @@ -246,7 +249,8 @@ def test_put_patch(self): self.assertIs(resp['meta']["creatorisContact"], False) self.assertEqual(resp['data']['@id'], 'ark:/88434/mds3-0001') self.assertEqual(resp['data']['doi'], 'doi:10.88888/mds3-0001') - self.assertNotIn('contactPoint', resp['data']) # because ['data'] is just a summary + self.assertNotIn('keyword', resp['data']) # because ['data'] is just a summary + self.assertIn('contactPoint', resp['data']) # this is included in ['data'] summary self.resp = [] id = resp['id'] @@ -274,7 +278,7 @@ def test_put_patch(self): self.assertNotIn('authors', resp) self.assertNotIn('description', resp) self.assertNotIn('rights', resp) - self.assertEqual(len(resp), 7) + self.assertEqual(len(resp), 8) self.resp = [] req = { From 8a19619b282e22276ad6ffd5252d2fb39c2c96c8 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 18 Oct 2023 15:11:37 -0400 Subject: [PATCH 104/123] dbio: add support for config param, jwt_auth.require_expiration midasserver: turn on authentication in standalone server --- docker/midasserver/midas-dmpdap_conf.yml | 3 +++ docker/midasserver/xyztoken.txt | 1 + python/nistoar/midas/wsgi.py | 12 ++++++++++++ python/tests/nistoar/midas/test_wsgi.py | 2 +- 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 docker/midasserver/xyztoken.txt diff --git a/docker/midasserver/midas-dmpdap_conf.yml b/docker/midasserver/midas-dmpdap_conf.yml index 4f300e0..668df8e 100644 --- a/docker/midasserver/midas-dmpdap_conf.yml +++ b/docker/midasserver/midas-dmpdap_conf.yml @@ -6,6 +6,9 @@ about: title: "MIDAS Authoring Services" describedBy: "http://localhost:9091/midas/docs" href: "http://localhost:9091/midas/" +jwt_auth: + key: "tokensecret" + 
require_expiration: false services: dap: about: diff --git a/docker/midasserver/xyztoken.txt b/docker/midasserver/xyztoken.txt new file mode 100644 index 0000000..a5a7712 --- /dev/null +++ b/docker/midasserver/xyztoken.txt @@ -0,0 +1 @@ +eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ4eXpAbmlzdC5nb3YiLCJ1c2VyTmFtZSI6Inh5eiIsInVzZXJMYXN0TmFtZSI6IkRvZSIsInVzZXJFbWFpbCI6Inh5ekBuaXN0LmdvdiIsInVzZXJPVSI6Ik1NTCJ9.l0KbcWrIYLirDGbqyhL-5iyMuZWFt28ZvV2nMp-kOFI diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index 9d398ae..3d721c6 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -507,6 +507,12 @@ def __init__(self, config: Mapping, dbio_client_factory: DBClientFactory=None, if not self.cfg.get('jwt_auth'): log.warning("JWT Authentication is not configured") + else: + if not isinstance(self.cfg['jwt_auth'], Mapping): + raise ConfigurationException("Config param, jwt_auth, not a dictionary: "+ + str(self.cfg['jwt_auth'])) + if not self.cfg['jwt_auth'].get('require_expiration', True): + log.warning("JWT Authentication: token expiration is not required") # Add the groups endpoint # TODO @@ -540,6 +546,12 @@ def authenticate(self, env) -> PubAgent: log.warning("Invalid token can not be decoded: %s", str(ex)) return PubAgent("invalid", PubAgent.UNKN, "anonymous", agents) + # make sure the token has an expiration date + if jwtcfg.get('require_expiration', True) and not userinfo.get('exp'): + # Note expiration was checked implicitly by the above jwt.decode() call + log.warning("Rejecting non-expiring token for user %s", userinfo.get('sub', "(unknown)")) + return PubAgent("invalid", PubAgent.UNKN, "anonymous", agents) + return self._agent_from_claimset(userinfo, agents) # anonymous user diff --git a/python/tests/nistoar/midas/test_wsgi.py b/python/tests/nistoar/midas/test_wsgi.py index 5bbcc2f..75560b2 100644 --- a/python/tests/nistoar/midas/test_wsgi.py +++ b/python/tests/nistoar/midas/test_wsgi.py @@ -641,7 +641,7 @@ def 
setUp(self): self.config['working_dir'] = self.workdir self.config['services']['dap']['conventions']['mds3']['nerdstorage']['store_dir'] = \ os.path.join(self.workdir, 'nerdm') - self.config['jwt_auth'] = { "key": "XXXXX", "algorithm": "HS256" } + self.config['jwt_auth'] = { "key": "XXXXX", "algorithm": "HS256", "require_expiration": False } self.config['client_agents'] = {'ark:/88434/tl0-0001': ["Unit testing agent"]} self.clifact = fsbased.FSBasedDBClientFactory({}, self.dbdir) From 754de021faee93d1d951a28c76273204c303d397 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 28 Oct 2023 14:29:11 -0400 Subject: [PATCH 105/123] midas wsgi: inline doc: jwt_aut --- python/nistoar/midas/wsgi.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index 3d721c6..b204910 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -49,6 +49,26 @@ includes a ``factory`` property whose string value identifies the type of backend storage to use ("mongo", "fsbased", or "inmem"). The other properties are the parameters that are specific to the backend storage. +``jwt_auth`` + (object) an object that provides configuration related to JWT-based authentication to the service + endpoints. If set, a JWT token (presented via the Authorization HTTP header) will be used to + determine the client user identity and attributes; if a token is not included with requests, the + user will be set to "anonymous". If this configuration is not set, all client users will be + considered anonymous. + +The supported subproperties for ``jwt_auth`` are as follows: + +``key`` + (str) _required_. The secret key shared with the token generator (usually a separate service) used to + encrypt the token. + +``algorithm`` + (str) _optional_. The name of the encryption algorithm to encrypt the token. Currently, only one value + is support (the default): "HS256". + +``require_expiration`` + (bool) _optional_. 
If True (default), any JWT token that does not include an expiration time will be + rejected, and the client user will be set to anonymous. Most of the properties in a service configuration object will be treated as default configuration parameters for configuring a particular version, or _convention_, of the service. Convention-level From 96cb2f35f0c8f6a149f92cb964afcd035b734578 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Mon, 30 Oct 2023 10:49:36 -0400 Subject: [PATCH 106/123] metadata submod: pull in support for ca certs in building --- metadata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata b/metadata index 2190bfc..5cf9a8d 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit 2190bfc79d97f81d52dd24df0d4e9dc844065b67 +Subproject commit 5cf9a8d632726738ea9fa1e88621fa701b8652dd From d604bf67e50fa7d26a7c6fb2fa0bffa2b5c27ba3 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 31 Oct 2023 12:35:18 -0400 Subject: [PATCH 107/123] cacerts support: add install_ca_certs.sh, update dockbuild.sh to work with oar-metadata --- docker/cacerts/README.md | 13 +++++++++++++ docker/dockbuild.sh | 3 +++ oar-build/_dockbuild.sh | 15 +++++++++++++++ scripts/install_ca_certs.sh | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+) create mode 100644 docker/cacerts/README.md create mode 100755 scripts/install_ca_certs.sh diff --git a/docker/cacerts/README.md b/docker/cacerts/README.md new file mode 100644 index 0000000..ceae86e --- /dev/null +++ b/docker/cacerts/README.md @@ -0,0 +1,13 @@ +This directory contains non-standard CA certificates needed to build the docker +images. + +Failures building the Docker containers defined in ../ due to SSL certificate +verification errors may be a consequence of your local network's firewall. In +particular, the firewall may be substituting external site certificates with +its own signed by a non-standard CA certficate (chain). 
If so, you can place +the necessary certificates into this directory; they will be passed into the +containers, allowing them to safely connect to those external sites. + +Be sure the certificates are in PEM format and include a .crt file extension. + +Do not remove this README file; doing so may cause a Docker build faiure. diff --git a/docker/dockbuild.sh b/docker/dockbuild.sh index c829496..309c4c9 100755 --- a/docker/dockbuild.sh +++ b/docker/dockbuild.sh @@ -44,6 +44,9 @@ setup_build log_intro # record start of build into log +if { echo $BUILD_IMAGES | grep -qs pymongo; }; then + cp_ca_certs_to ../metadata/docker +fi $codedir/metadata/docker/dockbuild.sh $BUILD_IMAGES if { echo " $BUILD_IMAGES " | grep -qs " pyenv "; }; then diff --git a/oar-build/_dockbuild.sh b/oar-build/_dockbuild.sh index 0f2cdc3..041a873 100644 --- a/oar-build/_dockbuild.sh +++ b/oar-build/_dockbuild.sh @@ -114,6 +114,21 @@ function setup_build { BUILD_OPTS=`collect_build_opts` } +function cp_ca_certs_to { + # assuming we are in the docker dir + [ \! -d cacerts ] || { + crts=`compgen -G 'cacerts/*.crt' || true` + [ -z "$crts" ] || { + echo "${prog}: installing CA certs from docker/cacerts" + for cont in $@; do + mkdir -p $cont/cacerts + echo '+' cp $crts cacerts/README.md $cont/cacerts + cp $crts cacerts/README.md $cont/cacerts + done + } + } +} + function help { helpfile=$OAR_BUILD_DIR/dockbuild_help.txt [ -f "$OAR_DOCKER_DIR/dockbuild_help.txt" ] && \ diff --git a/scripts/install_ca_certs.sh b/scripts/install_ca_certs.sh new file mode 100755 index 0000000..c759a6d --- /dev/null +++ b/scripts/install_ca_certs.sh @@ -0,0 +1,37 @@ +#! /bin/bash +# +# install_ca_certs.sh -- copy the specified CA certificates into this source so that they can be used +# to build the software via docker. +# +# usage: install_ca_certs.sh CA_CERT_FILE... 
+# +# where CA_CERT_FILE is a file path to a CA certificate to install +# +# This script helps address the problem with docker-based builds when run within a firewall that +# replaces external site certificates with ones signed by a non-standard CA, causing the retrieval +# of software dependencies to fail. This script is used by oar-docker's localbuild script to receive +# extra CA certificates that addresses such failures. Because localdeploy makes no assumptions about +# how this source code repository builds using docker, this script encapsulates that knowledge on +# behalf of localbuild. +# +# Note: if this repository does not require/support use of non-standard CA certificates, remove (or +# rename) this script. +# +set -e +prog=`basename $0` +execdir=`dirname $0` +[ "$execdir" = "" -o "$execdir" = "." ] && execdir=$PWD +basedir=`dirname $execdir` + +cacertdir="$basedir/docker/cacerts" +[ -d "$cacertdir" ] || exit 0 # I guess we don't need the certs + +crts=`echo $@ | sed -e 's/^ *//' -e 's/ *$//'` +[ -n "$crts" ] || { + print "${prog}: Missing cert file argument" + false +} + +echo '+' cp $crts $cacertdir +cp $crts $cacertdir + From 878a47e2fa096547c7da073e6c4341a92bd1641c Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 9 Nov 2023 13:21:21 -0500 Subject: [PATCH 108/123] midas service: add a few more log messages about failed authentication --- python/nistoar/midas/wsgi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/nistoar/midas/wsgi.py b/python/nistoar/midas/wsgi.py index b204910..f2594a0 100644 --- a/python/nistoar/midas/wsgi.py +++ b/python/nistoar/midas/wsgi.py @@ -553,6 +553,7 @@ def authenticate(self, env) -> PubAgent: agents = self.cfg.get('client_agents', {}).get(client_id, [client_id]) allowed = self.cfg.get('allowed_clients') if allowed is not None and client_id not in allowed: + log.warning("Client %s is not recongized among %s", client_id, str(allowed)) return PubAgent("invalid", PubAgent.UNKN, "anonymous", agents) # ensure an 
authenticated identity @@ -575,6 +576,8 @@ def authenticate(self, env) -> PubAgent: return self._agent_from_claimset(userinfo, agents) # anonymous user + if jwtcfg and auth[0] != "Bearer": + log.warning("Client %s did not provide an authentication token as expected", client_id) return PubAgent("public", PubAgent.UNKN, "anonymous", agents) def _agent_from_claimset(self, userinfo: dict, agents=None): From 99765019bec394a965d5b79c8592c1996d0c78e4 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 17 Nov 2023 10:11:39 -0500 Subject: [PATCH 109/123] nerdmstore: fix up API typing --- python/nistoar/midas/dap/nerdstore/base.py | 79 ++++++++++--------- python/nistoar/midas/dap/nerdstore/fsbased.py | 2 +- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/python/nistoar/midas/dap/nerdstore/base.py b/python/nistoar/midas/dap/nerdstore/base.py index c5baa2a..4a5dc62 100644 --- a/python/nistoar/midas/dap/nerdstore/base.py +++ b/python/nistoar/midas/dap/nerdstore/base.py @@ -4,7 +4,7 @@ import logging, re from abc import ABC, ABCMeta, abstractproperty, abstractmethod from collections.abc import MutableMapping, Mapping, MutableSequence -from typing import Iterable, Iterator, NewType, List +from typing import Iterable, Iterator, NewType, List, Union, Any from logging import Logger import nistoar.nerdm.utils as nerdmutils @@ -12,14 +12,17 @@ __all__ = [ "NERDResource", "NERDAuthorList", "NERDRefList", "NERDNonFileComps", "NERDFileComps", "NERDStorageException", "MismatchedIdentifier", "RecordDeleted", "ObjectNotFound", - "StorageFormatException", "CollectionRemovalDissallowed", "NERDResourceStorage" ] + "StorageFormatException", "CollectionRemovalDissallowed", "NERDResourceStorage", "IDorPos" ] +# Forward declarations of the NERDResource model types NERDResource = NewType("NERDResource", ABC) NERDAuthorList = NewType("NERDAuthorList", NERDResource) NERDRefList = NewType("NERDRefList", NERDResource) NERDFileComps = NewType("NERDFileComps", NERDResource) 
NERDNonFileComps = NewType("NERDNonFileComps", NERDResource) +IDorPos = Union[Any, int] + class NERDResource(ABC): """ an abstract base class representing a NERDm Resource record in storage. @@ -34,14 +37,14 @@ class NERDResource(ABC): client. Once an update is made via this interface, it is expected to be immediately persisted to the underlying storage. """ - def __init__(self, id: str, parentlog: logging.Logger = None): + def __init__(self, id, parentlog: logging.Logger = None): self._id = id if not id: raise ValueError("NERDResource: base init requires id") if not parentlog: parentlog = logging.getLogger("nerdstore") - self.log = parentlog.getChild(id) + self.log = parentlog.getChild(str(id)) @property def id(self): @@ -80,14 +83,14 @@ def references(self) -> NERDRefList: raise NotImplementedError() @abstractmethod - def replace_res_data(self, md): + def replace_res_data(self, md: Mapping): """ replace all resource-level properties excluding `components`, `authors`, and `references` from the provided metadata. 
""" raise NotImplementedError() - def replace_all_data(self, md): + def replace_all_data(self, md: Mapping): """ replace all data provide in the given metadata model """ @@ -164,7 +167,7 @@ def empty(self): raise NotImplementedError() @abstractmethod - def _get_item_by_id(self, id: str): + def _get_item_by_id(self, id): """ return the author description with the given identifier assigned to it :raise KeyError: if an author with the given identifier is not part of this list @@ -208,7 +211,7 @@ def set_order(self, ids: Iterable[str]): raise NotImplementedError() @abstractmethod - def _set_item(self, id: str, md: Mapping, pos: int=None): + def _set_item(self, id, md: Mapping, pos: int=None): """ commit the given metadata into storage with the specified key and position :param str id: the identifier to assign to the item being added; if this item exists @@ -228,7 +231,7 @@ def _set_item(self, id: str, md: Mapping, pos: int=None): raise NotImplementedError() @abstractmethod - def _remove_item(self, id: str): + def _remove_item(self, id): """ delete and return the item from the list with the given identiifer """ @@ -256,21 +259,21 @@ def __iter__(self): for id in ids: yield self._get_item_by_id(id) - def set(self, key, md): + def set(self, key: IDorPos, md: Mapping): if isinstance(key, int): itm = self._get_item_by_pos(key) key = itm.get('@id') if itm else None self._set_item(key, md) - def _select_id_for(self, md): + def _select_id_for(self, md: Mapping): id = md.get('@id') if not id: id = self._get_default_id_for(md) return id @abstractmethod - def _get_default_id_for(self, md): + def _get_default_id_for(self, md: Mapping): """ determine an appropriate identifier for the given list item metadata; this is usually the value of a particular, custom property. 
If a value cannot be determined based on the metadata, @@ -291,7 +294,7 @@ def _reserve_id(self, id): """ raise NotImplementedError() - def insert(self, pos, md): + def insert(self, pos: int, md: Mapping): """ inserts a new item into the specified position in the list. If the item has an '@id' property and that identifier is already in the list, the identifier will be replaced with a new one. (Use @@ -343,28 +346,28 @@ def pop(self, key): return self._remove_item(key) @abstractmethod - def move(self, idorpos: str, pos: int = None, rel: int = 0) -> int: + def move(self, idorpos: IDorPos, pos: int = None, rel: Union[bool,int] = 0) -> int: """ - move an item currently in the list to a new position. The `rel` parameter allows one to + move an item currently in the list to a new position. The ``rel`` parameter allows one to push an item up or down in the order. - :param idorpos: the target item to move, either as the string identifier or its current + :param idorpos: the target item to move, either as its identifier or its current (int) position - :param int pos: the new position of the item (where `rel` controls whether this is an + :param int pos: the new position of the item (where ``rel`` controls whether this is an absolute or relative position). If the absolute position is zero or less, the item will be moved to the beginning of the list; if it is a value greater or equal to the number of items in the list, it will be move to the end of the list. Zero as an absolute value is the first position in the list. If `pos` - is set to `None`, the item will be moved to the end of the list (regardless, - of the value of rel. - :param int|bool rel: if value evaluates as False (default), then `pos` will be interpreted + is set to ``None``, the item will be moved to the end of the list (regardless, + of the value of ``rel``. + :param int|bool rel: if value evaluates as False (default), then ``pos`` will be interpreted as an absolute value. 
Otherwise, it will be treated as a position relative to its current position. A positive number will cause the item - to be move `pos` places toward the end of the list; a negative number moves + to be move ``pos`` places toward the end of the list; a negative number moves it toward the beginning of the list. Any other non-numeric value that evaluates to True behaves as a value of +1. :raises KeyError: if the target item is not found in the list - :return: the new (absolute) position of the item after the move (taking into `rel`). + :return: the new (absolute) position of the item after the move (taking into ``rel``). :rtype: int """ raise NotImplementedError() @@ -386,7 +389,7 @@ class NERDAuthorList(_NERDOrderedObjectList): list of authors via :py:property:`ids`, followed by a call to :py:method:`set_order`, passing in the reordered list of identifiers. """ - def get_author_by_id(self, id: str): + def get_author_by_id(self, id): """ return the author description with the given identifier assigned to it. :raise KeyError: if an author with the given identifier is not part of this list @@ -400,7 +403,7 @@ def get_author_by_pos(self, pos: int): """ return self._get_item_by_pos(pos) - def _get_default_id_for(self, md): + def _get_default_id_for(self, md: Mapping): out = md.get('orcid') if out: out = re.sub(r'^https://orcid.org/', 'doi:', out) @@ -426,7 +429,7 @@ class NERDRefList(_NERDOrderedObjectList): the reordered list of identifiers. """ - def get_reference_by_id(self, id: str): + def get_reference_by_id(self, id): """ return the author description with the given identifier assigned to it. 
:raise KeyError: if an author with the given identifier is not part of this list @@ -440,7 +443,7 @@ def get_reference_by_pos(self, pos: int): """ return self._get_item_by_pos(pos) - def _get_default_id_for(self, md): + def _get_default_id_for(self, md: Mapping): out = md.get('doi') if out: out = re.sub(r'^https://doi.org/', 'doi:', out) @@ -471,7 +474,7 @@ class NERDNonFileComps(_NERDOrderedObjectList): list of components via :py:property:`ids`, followed by a call to :py:method:`set_order`, passing in the reordered list of identifiers. """ - def get_component_by_id(self, id: str): + def get_component_by_id(self, id): """ return the author description with the given identifier assigned to it. :raise KeyError: if an author with the given identifier is not part of this list @@ -485,7 +488,7 @@ def get_component_by_pos(self, pos: int): """ return self._get_item_by_pos(pos) - def _get_default_id_for(self, md): + def _get_default_id_for(self, md: Mapping): return self._new_id() @@ -516,7 +519,7 @@ def __init__(self, resource: NERDResource, iscollf=None): self.is_collection = iscollf @staticmethod - def file_object_is_subcollection(md): + def file_object_is_subcollection(md: Mapping): """ return True if the given description is recognized as describing a subcollection. False is returned if the object lacks any marker indicating its type. 
@@ -524,7 +527,7 @@ def file_object_is_subcollection(md): return nerdmutils.is_type(md, "Subcollection") @abstractmethod - def get_file_by_id(self, id: str) -> Mapping: + def get_file_by_id(self, id) -> Mapping: """ return the component in the file list that has the given (location-independent) identifier :raises ObjectNotFound: if no file exists in this set with the given identifier @@ -615,7 +618,7 @@ def set_order_in_subcoll(self, collpath: str, ids: Iterable[str]) -> Iterable[st # raise NotImplementedError() @abstractmethod - def set_file_at(md, filepath: str=None, id=None, as_coll: bool=None) -> str: + def set_file_at(md: Mapping, filepath: str=None, id=None, as_coll: bool=None) -> str: """ add or update a file component. If `id` is given (or otherwise included in the metadata as the `@id` property) and it already exists in the file list, its metadata will be replaced @@ -684,7 +687,7 @@ def move(self, idorpath: str, filepath: str) -> str: return comp.get('@id') @abstractmethod - def delete_file(self, id: str) -> bool: + def delete_file(self, id) -> bool: """ remove a file from this set. :returns: False if the file was not found in this collection; True, otherwise @@ -701,7 +704,7 @@ def empty(self): raise NotImplementedError() @abstractmethod - def exists(self, id: str) -> bool: + def exists(self, id) -> bool: """ return True if the stored files include one with the given identifier """ @@ -714,7 +717,7 @@ def path_exists(self, filepath: str) -> bool: """ raise NotImplementedError() - def __contains__(self, idorpath: str) -> bool: + def __contains__(self, idorpath: IDorPos) -> bool: return self.exists(idorpath) or self.path_exists(idorpath) @abstractmethod @@ -845,7 +848,7 @@ def from_config(cls, config: Mapping, logger: Logger): raise NotImplementedError() @abstractmethod - def open(self, id: str=None) -> NERDResource: + def open(self, id=None) -> NERDResource: """ Open a resource record having the given identifier. 
If a record with `id` does not exist (or was deleted), a new record should be created and the identifier assigned to it. If `id` is @@ -854,7 +857,7 @@ def open(self, id: str=None) -> NERDResource: raise NotImplementedError() @abstractmethod - def load_from(self, rec: Mapping, id: str=None): + def load_from(self, rec: Mapping, id=None): """ place an existing NERDm Resource record into storage. :param Mapping rec: the NERDm Resource record as a JSON-ready dictionary @@ -866,14 +869,14 @@ def load_from(self, rec: Mapping, id: str=None): raise NotImplementedError() @abstractmethod - def exists(self, id: str) -> bool: + def exists(self, id) -> bool: """ return True if there is a record in the storage with the given identifier """ raise NotImplementedError() @abstractmethod - def delete(self, id: str) -> bool: + def delete(self, id) -> bool: """ delete the record with the given identifier from the storage. If the record with that identifer does not exist, do nothing (except return False) diff --git a/python/nistoar/midas/dap/nerdstore/fsbased.py b/python/nistoar/midas/dap/nerdstore/fsbased.py index 4a8fa74..c578cc9 100644 --- a/python/nistoar/midas/dap/nerdstore/fsbased.py +++ b/python/nistoar/midas/dap/nerdstore/fsbased.py @@ -160,7 +160,7 @@ def set_order(self, ids: Iterable[str]): self._order = neworder self._cache_ids() - def move(self, idorpos: str, pos: int = None, rel: int = 0) -> int: + def move(self, idorpos: IDorPos, pos: int = None, rel: int = 0) -> int: if pos is None: pos = self.count rel = 0 From 114e954630bf56e0bf5eabd99e816fb3862ad005 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 15 Nov 2023 17:45:04 -0500 Subject: [PATCH 110/123] create loosen_nerdm.py script, use in install.sh --- scripts/install.sh | 3 + scripts/loosen_nerdm.py | 172 +++++++++++++++++++++++++++ scripts/tests/test_loosen_nerdm.py | 185 +++++++++++++++++++++++++++++ 3 files changed, 360 insertions(+) create mode 100755 scripts/loosen_nerdm.py create mode 100644 
scripts/tests/test_loosen_nerdm.py diff --git a/scripts/install.sh b/scripts/install.sh index f69e9e7..e2538d9 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -29,6 +29,9 @@ echo Installing python libraries into $PY_LIBDIR... # None at this time $oarmd_pkg/scripts/install_extras.sh --install-dir=$INSTALL_DIR +mkdir -p $INSTALL_DIR/etc/midas/schemas +echo $execdir/loosen_nerdm.py $INSTALL_DIR/etc/schemas $INSTALL_DIR/etc/midas/schemas +$execdir/loosen_nerdm.py $INSTALL_DIR/etc/schemas $INSTALL_DIR/etc/midas/schemas mkdir -p $INSTALL_DIR/var/logs echo cp -r $SOURCE_DIR/etc $INSTALL_DIR diff --git a/scripts/loosen_nerdm.py b/scripts/loosen_nerdm.py new file mode 100755 index 0000000..4ad117f --- /dev/null +++ b/scripts/loosen_nerdm.py @@ -0,0 +1,172 @@ +#! /usr/bin/env python3 +# +import os, sys, json, argparse, traceback, re +from pathlib import Path +from collections import OrderedDict +from collections.abc import Mapping + +description="""create copies of NERDm schemas with loosened requirements appropriate for the +metadata drafting process (i.e. 
within MIDAS)""" +epilog="" +def_progname = "loosen_nerdm" + +def define_options(progname, parser=None): + """ + define command-line arguments + """ + if not parser: + parser = argparse.ArgumentParser(progname, None, description, epilog) + + parser.add_argument("srcdir", metavar="SRCDIR", type=str, + help="the directory containing the NERDm schemas") + parser.add_argument("destdir", metavar="DESTDIR", type=str, + help="the directory write loosened schemas to") + parser.add_argument("-D", "--no-dedocument", dest="dedoc", action="store_false", default=True, + help="do not remove documentation from source schemas") + parser.add_argument("-J", "--assume-post2020", dest="post2020", action="store_true", default=False, + help="assume schemas are compliant with a post-2020 JSON Schema specification "+ + "(and uses $defs)") + parser.add_argument("-m", "--make-dest-dir", dest="mkdest", action="store_true", default=False, + help="create the destination directory if it does not exist") + + return parser + +def set_options(progname, args): + """ + define and parse the command-line options + """ + return define_options(progname).parse_args(args) + +directives_by_file = { + "nerdm-schema.json": { + "derequire": [ "Resource", "Organization" ] + }, + "nerdm-pub-schema.json": { + "derequire": [ "PublicDataResource", "Person" ] + } +} + +try: + import nistoar.nerdm.utils as utils +except ImportError: + sys.path.insert(0, find_nistoar_code()) + import nistoar.nerdm.utils as utils + +def find_nistoar_code(): + execdir = Path(__file__).resolve().parents[0] + basedir = execdir.parents[0] + mdpydir = basedir / "metadata" / "python" + return mdpydir + +def loosen_schema(schema: Mapping, directives: Mapping, opts=None): + """ + apply the given loosening directive to the given JSON Schema. 
The directives is a + dictionary describes what to do with the following properties (the directives) supported: + + ``derequire`` + a list of type definitions within the schema from which the required property + should be removed (via :py:func:`~nistoar.nerdm.utils.unrequire_props_in`). Each + type name listed will be assumed to be an item under the "definitions" node in the + schema this directive is applied to. + ``dedocument`` + a boolean indicating whether the documentation annotations should be removed from + the schema. If not set, the default is determined by opts.dedoc if opts is given or + True, otherwise. + + :param dict schema: the schema document as a JSON Schema schema dictionary + :param dict directives: the dictionary of directives to apply + :param opt: an options object (containing scripts command-line options) + """ + dedoc = directives.get("dedocument", True) + if opts and not opts.dedoc: + dedoc = False + if dedoc: + utils.declutter_schema(schema) + + p2020 = False + if opts: + p2020 = opts.post2020 + deftag = "$defs" if p2020 else "definitions" + + dereqtps = [ deftag+'.'+t for t in directives.get("derequire", []) ] + utils.unrequire_props_in(schema, dereqtps, p2020) + +def process_nerdm_schemas(srcdir, destdir, opts=None): + """ + process all NERDm schemas (core and extensions) found in the source directory + and write the modified schemas to the output directory + """ + if not os.path.isdir(srcdir): + raise RuntimeException(f"{srcdir}: schema source directory does not exist as directory") + + if not os.path.exists(destdir): + if opts and opts.mkdest: + os.makedirs(destdir) + else: + raise FileNotFoundError(destdir) + if not os.path.isdir(srcdir): + raise RuntimeException(f"{destdir}: schema destination is not a directory") + + nerdfilere = re.compile(r"^nerdm-([a-zA-Z][^\-]*\-)?schema.json$") + schfiles = [f for f in os.listdir(srcdir) if nerdfilere.match(f)] + + failed={} + for f in schfiles: + try: + with open(os.path.join(srcdir, f)) as 
fd: + schema = json.load(fd, object_pairs_hook=OrderedDict) + except IOError as ex: + failed[f] = f"Trouble reading schema file: {str(ex)}" + continue + + directives = directives_by_file.get(f, {}) + try: + loosen_schema(schema, directives, opts) + except Exception as ex: + failed[f] = f"Trouble processing schema file: {str(ex)}" + continue + + with open(os.path.join(destdir, f), 'w') as fd: + json.dump(schema, fd, indent=2) + fd.write("\n") + + return failed + +def main(progname=None, args=[]): + global def_progname; + if not progname: + progname = def_progname + else: + progname = os.path.basename(progname) + if progname.endswith(".py"): + progname = progname[:-1*len(".py")] + + opts = set_options(progname, args) + + failed = process_nerdm_schemas(opts.srcdir, opts.destdir, opts) # may raise exceptions + if failed: + print(f"{progname}: WARNING: failed to process the following schemas:", file=sys.stderr) + for f, err in failed: + print(f" {f}: {err}", file=sys.stderr) + + return 3 + + return 0 + +if __name__ == "__main__": + try: + sys.exit(main(sys.argv[0], sys.argv[1:])) + except RuntimeError as ex: + print(f"{progname}: {str(ex)}", file=sys.stderr) + sys.exit(1) + except Exception as ex: + print("Unexpected error: "+str(ex), file=sys.stderr) + traceback.print_tb(sys.exc_info()[2]) + sys.exit(4) + + + + + + + diff --git a/scripts/tests/test_loosen_nerdm.py b/scripts/tests/test_loosen_nerdm.py new file mode 100644 index 0000000..2ba8f29 --- /dev/null +++ b/scripts/tests/test_loosen_nerdm.py @@ -0,0 +1,185 @@ +#! 
/usr/bin/env python3 +# +import sys, os, csv, json, re +import importlib.util as imputil +import unittest as test +from pathlib import Path +from collections import OrderedDict + +from nistoar.testing import * +from nistoar.base.config import hget + +testdir = Path(__file__).resolve().parents[0] +scrpdir = testdir.parents[0] +basedir = scrpdir.parents[0] +nerdmdir = basedir / "metadata" / "model" + +scriptfile = str(scrpdir / "loosen_nerdm.py") + +def import_file(path, name=None): + if not name: + name = os.path.splitext(os.path.basename(path))[0] + import importlib.util as imputil + spec = imputil.spec_from_file_location(name, path) + out = imputil.module_from_spec(spec) + sys.modules["loosen"] = out + spec.loader.exec_module(out) + return out + +loosen = None # set at end of this file + +def setUpModule(): + ensure_tmpdir() + +def tearDownModule(): + rmtmpdir() + + +class TestLoosenNerdm(test.TestCase): + + def test_import(self): + self.assertIsNotNone(loosen) + self.assertTrue(hasattr(loosen, 'main')) + self.assertIsNotNone(loosen.directives_by_file) + + def setUp(self): + self.tf = Tempfiles() + self.destdir = self.tf.mkdir("loosen_nerdm") + + def tearDown(self): + self.tf.clean() + + def test_set_options(self): + try: + opts = loosen.set_options(loosen.def_progname, ["-D", "goob", "gurn"]) + self.assertFalse(opts.dedoc) + self.assertFalse(opts.post2020) + self.assertEqual(opts.srcdir, "goob") + self.assertEqual(opts.destdir, "gurn") + + opts = loosen.set_options(loosen.def_progname, ["-J", "goob", "gurn"]) + self.assertTrue(opts.dedoc) + self.assertTrue(opts.post2020) + self.assertEqual(opts.srcdir, "goob") + self.assertEqual(opts.destdir, "gurn") + + opts = loosen.set_options(loosen.def_progname, ["harry", "david"]) + self.assertTrue(opts.dedoc) + self.assertFalse(opts.post2020) + self.assertEqual(opts.srcdir, "harry") + self.assertEqual(opts.destdir, "david") + except SystemExit as ex: + self.fail("error processing args") + + def 
test_find_nistoar_code(self): + self.assertEqual(loosen.find_nistoar_code().parts[-2:], ("metadata", "python")) + + def test_loosen_schema(self): + with open(nerdmdir/"nerdm-schema.json") as fd: + schema = json.load(fd, object_pairs_hook=OrderedDict) + + self.assertTrue(hget(schema, "title")) + self.assertTrue(hget(schema, "description")) + self.assertTrue(hget(schema, "definitions.Resource.required")) + self.assertTrue(hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(hget(schema, "definitions.Organization.description")) + + loosen.loosen_schema(schema, {"derequire": ["Resource"], "dedocument": True}) + + self.assertTrue(not hget(schema, "title")) + self.assertTrue(not hget(schema, "description")) + self.assertTrue(not hget(schema, "definitions.Resource.required")) + self.assertTrue(not hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(not hget(schema, "definitions.Organization.description")) + + def test_loosen_schema_no_dedoc(self): + with open(nerdmdir/"nerdm-schema.json") as fd: + schema = json.load(fd, object_pairs_hook=OrderedDict) + + self.assertTrue(hget(schema, "title")) + self.assertTrue(hget(schema, "description")) + self.assertTrue(hget(schema, "definitions.Resource.required")) + self.assertTrue(hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(hget(schema, "definitions.Organization.description")) + + loosen.loosen_schema(schema, {"derequire": ["Resource"], "dedocument": False}) + + self.assertTrue(hget(schema, "title")) + self.assertTrue(hget(schema, "description")) + self.assertTrue(not hget(schema, "definitions.Resource.required")) + self.assertTrue(hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + 
self.assertTrue(hget(schema, "definitions.Organization.description")) + + def test_loosen_schema_with_opts(self): + with open(nerdmdir/"nerdm-schema.json") as fd: + schema = json.load(fd, object_pairs_hook=OrderedDict) + opts = loosen.set_options(loosen.def_progname, ["goob", "gurn"]) + + self.assertTrue(hget(schema, "title")) + self.assertTrue(hget(schema, "description")) + self.assertTrue(hget(schema, "definitions.Resource.required")) + self.assertTrue(hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(hget(schema, "definitions.Organization.description")) + + loosen.loosen_schema(schema, {"derequire": ["Resource"]}, opts) + + self.assertTrue(not hget(schema, "title")) + self.assertTrue(not hget(schema, "description")) + self.assertTrue(not hget(schema, "definitions.Resource.required")) + self.assertTrue(not hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(not hget(schema, "definitions.Organization.description")) + + def test_loosen_schema_with_opts_D(self): + with open(nerdmdir/"nerdm-schema.json") as fd: + schema = json.load(fd, object_pairs_hook=OrderedDict) + opts = loosen.set_options(loosen.def_progname, ["-D", "goob", "gurn"]) + + self.assertTrue(hget(schema, "title")) + self.assertTrue(hget(schema, "description")) + self.assertTrue(hget(schema, "definitions.Resource.required")) + self.assertTrue(hget(schema, "definitions.Resource.description")) + self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(hget(schema, "definitions.Organization.description")) + + loosen.loosen_schema(schema, {"derequire": ["Resource"]}, opts) + + self.assertTrue(hget(schema, "title")) + self.assertTrue(hget(schema, "description")) + self.assertTrue(not hget(schema, "definitions.Resource.required")) + self.assertTrue(hget(schema, "definitions.Resource.description")) + 
self.assertTrue(hget(schema, "definitions.Organization.required")) + self.assertTrue(hget(schema, "definitions.Organization.description")) + + def test_process_nerdm_schemas(self): + schfre = re.compile(r"^nerdm-([a-zA-Z][^\-]*\-)?schema.json$") + srcfiles = [f for f in os.listdir(nerdmdir) if schfre.match(f)] + self.assertGreater(len(srcfiles), 6) + + destfiles = [f for f in os.listdir(self.destdir) if not f.startswith('.')] + self.assertEqual(destfiles, []) + self.assertEqual(loosen.process_nerdm_schemas(nerdmdir, self.destdir), {}) + + destfiles = [f for f in os.listdir(self.destdir) if not f.startswith('.')] + self.assertIn("nerdm-schema.json", destfiles) + self.assertIn("nerdm-pub-schema.json", destfiles) + for schfile in srcfiles: + self.assertIn(schfile, destfiles) + self.assertEqual(len(destfiles), len(srcfiles)) + + + + + + + +if __name__ == '__main__': + if len(sys.argv) > 1: + scriptfile = sys.argv[1] + loosen = import_file(scriptfile) + test.main() From aedbe584735d33bb6f2556f78bf7dc4e8f2f4870 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 16 Nov 2023 06:25:55 -0500 Subject: [PATCH 111/123] scripts/loosen_nerdm.py: move loosen_schema to nistoar.nerdm.utils --- scripts/loosen_nerdm.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/scripts/loosen_nerdm.py b/scripts/loosen_nerdm.py index 4ad117f..5579f5e 100755 --- a/scripts/loosen_nerdm.py +++ b/scripts/loosen_nerdm.py @@ -43,14 +43,17 @@ def set_options(progname, args): }, "nerdm-pub-schema.json": { "derequire": [ "PublicDataResource", "Person" ] + }, + "nerdm-rls-schema.json": { + "derequire": [ "ReleasedResource" ] } } try: - import nistoar.nerdm.utils as utils + import nistoar.nerdm.utils as nerdm_utils except ImportError: sys.path.insert(0, find_nistoar_code()) - import nistoar.nerdm.utils as utils + import nistoar.nerdm.utils as nerdm_utils def find_nistoar_code(): execdir = Path(__file__).resolve().parents[0] @@ -77,20 +80,13 @@ def 
loosen_schema(schema: Mapping, directives: Mapping, opts=None): :param dict directives: the dictionary of directives to apply :param opt: an options object (containing scripts command-line options) """ - dedoc = directives.get("dedocument", True) - if opts and not opts.dedoc: - dedoc = False - if dedoc: - utils.declutter_schema(schema) - - p2020 = False if opts: - p2020 = opts.post2020 - deftag = "$defs" if p2020 else "definitions" - - dereqtps = [ deftag+'.'+t for t in directives.get("derequire", []) ] - utils.unrequire_props_in(schema, dereqtps, p2020) + if not opts.dedoc: + directives["dedocument"] = False + directives["post2020"] = opts.post2020 + nerdm_utils.loosen_schema(schema, directives) + def process_nerdm_schemas(srcdir, destdir, opts=None): """ process all NERDm schemas (core and extensions) found in the source directory From b002ca1e9d3a1a1b26b456b15b2ba467630c46e2 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 21 Nov 2023 15:55:00 -0500 Subject: [PATCH 112/123] dap:mds3: fix reference validation problem --- metadata | 2 +- python/nistoar/midas/dap/service/mds3.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/metadata b/metadata index 5cf9a8d..4e1f49d 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit 5cf9a8d632726738ea9fa1e88621fa701b8652dd +Subproject commit 4e1f49dede88ce482dde3e758269b30435a055cc diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 8d5ce9e..f2efa1b 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -17,7 +17,7 @@ Support for the web service frontend is provided via :py:class:`DAPApp` class, an implementation of the WSGI-based :ref:class:`~nistoar.pdr.publish.service.wsgi.SubApp`. 
""" -import os, re, pkg_resources +import os, re, pkg_resources, random, string from logging import Logger from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence, Callable @@ -92,6 +92,10 @@ RES_DELIM = const.RESONLY_EXTENSION.lstrip('/') EXTSCHPROP = "_extensionSchemas" +def random_id(prefix: str="", n: int=8): + r = ''.join(random.choices(string.ascii_uppercase + string.digits, k=n)) + return prefix+r + class DAPService(ProjectService): """ a project record request broker class for DAP records. @@ -1737,6 +1741,11 @@ def _moderate_reference(self, ref, doval=True): except AttributeError as ex: raise InvalidUpdate("location or proxyFor: value is not a string", sys=self) from ex + # Penultimately, add an id if doesn't already have one + if not ref.get("@id"): + ref['@id'] = "REPLACE" + # ref['@id'] = random_id("ref:") + # Finally, validate (if requested) schemauri = NERDM_SCH_ID + "/definitions/BibliographicReference" if ref.get("_schema"): @@ -1747,6 +1756,8 @@ def _moderate_reference(self, ref, doval=True): if doval: self.validate_json(ref, schemauri) + if ref.get("@id") == "REPLACE": + del ref['@id'] return ref def _moderate_file(self, cmp, doval=True): @@ -1980,6 +1991,8 @@ def __init__(self, dbcli_factory: DBClientFactory, log: Logger, config: dict={}, class DAPProjectDataHandler(ProjectDataHandler): """ A :py:class:`~nistoar.midas.wsgi.project.ProjectDataHandler` specialized for editing NERDm records. + + Note that this implementation inherits its PUT, PATCH, and DELETE handling from its super-class. 
""" _allowed_post_paths = "authors references components".split() + [FILE_DELIM, LINK_DELIM] From d83d7b05e7c51c9b1780991ba6199512e20f4cc6 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 21 Nov 2023 15:59:33 -0500 Subject: [PATCH 113/123] dap:mds3: be more explanatory about validation errors in log, test_mds3: add test_set_landingpage test --- python/nistoar/midas/dap/service/mds3.py | 10 ++++++++++ python/tests/nistoar/midas/dap/service/test_mds3.py | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index f2efa1b..7eb7fca 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -781,6 +781,11 @@ def put_each_into(data, objlist): for fmd in files: nerd.files.set_file_at(fmd) + except InvalidUpdate as ex: + self.log.error("Invalid update to NERDm data not saved: %s: %s", prec.id, str(ex)) + if ex.errors: + self.log.error("Errors include:\n "+("\n ".join([str(e) for e in ex.errors]))) + raise except Exception as ex: provact.message = "Failed to save NERDm data update due to internal error" self.log.error("Failed to save NERDm metadata: "+str(ex)) @@ -1021,6 +1026,11 @@ def _update_part_nerd(self, path: str, prec: ProjectRecord, nerd: NERDResource, except PartNotAccessible: # client request error; don't record action raise + except InvalidUpdate as ex: + self.log.error("Invalid update to NERDm data not saved: %s: %s", prec.id, str(ex)) + if ex.errors: + self.log.error("Errors include:\n "+("\n ".join([str(e) for e in ex.errors]))) + raise except Exception as ex: self.log.error("Failed to save update to NERDm data, %s: %s", prec.id, str(ex)) self.log.warning("Partial update is possible") diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index affb56b..09cde19 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ 
b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -928,6 +928,15 @@ def test_update(self): self.assertNotIn("references", nerd) self.assertEqual(len(nerd["components"]), 2) + def test_set_landingpage(self): + self.create_service() + prec = self.svc.create_record("goob") + id = prec.id + nerd = self.svc._store.open(id) + + self.svc.replace_data(id, "https://example.com/", part="landingPage") + res = nerd.get_res_data() + self.assertEqual(res.get('landingPage'), "https://example.com/") From d5774bcacb80664353420c3bdbb2e914c08774d7 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 21 Nov 2023 16:03:15 -0500 Subject: [PATCH 114/123] dap: validate: implement new LenientSchemaLoader based on loosen_schema() --- python/nistoar/midas/dap/service/validate.py | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/python/nistoar/midas/dap/service/validate.py b/python/nistoar/midas/dap/service/validate.py index ce15a9a..484fec5 100644 --- a/python/nistoar/midas/dap/service/validate.py +++ b/python/nistoar/midas/dap/service/validate.py @@ -1,12 +1,45 @@ """ validation utilities specialized for DAP editing """ +import re + from nistoar.nerdm.validate import * from nistoar.nerdm.constants import core_schema_base as CORE_SCHEMA_BASE +import nistoar.nerdm.utils as nerdm_utils PUB_SCHEMA_BASE = CORE_SCHEMA_BASE + "pub/" +RLS_SCHEMA_BASE = CORE_SCHEMA_BASE + "rls/" + +directives_by_uribase = { + CORE_SCHEMA_BASE: { + "derequire": [ "Resource", "Organization" ] + }, + PUB_SCHEMA_BASE: { + "derequire": [ "PublicDataResource", "Person" ] + }, + RLS_SCHEMA_BASE: { + "derequire": [ "ReleasedResource" ] + } +} +_verre = re.compile(r"/v\d.*$") class LenientSchemaLoader(ejs.SchemaLoader): + """ + this modifies the schema definitions on selected schemas to be more lenient for records + intended for use in the DAP Authoring API. 
+ """ + def load_schema(self, uri): + out = super().load_schema(uri) + + if out.get("id", "").startswith(CORE_SCHEMA_BASE): + base = _verre.sub("/", out['id']) + directives = directives_by_uribase.get(base, {}) + nerdm_utils.loosen_schema(out, directives) + + return out + + +class OldLenientSchemaLoader(ejs.SchemaLoader): """ this modifies the schema definitions on selected schemas to be more lenient for records intended for use in the DAP Authoring API. From f5327e5dbc46dbc39e2af5a3f5f42e0f602fb774 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 22 Nov 2023 11:06:23 -0500 Subject: [PATCH 115/123] dbio.project: fix clearing/deleting --- python/nistoar/midas/dbio/project.py | 24 +++-- python/nistoar/midas/dbio/wsgi/project.py | 85 +++++++++++++--- .../tests/nistoar/midas/dbio/test_project.py | 26 +++++ .../nistoar/midas/dbio/wsgi/test_project.py | 99 +++++++++++++++++-- 4 files changed, 208 insertions(+), 26 deletions(-) diff --git a/python/nistoar/midas/dbio/project.py b/python/nistoar/midas/dbio/project.py index b7da9bf..7b00b81 100644 --- a/python/nistoar/midas/dbio/project.py +++ b/python/nistoar/midas/dbio/project.py @@ -142,6 +142,14 @@ def create_record(self, name, data=None, meta=None) -> ProjectRecord: self.log.info("Created %s record %s (%s) for %s", self.dbcli.project, prec.id, prec.name, self.who) return prec + def delete_record(self, id) -> ProjectRecord: + """ + delete the draft record. This may leave a stub record in place if, for example, the record + has been published previously. + """ + # TODO: handling previously published records + raise NotImplementedError() + def _get_id_shoulder(self, user: PubAgent): """ return an ID shoulder that is appropriate for the given user agent @@ -231,7 +239,7 @@ def update_data(self, id, newdata, part=None, message="", _prec=None): """ merge the given data into the currently save data content for the record with the given identifier. :param str id: the identifier for the record whose data should be updated. 
- :param str newdata: the data to save as the new content. + :param str|dict|list newdata: the data to save as the new content. :param str part: the slash-delimited pointer to an internal data property. If provided, the given ``newdata`` is a value that should be set to the property pointed to by ``part``. @@ -498,13 +506,16 @@ def _save_data(self, indata: Mapping, prec: ProjectRecord, def _validate_data(self, data): pass - def clear_data(self, id: str, part: str=None, message: str=None, prec=None): + def clear_data(self, id: str, part: str=None, message: str=None, prec=None) -> bool: """ - remove the stored data content of the record and reset it to its defaults. + remove the stored data content of the record and reset it to its defaults. Note that + no change is recorded if the requested data does not exist yet. :param str id: the identifier for the record whose data should be cleared. :param stt part: the slash-delimited pointer to an internal data property. If provided, only that property will be cleared (either removed or set to an initial default). + :return: True the data was properly cleared; return False if ``part`` was specified but does not + yet exist in the data. :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to `id`. If this is not provided, the record will by fetched anew based on the `id`. 
:raises ObjectNotFound: if no record with the given ID exists or the `part` parameter points to @@ -518,7 +529,7 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): set_state = True prec = self.dbcli.get_record_for(id, ACLs.WRITE) # may raise ObjectNotFound/NotAuthorized - if _prec.status.state not in [status.EDIT, status.READY]: + if prec.status.state not in [status.EDIT, status.READY]: raise NotEditable(id) initdata = self._new_data_for(prec.id, prec.meta) @@ -541,7 +552,7 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): elif prop not in data: data[prop] = {} elif prop not in data: - break + return False elif not steps: del data[prop] break @@ -576,7 +587,8 @@ def clear_data(self, id: str, part: str=None, message: str=None, prec=None): finally: self._record_action(provact) self.log.info("Cleared out data for %s record %s (%s) for %s", - self.dbcli.project, _prec.id, _prec.name, self.who) + self.dbcli.project, prec.id, prec.name, self.who) + return True def update_status_message(self, id: str, message: str, _prec=None) -> status.RecordStatus: diff --git a/python/nistoar/midas/dbio/wsgi/project.py b/python/nistoar/midas/dbio/wsgi/project.py index 4489ac8..c85f099 100644 --- a/python/nistoar/midas/dbio/wsgi/project.py +++ b/python/nistoar/midas/dbio/wsgi/project.py @@ -2,13 +2,39 @@ A web service interface to various MIDAS project records. A _project record_ is a persistable record that is compliant with the MIDAS Common Database project -data model, where examples of "project record" types include DMP records and data publication drafts. +data model, where examples of a "project record" types include DMP records and data publication drafts. The :py:class:`MIDASProjectApp` encapsulates the handling of requests to create and manipulate project -records. 
If desired, this class can be specialized for a particular project type, and the easiest way
-to do that is by sub-classing the :py:class:`~nistoar.midas.dbio.wsgi.project.ProjectRecordBroker` and
-passing that class to the :py:class:`MIDASProjectApp` constructor. This is because the
-:py:class:`~nistoar.midas.dbio.wsgi.project.ProjectRecordBroker` class isolates the business logic for
-retrieving and manipulating project records.
+records. If desired, this class can be specialized for a particular project type; as an example, see
+:py:mod:`nistoar.midas.dap.service.mds3`.
+
+This implementation uses the simple :py:mod:`nistoar-internal WSGI
+framework` to handle the specific web service endpoints. The
+:py:class:`MIDASProjectApp` is the router for the Project collection endpoint: it analyzes the relative
+URL path and delegates the handling to a more specific handler class. In particular, these endpoints
+are handled accordingly:
+
+``/`` -- :py:class:`ProjectSelectionHandler`
+    responds to project search queries to find project records matching search criteria (GET)
+    as well as accepts requests to create new records (POST).
+
+``/{projid}`` -- :py:class:`ProjectHandler`
+    returns the full project record (GET) or deletes it (DELETE).
+
+``/{projid}/name`` -- :py:class:`ProjectNameHandler`
+    returns (GET) or updates (PUT) the user-supplied name of the record.
+
+``/{projid}/data[/...]`` -- :py:class:`ProjectDataHandler`
+    returns (GET), updates (PUT, PATCH), or clears (DELETE) the data content of the record. This
+    implementation supports updating individual parts of the data object via PUT, PATCH, DELETE
+    based on the path relative to ``data``. Subclasses (e.g. with the
+    :py:mod:`DAP specialization`) may also support POST for certain
+    array-type properties within ``data``.
+
+``/{projid}/acls[/...]`` -- :py:class:`ProjectACLsHandler`
+    returns (GET) or updates (PUT, PATCH, POST, DELETE) access control lists for the record.
+
+``/{projid}/*`` -- :py:class:`ProjectInfoHandler`
+    returns other non-editable parts of the record via GET (including the ``meta`` property).
 """
 from logging import Logger
 from collections import OrderedDict
@@ -86,7 +112,7 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start
             raise ValueError("Missing ProjectRecord id")
 
     def do_OPTIONS(self, path):
-        return self.send_options(["GET"])
+        return self.send_options(["GET", "DELETE"])
 
     def do_GET(self, path, ashead=False):
         try:
@@ -98,7 +124,22 @@
                                             self._id, ashead=ashead)
 
         return self.send_json(prec.to_dict(), ashead=ashead)
-    
+
+    def do_DELETE(self, path):
+        try:
+            prec = self.svc.get_record(self._id)
+            out = prec.to_dict()
+            self.svc.delete_record(self._id)
+        except dbio.NotAuthorized as ex:
+            return self.send_unauthorized()
+        except dbio.ObjectNotFound as ex:
+            return self.send_error_resp(404, "ID not found", "Record with requested identifier not found",
+                                        self._id)
+        except NotImplementedError as ex:
+            return self.send_error(501, "Not Implemented")
+
+        return self.send_json(out, "Deleted")
+
 
 class ProjectInfoHandler(ProjectRecordHandler):
     """
@@ -261,7 +302,7 @@ def __init__(self, service: ProjectService, subapp: SubApp, wsgienv: dict, start
             raise ValueError("Missing ProjectRecord id")
 
     def do_OPTIONS(self, path):
-        return self.send_options(["GET", "PUT", "PATCH"])
+        return self.send_options(["GET", "PUT", "PATCH", "DELETE"])
 
     def do_GET(self, path, ashead=False):
         """
@@ -283,6 +324,28 @@
                                             "Record with requested identifier not found",
                                             self._id, ashead=ashead)
         return self.send_json(out, ashead=ashead)
 
+    def do_DELETE(self, path):
+        """
+        respond to a DELETE request.  This is used to clear the value of a particular property
+        within the project data or otherwise reset the project data to its initial defaults.
+        :param str path:  a path to the portion of the data to clear
+        """
+        try:
+            cleared = self.svc.clear_data(self._id, path)
+        except dbio.NotAuthorized as ex:
+            return self.send_unauthorized()
+        except dbio.PartNotAccessible as ex:
+            return self.send_error_resp(405, "Data part not deletable",
+                                        "Requested part of data cannot be deleted")
+        except dbio.ObjectNotFound as ex:
+            if ex.record_part:
+                return self.send_error_resp(404, "Data property not found",
+                                            "No data found at requested property", self._id)
+            return self.send_error_resp(404, "ID not found",
+                                        "Record with requested identifier not found", self._id)
+
+        return self.send_json(cleared, "Cleared", 201)
+
     def do_PUT(self, path):
         try:
             newdata = self.get_json_body()
@@ -650,7 +713,7 @@ def do_DELETE(self, path):
         try:
             prec.acls.revoke_perm_from(parts[0], parts[1])
             prec.save()
-            return self.send_ok()
+            return self.send_ok(message="ID removed")
         except dbio.NotAuthorized as ex:
             return self.send_unauthorized()
 
@@ -787,7 +850,7 @@ def _apply_action(self, action, message=None):
 
 class MIDASProjectApp(SubApp):
     """
-    a base web app for an interface handling project record
+    a base web app for an interface handling project records.
""" _selection_handler = ProjectSelectionHandler _update_handler = ProjectHandler diff --git a/python/tests/nistoar/midas/dbio/test_project.py b/python/tests/nistoar/midas/dbio/test_project.py index 74d7455..35ea008 100644 --- a/python/tests/nistoar/midas/dbio/test_project.py +++ b/python/tests/nistoar/midas/dbio/test_project.py @@ -230,6 +230,32 @@ def test_update_replace_data(self): self.assertEqual(len(self.project.dbcli._db.get(base.PROV_ACT_LOG, {}).get(prec.id,[])), 6) + def test_clear_data(self): + self.create_service() + prec = self.project.create_record("goob") + self.assertEqual(prec.data, {}) + + data = self.project.update_data(prec.id, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + self.assertEqual(data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"color": "red", "pos": {"x": 23, "y": 12, "grid": "A"}}) + + self.assertIs(self.project.clear_data(prec.id, "color"), True) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"pos": {"x": 23, "y": 12, "grid": "A"}}) + + self.assertIs(self.project.clear_data(prec.id, "color"), False) + self.assertIs(self.project.clear_data(prec.id, "gurn/goob/gomer"), False) + + self.assertIs(self.project.clear_data(prec.id, "pos/y"), True) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {"pos": {"x": 23, "grid": "A"}}) + + self.assertIs(self.project.clear_data(prec.id), True) + prec = self.project.get_record(prec.id) + self.assertEqual(prec.data, {}) + + def test_finalize(self): self.create_service() prec = self.project.create_record("goob") diff --git a/python/tests/nistoar/midas/dbio/wsgi/test_project.py b/python/tests/nistoar/midas/dbio/wsgi/test_project.py index 221c314..69e5d00 100644 --- a/python/tests/nistoar/midas/dbio/wsgi/test_project.py +++ b/python/tests/nistoar/midas/dbio/wsgi/test_project.py @@ -266,15 +266,16 @@ def test_full_methnotallowed(self): body = hdlr.handle() 
self.assertIn("405 ", self.resp[0]) - self.resp = [] - path = "mdm1:0001" - req = { - 'REQUEST_METHOD': 'DELETE', - 'PATH_INFO': self.rootpath + path - } - hdlr = self.app.create_handler(req, self.start, path, nistr) - body = hdlr.handle() - self.assertIn("405 ", self.resp[0]) +# DELETE is now allowed +# self.resp = [] +# path = "mdm1:0001" +# req = { +# 'REQUEST_METHOD': 'DELETE', +# 'PATH_INFO': self.rootpath + path +# } +# hdlr = self.app.create_handler(req, self.start, path, nistr) +# body = hdlr.handle() +# self.assertIn("405 ", self.resp[0]) def test_create(self): path = "" @@ -305,6 +306,86 @@ def test_create(self): self.assertEqual(resp['data'], {"color": "red"}) self.assertEqual(resp['meta'], {}) + def test_delete(self): + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"name": "big", "owner": "nobody", + "data": {"color": "red", "pos": {"x": 0, "y": 1}}})) + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['data'], {"color": "red", "pos": {"x": 0, "y": 1}}) + recid = resp['id'] + + self.resp = [] + path = recid+"/data/pos/x" + req = { + 'REQUEST_METHOD': 'DELETE', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertIs(resp, True) + + self.resp = [] + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertIs(resp, False) + + self.resp = [] + path = recid+"/data" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + 
self.assertEqual(resp, {"color": "red", "pos": {"y": 1}}) + + self.resp = [] + path = recid+"/data" + req = { + 'REQUEST_METHOD': 'DELETE', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertIs(resp, True) + + self.resp = [] + path = recid+"/data" + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp, {}) + + self.resp = [] + path = recid + req = { + 'REQUEST_METHOD': 'DELETE', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + body = hdlr.handle() + self.assertIn("501 ", self.resp[0]) + def test_search(self): path = "" From 858bcc5559403d398a136e24de5f3548bd3970d1 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Fri, 24 Nov 2023 09:59:52 -0500 Subject: [PATCH 116/123] dap.mds3: fix clearing/deleting --- python/nistoar/midas/dap/service/mds3.py | 59 ++++++++---- .../nistoar/midas/dap/service/test_mds3.py | 39 ++++++++ .../midas/dap/service/test_mds3_app.py | 89 ++++++++++++++++++- 3 files changed, 167 insertions(+), 20 deletions(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 7eb7fca..4670878 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -485,13 +485,15 @@ def update_data(self, id, newdata, part=None, message="", _prec=None): """ return self._update_data(id, newdata, part, replace=False, message="", prec=_prec) - def clear_data(self, id, part=None, _prec=None): + def clear_data(self, id, part=None, message: str=None, _prec=None) -> bool: """ remove the stored data content of the record and reset it to its defaults. 
:param str id: the identifier for the record whose data should be cleared. :param stt part: the slash-delimited pointer to an internal data property. If provided, only that property will be cleared (either removed or set to an initial default). + :return: True the data was properly cleared; return False if ``part`` was specified but does not + yet exist in the data. :param ProjectRecord prec: the previously fetched and possibly updated record corresponding to ``id``. If this is not provided, the record will by fetched anew based on the ``id``. @@ -513,44 +515,61 @@ def clear_data(self, id, part=None, _prec=None): self._store.load_from(nerd) nerd = self._store.open(id) + provact = None try: if part: what = part if part == "authors": + if nerd.authors.count == 0: + return False nerd.authors.empty() elif part == "references": + if nerd.references.count == 0: + return False nerd.references.empty() elif part == FILE_DELIM: + if nerd.files.count == 0: + return False what = "files" nerd.files.empty() elif part == LINK_DELIM: + if nerd.nonfiles.count == 0: + return False what = "links" nerd.nonfiles.empty() elif part == "components": + if nerd.nonfiles.count == 0 and nerd.files.count == 0: + return False nerd.files.empty() nerd.nonfiles.empty() - elif part in "title rights disclaimer description".split(): + elif part in "title rights disclaimer description landingPage keyword".split(): resmd = nerd.get_res_data() + if part not in resmd: + return False del resmd[part] nerd.replace_res_data(resmd) else: - raise PartNotAccessible(_prec.id, path, "Clearing %s not allowed" % path) + raise PartNotAccessible(_prec.id, part, "Clearing %s not allowed" % part) - provact = Action(Action.PATCH, _prec.id, self.who, "clearing "+what) + if not message: + message = "clearing "+what + provact = Action(Action.PATCH, _prec.id, self.who, message) part = ("/"+part) if part.startswith("pdr:") else ("."+part) provact.add_subaction(Action(Action.DELETE, _prec.id+"#data"+part, self.who, - 
"clearing "+what)) - prec.status.act(self.STATUS_ACTION_CLEAR, "cleared "+what) + message)) + _prec.status.act(self.STATUS_ACTION_CLEAR, "cleared "+what) else: nerd.authors.empty() nerd.references.empty() nerd.files.empty() nerd.nonfiles.empty() - nerd.replace_res_data(self._new_data_for(_prec.id, prec.meta)) + nerd.replace_res_data(self._new_data_for(_prec.id, _prec.meta)) - provact = Action(Action.PATCH, _prec.id, self.who, "clearing all NERDm data") - prec.status.act(self.STATUS_ACTION_CLEAR, "cleared all NERDm data") + if not message: + message = "clearing all NERDm data" + provact = Action(Action.PATCH, _prec.id, self.who, message) + _prec.status.act(self.STATUS_ACTION_CLEAR, "cleared all NERDm data") except PartNotAccessible: # client request error; don't record action @@ -559,28 +578,30 @@ def clear_data(self, id, part=None, _prec=None): except Exception as ex: self.log.error("Failed to clear requested NERDm data, %s: %s", _prec.id, str(ex)) self.log.warning("Partial update is possible") - provact.message = "Failed to clear requested NERDm data" - self._record_action(provact) + if provact: + provact.message = "Failed to clear requested NERDm data" + self._record_action(provact) - prec.status.act(self.STATUS_ACTION_CLEAR, "Failed to clear NERDm data") - prec.set_state(status.EDIT) - prec.data = self._summarize(nerd) - self._try_save(prec) + _prec.status.act(self.STATUS_ACTION_CLEAR, "Failed to clear NERDm data") + _prec.set_state(status.EDIT) + _prec.data = self._summarize(nerd) + self._try_save(_prec) raise - prec.data = self._summarize(nerd) + _prec.data = self._summarize(nerd) if set_state: - prec.status.set_state(status.EDIT) + _prec.status.set_state(status.EDIT) try: - prec.save() + _prec.save() except Exception as ex: self.log.error("Failed to saved DBIO record, %s: %s", prec.id, str(ex)) raise finally: - self._record_action(provact) + self._record_action(provact) + return True def _update_data(self, id, newdata, part=None, prec=None, nerd=None, 
replace=False, message=""): diff --git a/python/tests/nistoar/midas/dap/service/test_mds3.py b/python/tests/nistoar/midas/dap/service/test_mds3.py index 09cde19..eb816fa 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3.py @@ -813,6 +813,45 @@ def test_get_sw_desc_for(self): "accessURL": "https://bitbucket.com/foo/bar" }) + def test_clear_data(self): + self.create_service() + prec = self.svc.create_record("goob") + pdrid = "ark:/88434/%s-%s" % tuple(prec.id.split(":")) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(set(nerd.keys()), + {"_schema", "@id", "doi", "_extensionSchemas", "@context", "@type"}) + + nerd = self.svc.update_data(prec.id, + {"landingPage": "https://example.com", + "contactPoint": { "fn": "Gurn Cranston", "hasEmail": "mailto:gjc1@nist.gov"}}) + self.assertEqual(set(nerd.keys()), + {"_schema", "@id", "doi", "_extensionSchemas", "@context", "@type", + "contactPoint", "landingPage"}) + self.assertEqual(set(nerd["contactPoint"].keys()), {"@type", "fn", "hasEmail"}) + + with self.assertRaises(PartNotAccessible): + self.assertIs(self.svc.clear_data(prec.id, "goober"), False) + with self.assertRaises(PartNotAccessible): + self.assertIs(self.svc.clear_data(prec.id, "contactPoint/hasEmail"), True) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(set(nerd.keys()), + {"_schema", "@id", "doi", "_extensionSchemas", "@context", "@type", + "contactPoint", "landingPage"}) + self.assertEqual(set(nerd["contactPoint"].keys()), {"@type", "fn", "hasEmail"}) + + self.assertIs(self.svc.clear_data(prec.id, "landingPage"), True) + nerd = self.svc.get_nerdm_data(prec.id) + self.assertEqual(set(nerd.keys()), + {"_schema", "@id", "doi", "_extensionSchemas", "@context", "@type", + "contactPoint"}) + self.assertIs(self.svc.clear_data(prec.id, "references"), False) + + self.assertIs(self.svc.clear_data(prec.id), True) + nerd = self.svc.get_nerdm_data(prec.id) + 
self.assertEqual(set(nerd.keys()), + {"_schema", "@id", "doi", "_extensionSchemas", "@context", "@type"}) + + def test_update(self): rec = read_nerd(pdr2210) self.create_service() diff --git a/python/tests/nistoar/midas/dap/service/test_mds3_app.py b/python/tests/nistoar/midas/dap/service/test_mds3_app.py index bd2a3bf..1e214fb 100644 --- a/python/tests/nistoar/midas/dap/service/test_mds3_app.py +++ b/python/tests/nistoar/midas/dap/service/test_mds3_app.py @@ -383,7 +383,7 @@ def test_put_patch(self): self.assertNotIn("downloadURL", resp) self.resp = [] - path = id + '/data/components[file_1]' + path = id + '/data/components/file_1' req = { 'REQUEST_METHOD': 'GET', 'PATH_INFO': self.rootpath + path @@ -443,6 +443,93 @@ def test_put_patch(self): self.assertIn('description', resp) self.assertEqual(resp['rights'], "What ever.") + def test_delete(self): + testnerd = read_nerd(pdr2210) + res = deepcopy(testnerd) + del res['references'] + del res['components'] + del res['@id'] + del res['_schema'] + del res['_extensionSchemas'] + + path = "" + req = { + 'REQUEST_METHOD': 'POST', + 'PATH_INFO': self.rootpath + path + } + req['wsgi.input'] = StringIO(json.dumps({"data": { "contactPoint": res['contactPoint'], + "keyword": [ "testing" ], + "landingPage": "https://example.com/" }, + "meta": { "creatorisContact": "false" }, + "name": "OptSortSph" })) + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectSelectionHandler)) + self.assertNotEqual(hdlr.cfg, {}) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['name'], "OptSortSph") + self.assertEqual(resp['id'], "mds3:0001") + self.assertEqual(resp['data']['@id'], 'ark:/88434/mds3-0001') + self.assertEqual(resp['data']['doi'], 'doi:10.88888/mds3-0001') + recid = resp['id'] + + self.resp = [] + path = recid + '/data' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': 
self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertEqual(resp['@id'], 'ark:/88434/mds3-0001') + self.assertEqual(resp['doi'], 'doi:10.88888/mds3-0001') + self.assertEqual(resp['contactPoint'], + {"fn": "Zachary Levine", "@type": "vcard:Contact", + "hasEmail": "mailto:zachary.levine@nist.gov" }) + self.assertEqual(resp['@type'], + [ "nrdp:PublicDataResource", "dcat:Resource" ]) + self.assertEqual(resp['landingPage'], "https://example.com/") + self.assertEqual(resp['keyword'], ["testing"]) + + self.resp = [] + path = recid + '/data/landingPage' + req = { + 'REQUEST_METHOD': 'DELETE', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr._path, "landingPage") + body = hdlr.handle() + self.assertIn("201 ", self.resp[0]) + resp = self.body2dict(body) + self.assertIs(resp, True) + + + self.resp = [] + path = recid + '/data' + req = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': self.rootpath + path + } + hdlr = self.app.create_handler(req, self.start, path, nistr) + self.assertTrue(isinstance(hdlr, prj.ProjectDataHandler)) + self.assertEqual(hdlr._path, "") + body = hdlr.handle() + self.assertIn("200 ", self.resp[0]) + resp = self.body2dict(body) + self.assertNotIn("landingPage", resp) + + + + From c45fb1d4737aac5da532afaa0d768d256bf2f566 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 25 Nov 2023 09:58:05 -0500 Subject: [PATCH 117/123] loosen_nerdm.py: allow find_nistoar_code() to work, don't bother using in install.sh --- scripts/install.sh | 2 -- scripts/loosen_nerdm.py | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/install.sh b/scripts/install.sh index 
e2538d9..91a8f74 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -30,8 +30,6 @@ echo Installing python libraries into $PY_LIBDIR... $oarmd_pkg/scripts/install_extras.sh --install-dir=$INSTALL_DIR mkdir -p $INSTALL_DIR/etc/midas/schemas -echo $execdir/loosen_nerdm.py $INSTALL_DIR/etc/schemas $INSTALL_DIR/etc/midas/schemas -$execdir/loosen_nerdm.py $INSTALL_DIR/etc/schemas $INSTALL_DIR/etc/midas/schemas mkdir -p $INSTALL_DIR/var/logs echo cp -r $SOURCE_DIR/etc $INSTALL_DIR diff --git a/scripts/loosen_nerdm.py b/scripts/loosen_nerdm.py index 5579f5e..85f98b9 100755 --- a/scripts/loosen_nerdm.py +++ b/scripts/loosen_nerdm.py @@ -49,18 +49,18 @@ def set_options(progname, args): } } -try: - import nistoar.nerdm.utils as nerdm_utils -except ImportError: - sys.path.insert(0, find_nistoar_code()) - import nistoar.nerdm.utils as nerdm_utils - def find_nistoar_code(): execdir = Path(__file__).resolve().parents[0] basedir = execdir.parents[0] mdpydir = basedir / "metadata" / "python" return mdpydir +try: + import nistoar.nerdm.utils as nerdm_utils +except ImportError: + sys.path.insert(0, str(find_nistoar_code())) + import nistoar.nerdm.utils as nerdm_utils + def loosen_schema(schema: Mapping, directives: Mapping, opts=None): """ apply the given loosening directive to the given JSON Schema. 
The directives is a From 9b1380cc50a2d36f43b7fe1ed4dc90eb743d6565 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Sat, 25 Nov 2023 16:44:52 -0500 Subject: [PATCH 118/123] pull in update to oar-metadata: jsonpath-ng dependency --- metadata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata b/metadata index 4e1f49d..8ef86a5 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit 4e1f49dede88ce482dde3e758269b30435a055cc +Subproject commit 8ef86a563ed4f1a633c9070a31874b0d76402519 From d51a695f6ab10dc05bd9be55dfe0a4f848a5e07b Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 30 Nov 2023 10:37:30 -0500 Subject: [PATCH 119/123] dap/mds3: include authors as among allowed attributes in a reference --- python/nistoar/midas/dap/service/mds3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/nistoar/midas/dap/service/mds3.py b/python/nistoar/midas/dap/service/mds3.py index 4670878..05cf957 100644 --- a/python/nistoar/midas/dap/service/mds3.py +++ b/python/nistoar/midas/dap/service/mds3.py @@ -1732,7 +1732,8 @@ def _moderate_author(self, auth, doval=True): return auth _refprops = set(("@id _schema _extensionSchemas title abbrev proxyFor location label "+ - "description citation refType doi inPreparation vol volNumber pages publishYear").split()) + "description citation refType doi inPreparation vol volNumber pages "+ + "authors publishYear").split()) _reftypes = set(("IsDocumentedBy IsSupplementTo IsSupplementedBy IsCitedBy Cites IsReviewedBy "+ "IsReferencedBy References IsSourceOf IsDerivedFrom "+ "IsNewVersionOf IsPreviousVersionOf").split()) From 0621c9dab155296411ed6040c50d28a8316c7cdf Mon Sep 17 00:00:00 2001 From: RayPlante Date: Thu, 30 Nov 2023 13:17:43 -0500 Subject: [PATCH 120/123] pull in schema tweaker changes from oar-metadata --- metadata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata b/metadata index 8ef86a5..d96af33 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ 
-Subproject commit 8ef86a563ed4f1a633c9070a31874b0d76402519 +Subproject commit d96af33d1f2ec0520e906360c21dc478acb8867b From a87c229e3790a7a4a373cb2507de318c26948ccc Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 5 Dec 2023 12:04:32 -0500 Subject: [PATCH 121/123] uprev oar-metadata to 2.1.0rc1 --- metadata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata b/metadata index d96af33..ef804ed 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit d96af33d1f2ec0520e906360c21dc478acb8867b +Subproject commit ef804ed8775eabe986f2a36dcf3b10d51330aece From 2d1f6ca2150e9faa4cf96263d2a7a7d0e6eb5a01 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Wed, 3 Jan 2024 16:00:03 -0500 Subject: [PATCH 122/123] document dependencies into pip-compatible requirements file --- requirements.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bf1205a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +pipenv +setuptools<66.0.0 +json-spec +jsonschema==2.4.0 +requests +pytest==4.6.5 +filelock +crossrefapi +pyyaml +pymongo +jsonmerge==1.3.0 +funcsigs +bagit>=1.6.3,<2.0 +fs>=2.0.21 +jsonpatch +mako +pyjwt +ejsonschema @ https://github.com/usnistgov/ejsonschema/archive/master.zip +pynoid @ https://github.com/RayPlante/pynoid/archive/master.zip +multibag @ https://github.com/usnistgov/multibag-py/archive/0.3.zip From 37cf70be31865a121faa58eeef4df22ae20c59e0 Mon Sep 17 00:00:00 2001 From: RayPlante Date: Tue, 16 Jan 2024 12:12:51 -0500 Subject: [PATCH 123/123] uprev metadata submodule to 2.1.0 --- metadata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata b/metadata index ef804ed..9af4c0a 160000 --- a/metadata +++ b/metadata @@ -1 +1 @@ -Subproject commit ef804ed8775eabe986f2a36dcf3b10d51330aece +Subproject commit 9af4c0a626d52199591caabbe5427da1b059f992