Skip to content

Commit

Permalink
integrate indexing service into midas web service
Browse files Browse the repository at this point in the history
  • Loading branch information
RayPlante committed May 9, 2024
1 parent 17049dc commit b48065d
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 16 deletions.
17 changes: 17 additions & 0 deletions docker/midasserver/midas-dmpdap_conf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,20 @@ services:
describedBy: "http://localhost:9091/docs/dmpsvc-elements.html"
href: "http://localhost:9091/midas/dmp/mdm1"
version: mdm1

nsdi:
about:
message: "NSD Indexing Service"
title: "NIST Staff Directory Indexing API"
describedBy: "http://localhost:9091/docs/nsdisvc-elements.html"
href: "http://localhost:9091/midas/nsdi"
default_convention: v1
conventions:
v1:
nsd:
service_endpoint: http://peopleserver:9092/
about:
title: "NIST Staff Directory Indexing API (version 1)"
describedBy: "http://localhost:9091/docs/nsdisvc-elements.html"
href: "http://localhost:9091/midas/nsdi/v1"
version: v1
71 changes: 67 additions & 4 deletions python/nistoar/midas/dbio/index.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
"""
a module supporting special-purpose indexes for the DBIO.
The core of this module is the :py:class:`Index` which can be serialized to JSON or CSV and
delivered to a remote (web) client. The client then uses that index on the client-side to quickly
determine which remote records match a prompt string. See :py:class:`Index` for more details,
including the structure of the JSON and CSV serializations.
An :py:class:`Index` is created on the server-side by an :py:class:`Indexer` implementation.
This module includes different common and specific Index generators, including ones for
indexing entries from the NIST Staff Directory (NSD) service. These indexers are made available
as web services via the :py:mod:`nistoar.midas.nsdi` module.
"""
import json, csv, re
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -38,6 +48,10 @@ class Index:
where the target value starts with a particular substring, and the second provides a format
for the index that can be delivered to a web client. The client can then do its own fast
lookups, and use the keys to retrieve specific records of interest based on the target value.
An Index is serialized to deliver it to a client. See :py:meth:`export_as_json` and
:py:meth:`export_as_csv` for a description of the structure of the JSON and CSV serialization
formats.
"""
def __init__(self, caseins=True):
"""
Expand Down Expand Up @@ -124,14 +138,63 @@ def clone(self) -> Index:

def export_as_json(self, pretty=False) -> str:
"""
export this index into JSON
serialize this index into JSON.
The output is a JSON dictionary in which the keys are the index's target values--that is,
the values that matched the prompt string used to generate the index. Each key maps to
an object with entries representing all the entries in the indexed database that feature
the target value. Each key of this second object is the unique identifier for the
matching record, and its value is a displayable string meant to represent or summarize that
record. (A client may show this displayable string as, say, a suggestion to an input field.)
For example, a small index might look like this::
{
"Bryan": {
"13913": "Cranston, Bryan"
},
"Cranston": {
"23497": "Cranston, Gurn",
"13913": "Cranston, Bryan"
},
"Gurn": {
"23497": "Cranston, Gurn"
}
}
:param bool pretty: if True, format the JSON in a pretty format with indentation and
newline characters (as shown above). The default, False, formats
JSON in compact form without indentation or newlines.
:rtype: str
"""
if pretty:
return json.dumps(self._data, indent=2)
else:
return json.dumps(self._data)

def export_as_csv(self, keydelim=':') -> str:
def export_as_csv(self, keydelim: str=':') -> str:
"""
serialize this index into CSV.
The output is a CSV table in which each row represents matching records for a particular
target value. The first column is a target value--that is, a value that matched the
prompt string used to generate the index. The remaining columns represent records that
match the target value. (Note that since a target value can match 1 or more records, the
table will not, in general, have a constant number of columns.) Each remaining column
contains a colon-delimited key-value pair: the key is the unique identifier for the
matching record, and its value is a displayable string meant to represent or summarize that
record. (A client may show this displayable string as, say, a suggestion to an input field.)
For example, three rows of an index into a staff directory may look like this::
Bryan,"13913:Cranston, Bryan"
Cranston,"23497:Cranston, Gurn","13913:Cranston, Bryan"
Gurn,"23497:Cranston, Gurn"
:param str keydelim: A delimiter to use instead of a colon (:) to separate the identifier
and displayable value.
:rtype: str
"""
out = StringIO(newline='')
wrtr = csv.writer(out, csv.unix_dialect, quoting=csv.QUOTE_MINIMAL)
for target in self._data:
Expand Down Expand Up @@ -417,7 +480,7 @@ def _select_from(self, orgtype: str, prompt: str):

def startswithprompt(rec):
for p in self.props:
if rec[p].lower().startswith(prompt):
if isinstance(rec[p], str) and rec[p].lower().startswith(prompt):
return True
return False

Expand Down Expand Up @@ -489,7 +552,7 @@ def get_index_for(self, prompt: str) -> Index:
if self.muststart:
def startswithprompt(rec):
for p in self.props:
if rec[p].lower().startswith(prompt):
if isinstance(rec[p], str) and rec[p].lower().startswith(prompt):
return True
return False

Expand Down
3 changes: 3 additions & 0 deletions python/nistoar/midas/nsdi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@
This module provides the indexing service and its REST web service interfaces. It leverages the
index-generating capabilities provided by :py:mod:`nistoar.midas.dbio.index`.
For the details on the service endpoints, see the convention/version-specific documentation:
* :py:mod:`nistoar.midas.nsdi.wsgi.v1`
"""
38 changes: 27 additions & 11 deletions python/nistoar/midas/nsdi/wsgi/v1.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
The WSGI implementation of the web API to the NSD Indexer Service. See :py:mod:`nistoar.midas.nsdi`
for a description of what this service does.
for a description of what this service does and :py:mod:`nistoar.midas.dbio.index` for what the index
looks like.
In this implementation, all endpoints feature the same interface: a GET request will return an index
document based on a prompt string given by a ``prompt`` query parameter. The endpoint reflects the
Expand All @@ -15,6 +16,11 @@
``/Group``
indexes group records based on the group's full name, abbreviation, or number
For example, a GET to ``/People?prompt=pla`` will return an index to all NSD people entries whose
first or last name begins with "pla" (case-insensitive). A GET to ``/Division?prompt=mat`` returns
an index to all division entries whose full name or abbreviation starts with "mat".
``/Division?prompt=64`` returns an index where the division number starts with "64".
The default format for the returned index is JSON. A format can be explicitly requested either via
the ``format`` query parameter or by requesting a media type with the ``Accept`` HTTP request header.
Supported ``format`` values are "json" and "csv"; their corresponding media types are "application/json"
Expand All @@ -34,7 +40,7 @@
from urllib.parse import parse_qs

from nistoar.web.rest import ServiceApp, Handler, ErrorHandling, FatalError
from nistoar.nsd.client import NSDClient
from nistoar.nsd.client import NSDClient, NSDServerError
from nistoar.pdr.publish.prov import PubAgent
from nistoar.midas.dbio.index import NSDPeopleIndexClient, NSDOrgIndexClient

Expand Down Expand Up @@ -71,7 +77,7 @@ def do_GET(self, path, ashead=False):
return self.send_error_obj(503, "Upstream service error",
"Failure accessing the NSD service: "+str(ex))
except Exception as ex:
self.log.error("Unexpected error accessing indexer client: %s", str(ex))
self.log.exception("Unexpected error accessing indexer client: %s", str(ex))
return self.send_error_obj(500, "Internal Server Error")

return self.send_ok(idx.export_as_json(), "application/json")
Expand All @@ -94,10 +100,10 @@ def do_GET(self, path, ashead=False):
return an index based on the given prompt
"""
path = path.lower()
if path not in "ou div group any".split():
if path not in "ou division group organization".split():
return self.send_error_obj(404, "Not Found", "Not a recognized organization type: "+path)
if path == "organization":
path = "ou div group".split()
path = "ou division group".split()

prompt = ''
qstr = self._env.get('QUERY_STRING')
Expand All @@ -112,7 +118,7 @@ def do_GET(self, path, ashead=False):
return self.send_error_obj(503, "Upstream service error",
"Failure accessing the NSD service: "+str(ex))
except Exception as ex:
self.log.error("Unexpected error accessing indexer client: %s", str(ex))
self.log.exception("Unexpected error accessing indexer client: %s", str(ex))
return self.send_error_obj(500, "Internal Server Error")

return self.send_ok(idx.export_as_json(), "application/json")
Expand All @@ -124,10 +130,11 @@ class NSDIndexerApp(ServiceApp):
a web app interface for handling NSD indexing requests
"""
_supported_eps = {
"people": PeopleIndexHandler,
"ou": OrgIndexHandler,
"division": OrgIndexHandler,
"group": OrgIndexHandler
"people": PeopleIndexHandler,
"ou": OrgIndexHandler,
"division": OrgIndexHandler,
"group": OrgIndexHandler,
"organization": OrgIndexHandler
}

def __init__(self, log: Logger, config: Mapping={}, nsdclient: NSDClient=None):
Expand Down Expand Up @@ -175,7 +182,16 @@ class Unsupported(Handler, ErrorHandling):

def __init__(self, env: Mapping, start_resp: Callable, path: str, config: Mapping=None,
log: Logger=None, app=None):
Handler.__init__(self, env, start_resp, path, None, config, log, app)
Handler.__init__(self, path, env, start_resp, None, config, log, app)

def do_GET(self, path, ashead=False):
return self.send_error_obj(404, "Not Found", ashead=ashead)


def NSDIndexerAppFactory(dbio_client_factory, log: Logger, config: Mapping, projname):
    """
    create an NSDIndexerApp through the factory signature used by the MIDAS web app framework.

    The ``dbio_client_factory`` and ``projname`` arguments are accepted only so that this
    function matches that signature; neither is used.

    :param dbio_client_factory:  ignored
    :param Logger log:           the logger passed on to the new app
    :param Mapping config:       the configuration passed on to the new app
    :param projname:             ignored
    :return:  the newly created NSDIndexerApp
    """
    indexer_app = NSDIndexerApp(log, config)
    return indexer_app

6 changes: 5 additions & 1 deletion python/nistoar/midas/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
Repository (PDR)
* ``/groups/`` -- the API for creating and managing access permission groups for collaborative
authoring.
* ``/nsdi/v1/`` -- an API for retrieving fast indexes matching entries in the NIST Staff Directory
(see :py:mod:`nistoar.midas.nsdi` for details).
These endpoints send and receive data stored in the backend database through the common
:py:mod:`DBIO layer <nistoar.midas.dbio>`.
Expand Down Expand Up @@ -151,6 +153,7 @@
from .dbio.inmem import InMemoryDBClientFactory
from .dbio.fsbased import FSBasedDBClientFactory
from .dbio.mongo import MongoDBClientFactory
from .nsdi.wsgi import v1 as nsdiv1
from nistoar.base.config import ConfigurationException, merge_config

log = logging.getLogger(system.system_abbrev) \
Expand Down Expand Up @@ -464,7 +467,8 @@ def create_handler(self, env: dict, start_resp: Callable, path: str, who: Agent)
# "dmp/mdm1": mdm1.DMPApp,
"dmp/mdm1": prj.MIDASProjectApp.factory_for("dmp"),
"dap/mdsx": mdsx.DAPApp,
"dap/mds3": mds3.DAPApp
"dap/mds3": mds3.DAPApp,
"nsdi/v1": nsdiv1.NSDIndexerAppFactory
}

class MIDASApp(AuthenticatedWSGIApp):
Expand Down
14 changes: 14 additions & 0 deletions python/tests/nistoar/midas/nsdi/wsgi/test_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,20 @@ def setUp(self):
self.app = v1.NSDIndexerApp(rootlog, self.cfg)
self.resp = []

def test_unsupported(self):
    # a GET to a path that is not a supported endpoint should produce a 404 error object
    environ = {
        "REQUEST_METHOD": "GET",
        "PATH_INFO": "/Gurn",
        "QUERY_STRING": "prompt=ve"
    }
    raw = self.app(environ, self.start)
    self.assertIn("404 ", self.resp[0])

    # the error body should carry the status, the reason, and a matching message
    errdata = self.body2data(raw)
    expectations = (
        ("http:status", 404),
        ("http:reason", "Not Found"),
        ("oar:message", "Not Found"),
    )
    for prop, expected in expectations:
        self.assertEqual(errdata[prop], expected)

def test_group(self):
req = {
"REQUEST_METHOD": "GET",
Expand Down

0 comments on commit b48065d

Please sign in to comment.