
Merge pull request #474 from PsychoinformaticsLab/fix/clarifaiapi
Move to Clarifai-GRPC library
adelavega authored Sep 21, 2022
2 parents 3c30ae1 + a289ad9 commit 0047547
Showing 8 changed files with 135 additions and 88 deletions.
2 changes: 1 addition & 1 deletion docker/README.md
@@ -22,7 +22,7 @@ WIT_AI_API_KEY=abc123
IBM_USERNAME=[email protected]
IBM_PASSWORD=xyzabc
GOOGLE_APPLICATION_CREDENTIALS=/root/share/googleapi.json
-CLARIFAI_API_KEY=hhhvvv
+CLARIFAI_ACCESS_TOKEN=hhhvvv
```
This should be obvious, but just to be sure: NEVER check this file into a GitHub repository, unless you want to foot the bill for a cybercriminal's cloud computing.
6 changes: 3 additions & 3 deletions docs/installation.rst
@@ -114,9 +114,9 @@ While installing pliers itself is usually straightforward, setting up some of th
| MicrosoftAPIFaceExtractor | `Microsoft Face API <https://azure.microsoft.com/try/cognitive-services/my-apis/>`__ | MICROSOFT\_FACE\_SUBSCRIPTION\_KEY | API key and | 152b067184e2ae03711e6439de124c27 |
| (and subclasses) | | MICROSOFT\_SUBSCRIPTION\_LOCATION | registered region | westus |
+---------------------------------------+-----------------------------------------------------------------------------------------------------+--------------------------------------+--------------------------------+---------------------------------------+
-| ClarifaiAPIImageExtractor | `Clarifai image recognition API <https://clarifai.com>`__ | CLARIFAI\_API\_KEY | API key | 168ed02e137459ead66c3a661be7b784 |
-+---------------------------------------+-----------------------------------------------------------------------------------------------------+--------------------------------------+--------------------------------+---------------------------------------+
-| ClarifaiAPIVideoExtractor | `Clarifai video tagging API <https://clarifai.com>`__ | CLARIFAI\_API\_KEY | API key | 168ed02e137459ead66c3a661be7b784 |
+| ClarifaiAPIExtractor | `Clarifai recognition API <https://clarifai.com>`__ | CLARIFAI\_ACCESS\_TOKEN | Personal access token | 168ed02e137459ead66c3a661be7b784 |
+| (and subclasses, | | CLARIFAI\_USER\_ID | User name | user_name |
+| e.g. ClarifaiAPIImageExtractor) | | CLARIFAI\_APP\_ID | Application name | my_application |
+---------------------------------------+-----------------------------------------------------------------------------------------------------+--------------------------------------+--------------------------------+---------------------------------------+
| RevAISpeechAPIConverter | `Rev.ai speech-to-text API <https://rev.ai>`__ | REVAI\_ACCESS\_TOKEN | API key | 686n83674ab3989d2f5e4aa0aec9f273 |
+---------------------------------------+-----------------------------------------------------------------------------------------------------+--------------------------------------+--------------------------------+---------------------------------------+
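For reference, a minimal sketch of supplying the three new Clarifai credentials through the environment, as the updated extractor expects (the token, user, and app values below are placeholders taken from the table above):

```python
import os
from pliers.extractors import ClarifaiAPIImageExtractor

# Placeholder credentials; substitute real values from your Clarifai account.
os.environ['CLARIFAI_ACCESS_TOKEN'] = '168ed02e137459ead66c3a661be7b784'
os.environ['CLARIFAI_USER_ID'] = 'user_name'
os.environ['CLARIFAI_APP_ID'] = 'my_application'

# With the variables set, no arguments are needed at construction time.
ext = ClarifaiAPIImageExtractor()
```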
2 changes: 1 addition & 1 deletion optional-dependencies.txt
@@ -1,4 +1,4 @@
-clarifai
+clarifai-grpc
duecredit
face_recognition
python-twitter
171 changes: 109 additions & 62 deletions pliers/extractors/api/clarifai.py
@@ -16,14 +16,10 @@
from pliers.utils import listify, attempt_to_import, verify_dependencies


-clarifai_client = attempt_to_import('clarifai.rest.client', 'clarifai_client',
-                                    ['ClarifaiApp',
-                                     'Concept',
-                                     'ModelOutputConfig',
-                                     'ModelOutputInfo',
-                                     'Image',
-                                     'Video'])
+clarifai_channel = attempt_to_import('clarifai_grpc.channel.clarifai_channel',
+                                     'clarifai_channel', ['ClarifaiChannel'])
+
+clarifai_api = attempt_to_import('clarifai_grpc.grpc.api', 'clarifai_api',
+                                 ['resources_pb2', 'service_pb2', 'service_pb2_grpc'])
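The lazy imports above resolve to the following clarifai-grpc objects; a minimal sketch of building the same client directly, assuming clarifai-grpc is installed (the token is a placeholder):

```python
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc

# The extractor below builds exactly this: a V2 stub over Clarifai's gRPC
# channel, authenticated per-request via a metadata tuple.
channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)
metadata = (('authorization', 'Key ' + 'YOUR_ACCESS_TOKEN'),)  # placeholder
```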

class ClarifaiAPIExtractor(APITransformer):

@@ -44,60 +40,88 @@ class ClarifaiAPIExtractor(APITransformer):
transform calls on this Transformer.
'''

-    _log_attributes = ('api_key', 'model', 'model_name', 'min_value',
+    _log_attributes = ('access_token', 'model_name', 'min_value',
                        'max_concepts', 'select_concepts')
-    _env_keys = ('CLARIFAI_API_KEY',)
+    _env_keys = ('CLARIFAI_ACCESS_TOKEN', 'CLARIFAI_APP_ID', 'CLARIFAI_USER_ID')
    VERSION = '1.0'

-    def __init__(self, api_key=None, model='general-v1.3', min_value=None,
+    def __init__(self, access_token=None, user_id=None, app_id=None,
+                 model='general-image-recognition', min_value=None,
                  max_concepts=None, select_concepts=None, rate_limit=None,
                  batch_size=None):
-        verify_dependencies(['clarifai_client'])
-        if api_key is None:
+        verify_dependencies(['clarifai_channel', 'clarifai_api'])
+        if access_token is None:
            try:
-                api_key = os.environ['CLARIFAI_API_KEY']
+                access_token = os.environ['CLARIFAI_ACCESS_TOKEN']
            except KeyError:
-                raise ValueError("A valid Clarifai API API_KEY "
+                raise ValueError("A valid Clarifai API ACCESS_TOKEN "
                                  "must be passed the first time a Clarifai "
                                  "extractor is initialized.")

-        self.api_key = api_key
-        try:
-            self.api = clarifai_client.ClarifaiApp(api_key=api_key)
-            self.model = self.api.models.get(model)
-        except clarifai_client.ApiError as e:
-            logging.warning(str(e))
-            self.api = None
-            self.model = None
+        if user_id is None:
+            try:
+                user_id = os.environ['CLARIFAI_USER_ID']
+            except KeyError:
+                raise ValueError("A valid Clarifai API CLARIFAI_USER_ID "
+                                 "must be passed the first time a Clarifai "
+                                 "extractor is initialized.")
+
+        if app_id is None:
+            try:
+                app_id = os.environ['CLARIFAI_APP_ID']
+            except KeyError:
+                raise ValueError("A valid Clarifai API CLARIFAI_APP_ID "
+                                 "must be passed the first time a Clarifai "
+                                 "extractor is initialized.")
+
+        self.access_token = access_token
+        self.api = clarifai_api.service_pb2_grpc.V2Stub(
+            clarifai_channel.ClarifaiChannel.get_grpc_channel())
+        self.metadata = (('authorization', 'Key ' + access_token),)
+        self.user_id = user_id
+        self.app_id = app_id

        self.model_name = model
-        self.min_value = min_value
-        self.application_id = None
+        self.min_value = min_value  # NA
        self.max_concepts = max_concepts
        self.select_concepts = select_concepts
        if select_concepts:
            select_concepts = listify(select_concepts)
-            self.select_concepts = [clarifai_client.Concept(concept_name=n)
+            self.select_concepts = [clarifai_api.resources_pb2.Concept(name=n)
                                     for n in select_concepts]
        super().__init__(rate_limit=rate_limit)

    @property
    def api_keys(self):
-        return [self.api_key]
+        return [self.access_token]

    def check_valid_keys(self):
-        return self.api is not None
+        return None

    def _query_api(self, objects):
-        verify_dependencies(['clarifai_client'])
-        moc = clarifai_client.ModelOutputConfig(min_value=self.min_value,
-                                                max_concepts=self.max_concepts,
-                                                select_concepts=self.select_concepts)
-        model_output_info = clarifai_client.ModelOutputInfo(output_config=moc)
-        tags = self.model.predict(objects, model_output_info=model_output_info)
-        return tags['outputs']
+        verify_dependencies(['clarifai_api'])
+        model_options = None
+        if self.select_concepts or self.max_concepts or self.min_value:
+            model_options = clarifai_api.resources_pb2.Model(
+                output_info=clarifai_api.resources_pb2.OutputInfo(
+                    output_config=clarifai_api.resources_pb2.OutputConfig(
+                        select_concepts=self.select_concepts,
+                        min_value=self.min_value,
+                        max_concepts=self.max_concepts
+                    )
+                )
+            )
+        request = clarifai_api.service_pb2.PostModelOutputsRequest(
+            model_id=self.model_name,
+            user_app_id=clarifai_api.resources_pb2.UserAppIDSet(
+                user_id=self.user_id, app_id=self.app_id),
+            inputs=objects,
+            model=model_options
+        )
+        response = self.api.PostModelOutputs(request, metadata=self.metadata)
+        return response.outputs
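One caveat with the new client: `PostModelOutputs` returns a response whose `status` field should normally be inspected before the outputs are trusted. A hedged sketch of that check, not part of this commit (the helper name is invented here; `status_code_pb2` ships with clarifai-grpc):

```python
from clarifai_grpc.grpc.api.status import status_code_pb2

def check_response(response):
    """Raise if a Clarifai gRPC response did not succeed."""
    if response.status.code != status_code_pb2.SUCCESS:
        raise RuntimeError(
            f"Clarifai request failed: {response.status.description}")
    return response.outputs
```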

    def _parse_annotations(self, annotation, handle_annotations=None):
        """
-        Parse outputs from a clarifai face extraction.
+        Parse outputs from a clarifai extraction.

        Args:
            handle_annotations (str): How returned face annotations should be
@@ -106,26 +130,30 @@ def _parse_annotations(self, annotation, handle_annotations=None):
                other values will default to including every face.
        """
        # check whether the model is the face detection model
-        if self.model_name == 'face':
+        if self.model_name == 'face-detection':

            # if a face was detected, get at least the boundaries
-            if annotation['data']:
+            if annotation.data:
                # if specified, only return first face
                if handle_annotations == 'first':
-                    annotation = [annotation['data']['region'][0]]
+                    annotation = [annotation.data.regions[0]]
                # else collate all faces into a multi-row dataframe
                face_results = []
-                for i, d in enumerate(annotation['data']['regions']):
+                for i, d in enumerate(annotation.data.regions):
                    data_dict = {}
-                    for k, v in d['region_info']['bounding_box'].items():
-                        data_dict[k] = v
+                    for k, v in d.region_info.bounding_box.ListFields():
+                        data_dict[k.name] = v
+
+                    for tag in d.data.concepts:
+                        data_dict[tag.name] = tag.value
+
                    face_results.append(data_dict)
                return face_results
            # return an empty dict if there was no face
            else:
                data_dict = {}
-                for tag in annotation['data']['concepts']:
-                    data_dict[tag['name']] = tag['value']
+                for tag in annotation.data.concepts:
+                    data_dict[tag.name] = tag.value
                return data_dict
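The dictionary-style indexing of the old JSON responses is replaced above by protobuf attribute access; `ListFields()` yields `(descriptor, value)` pairs for the fields that are set. A standalone sketch of the same traversal on a hand-built message (field values are illustrative):

```python
from clarifai_grpc.grpc.api import resources_pb2

# Build a bounding box directly, then walk its populated fields the same
# way _parse_annotations does for each detected face region.
box = resources_pb2.BoundingBox(top_row=0.1, left_col=0.2,
                                bottom_row=0.9, right_col=0.8)
for descriptor, value in box.ListFields():
    print(descriptor.name, value)  # e.g. "top_row 0.1"
```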


@@ -152,29 +180,42 @@ class ClarifaiAPIImageExtractor(ClarifaiAPIExtractor, BatchTransformerMixin,

    _batch_size = 32

-    def __init__(self, api_key=None, model='general-v1.3', min_value=None,
+    def __init__(self, access_token=None, user_id=None, app_id=None,
+                 model='general-image-recognition', min_value=None,
                 max_concepts=None, select_concepts=None, rate_limit=None,
                 batch_size=None):
-        super().__init__(api_key=api_key,
-                         model=model,
-                         min_value=min_value,
-                         max_concepts=max_concepts,
-                         select_concepts=select_concepts,
-                         rate_limit=rate_limit,
-                         batch_size=batch_size)
+        super().__init__(access_token=access_token,
+                         user_id=user_id,
+                         app_id=app_id,
+                         model=model,
+                         min_value=min_value,
+                         max_concepts=max_concepts,
+                         select_concepts=select_concepts,
+                         rate_limit=rate_limit,
+                         batch_size=batch_size)

    def _extract(self, stims):
-        verify_dependencies(['clarifai_client'])
+        verify_dependencies(['clarifai_api'])

        # ExitStack lets us use filename context managers simultaneously
        with ExitStack() as stack:
            imgs = []
            for s in stims:
                if s.url:
-                    imgs.append(clarifai_client.Image(url=s.url))
+                    image = clarifai_api.resources_pb2.Image(url=s.url)
                else:
-                    f = stack.enter_context(s.get_filename())
-                    imgs.append(clarifai_client.Image(filename=f))
+                    f_name = stack.enter_context(s.get_filename())
+                    with open(f_name, "rb") as f:
+                        file_bytes = f.read()
+                    image = clarifai_api.resources_pb2.Image(
+                        base64=file_bytes
+                    )
+                image = clarifai_api.resources_pb2.Input(
+                    data=clarifai_api.resources_pb2.Data(
+                        image=image
+                    )
+                )
+                imgs.append(image)
            outputs = self._query_api(imgs)

        extractions = []
@@ -183,7 +224,7 @@ def _extract(self, stims):
        return extractions

    def _to_df(self, result):
-        if self.model_name == 'face':
+        if self.model_name == 'face-detection':
            # is a list already, no need to wrap it in one
            return pd.DataFrame(self._parse_annotations(result._data))
        return pd.DataFrame([self._parse_annotations(result._data)])
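Taken together, a sketch of end-to-end usage against the updated image extractor (the image path is a placeholder; credentials as documented in docs/installation.rst above):

```python
from pliers.stimuli import ImageStim
from pliers.extractors import ClarifaiAPIImageExtractor

# Explicit credentials shown for clarity; environment variables work equally well.
ext = ClarifaiAPIImageExtractor(access_token='YOUR_ACCESS_TOKEN',
                                user_id='user_name',
                                app_id='my_application',
                                max_concepts=5)
stim = ImageStim('my_image.jpg')   # placeholder path
df = ext.transform(stim).to_df()   # one column per returned concept
print(df.head())
```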
@@ -209,29 +250,35 @@ class ClarifaiAPIVideoExtractor(ClarifaiAPIExtractor, VideoExtractor):
'''

    def _extract(self, stim):
-        verify_dependencies(['clarifai_client'])
+        verify_dependencies(['clarifai_api'])
        with stim.get_filename() as filename:
-            vids = [clarifai_client.Video(filename=filename)]
+            with open(filename, "rb") as f:
+                file_bytes = f.read()
+            vids = [clarifai_api.resources_pb2.Input(
+                data=clarifai_api.resources_pb2.Data(
+                    video=clarifai_api.resources_pb2.Video(base64=file_bytes)
+                )
+            )]
            outputs = self._query_api(vids)
        return ExtractorResult(outputs, stim, self)

    def _to_df(self, result):
        onsets = []
        durations = []
        data = []
-        frames = result._data[0]['data']['frames']
+        frames = result._data[0].data.frames
        for i, frame_res in enumerate(frames):
            tmp_res = self._parse_annotations(frame_res)
            # if we detect multiple faces, the parsed annotation can be multi-line
            if type(tmp_res) == list:
                for d in tmp_res:
                    data.append(d)
-                    onset = frame_res['frame_info']['time'] / 1000.0
+                    onset = frame_res.frame_info.time / 1000.0

                    if (i + 1) == len(frames):
                        end = result.stim.duration
                    else:
-                        end = frames[i + 1]['frame_info']['time'] / 1000.0
+                        end = frames[i + 1].frame_info.time / 1000.0
                    onsets.append(onset)
                    durations.append(max([end - onset, 0]))

@@ -241,12 +288,12 @@ def _to_df(self, result):
                result.features = list(df.columns)
            else:
                data.append(tmp_res)
-                onset = frame_res['frame_info']['time'] / 1000.0
+                onset = frame_res.frame_info.time / 1000.0

                if (i + 1) == len(frames):
                    end = result.stim.duration
                else:
-                    end = frames[i+1]['frame_info']['time'] / 1000.0
+                    end = frames[i+1].frame_info.time / 1000.0
                onsets.append(onset)
                # NOTE: As of Clarifai API v2 and client library 2.6.1, the API
                # returns more frames than it should—at least for some videos.
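And the analogous sketch for video (placeholder path; onsets and durations come from `frame_info.time` as in the method above):

```python
from pliers.stimuli import VideoStim
from pliers.extractors import ClarifaiAPIVideoExtractor

ext = ClarifaiAPIVideoExtractor()   # credentials read from the environment
video = VideoStim('my_video.mp4')   # placeholder path
df = ext.transform(video).to_df()   # one row per sampled frame
print(df[['onset', 'duration']].head())
```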
2 changes: 1 addition & 1 deletion pliers/tests/converters/api/test_wit_converters.py
@@ -24,5 +24,5 @@ def test_WitTranscriptionConverter():
    text = [elem.text for elem in out_stim]
    assert 'today' in text or 'negotiations' in text

-    conv = WitTranscriptionConverter(api_key='nogood')
+    conv = WitTranscriptionConverter(access_token='nogood')
    assert not conv.validate_keys()
