Not sensitive to temporary S3 credentials expiring #353

Closed
jrbourbeau opened this issue Nov 14, 2023 · 0 comments · Fixed by #354
Labels
type: bug Something isn't working

Comments

@jrbourbeau
Collaborator

I tried to download some granules in a Python session that had been up for a while (over an hour)

files = earthaccess.download(granules, "test_data")

and got this error because the temporary S3 credentials that earthaccess was using had expired (full traceback and a reproduction sketch further below):

PermissionError: The provided token has expired.
Full traceback:
---------------------------------------------------------------------------
ClientError                               Traceback (most recent call last)
File /opt/coiled/env/lib/python3.11/site-packages/s3fs/core.py:706, in S3FileSystem._lsdir(self, path, refresh, max_items, delimiter, prefix, versions)
    705 files = []
--> 706 async for c in self._iterdir(
    707     bucket,
    708     max_items=max_items,
    709     delimiter=delimiter,
    710     prefix=prefix,
    711     versions=versions,
    712 ):
    713     if c["type"] == "directory":

File /opt/coiled/env/lib/python3.11/site-packages/s3fs/core.py:756, in S3FileSystem._iterdir(self, bucket, max_items, delimiter, prefix, versions)
    749 it = pag.paginate(
    750     Bucket=bucket,
    751     Prefix=prefix,
   (...)
    754     **self.req_kw,
    755 )
--> 756 async for i in it:
    757     for l in i.get("CommonPrefixes", []):

File /opt/coiled/env/lib/python3.11/site-packages/aiobotocore/paginate.py:30, in AioPageIterator.__anext__(self)
     29 while True:
---> 30     response = await self._make_request(current_kwargs)
     31     parsed = self._extract_parsed_response(response)

File /opt/coiled/env/lib/python3.11/site-packages/aiobotocore/client.py:383, in AioBaseClient._make_api_call(self, operation_name, api_params)
    382     error_class = self.exceptions.from_code(error_code)
--> 383     raise error_class(parsed_response, operation_name)
    384 else:

ClientError: An error occurred (ExpiredToken) when calling the ListObjectsV2 operation: The provided token has expired.

The above exception was the direct cause of the following exception:

PermissionError                           Traceback (most recent call last)
Cell In[29], line 1
----> 1 files = earthaccess.download(granules, "test_data")
      2 print(f"{files = }")

File /opt/coiled/env/lib/python3.11/site-packages/earthaccess/api.py:176, in download(granules, local_path, provider, threads)
    174     granules = [granules]
    175 try:
--> 176     results = earthaccess.__store__.get(granules, local_path, provider, threads)
    177 except AttributeError as err:
    178     print(err)

File /opt/coiled/env/lib/python3.11/site-packages/earthaccess/store.py:463, in Store.get(self, granules, local_path, provider, threads)
    457     local_path = os.path.join(
    458         ".",
    459         "data",
    460         f"{datetime.datetime.today().strftime('%Y-%m-%d')}-{uuid4().hex[:6]}",
    461     )
    462 if len(granules):
--> 463     files = self._get(granules, local_path, provider, threads)
    464     return files
    465 else:

File /opt/coiled/env/lib/python3.11/site-packages/multimethod/__init__.py:315, in multimethod.__call__(self, *args, **kwargs)
    313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
    314 try:
--> 315     return func(*args, **kwargs)
    316 except TypeError as ex:
    317     raise DispatchError(f"Function {func.__code__}") from ex

File /opt/coiled/env/lib/python3.11/site-packages/earthaccess/store.py:564, in Store._get_granules(self, granules, local_path, provider, threads)
    562 # TODO: make this async
    563 for file in data_links:
--> 564     s3_fs.get(file, local_path)
    565     file_name = os.path.join(local_path, os.path.basename(file))
    566     print(f"Downloaded: {file_name}")

File /opt/coiled/env/lib/python3.11/site-packages/fsspec/asyn.py:118, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
    115 @functools.wraps(func)
    116 def wrapper(*args, **kwargs):
    117     self = obj or args[0]
--> 118     return sync(self.loop, func, *args, **kwargs)

File /opt/coiled/env/lib/python3.11/site-packages/fsspec/asyn.py:103, in sync(loop, func, timeout, *args, **kwargs)
    101     raise FSTimeoutError from return_result
    102 elif isinstance(return_result, BaseException):
--> 103     raise return_result
    104 else:
    105     return return_result

File /opt/coiled/env/lib/python3.11/site-packages/fsspec/asyn.py:56, in _runner(event, coro, result, timeout)
     54     coro = asyncio.wait_for(coro, timeout=timeout)
     55 try:
---> 56     result[0] = await coro
     57 except Exception as ex:
     58     result[0] = ex

File /opt/coiled/env/lib/python3.11/site-packages/fsspec/asyn.py:609, in AsyncFileSystem._get(self, rpath, lpath, recursive, callback, maxdepth, **kwargs)
    604 rpaths = await self._expand_path(
    605     rpath, recursive=recursive, maxdepth=maxdepth
    606 )
    607 if source_is_str and (not recursive or maxdepth is not None):
    608     # Non-recursive glob does not copy directories
--> 609     rpaths = [
    610         p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
    611     ]
    612     if not rpaths:
    613         return

File /opt/coiled/env/lib/python3.11/site-packages/fsspec/asyn.py:610, in <listcomp>(.0)
    604 rpaths = await self._expand_path(
    605     rpath, recursive=recursive, maxdepth=maxdepth
    606 )
    607 if source_is_str and (not recursive or maxdepth is not None):
    608     # Non-recursive glob does not copy directories
    609     rpaths = [
--> 610         p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
    611     ]
    612     if not rpaths:
    613         return

File /opt/coiled/env/lib/python3.11/site-packages/s3fs/core.py:1411, in S3FileSystem._isdir(self, path)
   1409 # This only returns things within the path and NOT the path object itself
   1410 try:
-> 1411     return bool(await self._lsdir(path))
   1412 except FileNotFoundError:
   1413     return False

File /opt/coiled/env/lib/python3.11/site-packages/s3fs/core.py:719, in S3FileSystem._lsdir(self, path, refresh, max_items, delimiter, prefix, versions)
    717     files += dirs
    718 except ClientError as e:
--> 719     raise translate_boto_error(e)
    721 if delimiter and files and not versions:
    722     self.dircache[path] = files

PermissionError: The provided token has expired.
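
For reference, here's a hedged sketch of how a session ends up in this state (earthaccess.login, search_data, and download are real earthaccess calls, but the short_name and the wait below are placeholders for illustration, not the exact code I ran):

import time

import earthaccess

earthaccess.login()

# Placeholder query; any search that returns S3-hosted granules works here
granules = earthaccess.search_data(short_name="ATL03", count=10)

# Session stays open long enough for the cached temporary S3 credentials
# (roughly one hour) to expire
time.sleep(60 * 60 + 60)

# This reuses the cached, now-expired credentials and fails with
# "PermissionError: The provided token has expired."
files = earthaccess.download(granules, "test_data")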

IIUC this has to do with me adding this @lru_cache a while ago for performance reasons without thinking about how these credentials can expire:

@lru_cache

I'd still like to have caching around getting new S3 credentials, since fetching them can be pretty slow (several seconds), but the top-level @lru_cache we have now isn't the right approach.
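
One possible direction (a minimal sketch, not earthaccess's actual implementation; the fetch_credentials callable and the one-hour lifetime are assumptions) would be to cache the credentials together with an expiration time and refresh them shortly before they expire, instead of caching them forever like a plain @lru_cache does:

import datetime

# Hypothetical module-level cache mapping a DAAC/provider name to
# (credentials, expiration) pairs.
_s3_credentials_cache = {}

def get_cached_s3_credentials(daac, fetch_credentials):
    """Return cached temporary S3 credentials for `daac`, re-fetching
    them shortly before they expire."""
    now = datetime.datetime.now(datetime.timezone.utc)
    cached = _s3_credentials_cache.get(daac)
    if cached is not None:
        credentials, expiration = cached
        # Reuse the cached credentials only while they are comfortably valid.
        if now < expiration - datetime.timedelta(minutes=5):
            return credentials
    # Slow path: request fresh temporary credentials (the expensive call
    # this cache exists to avoid repeating on every download).
    credentials = fetch_credentials(daac)
    # Temporary S3 credentials typically last about an hour; the exact
    # lifetime here is an assumption for illustration.
    expiration = now + datetime.timedelta(hours=1)
    _s3_credentials_cache[daac] = (credentials, expiration)
    return credentials

An explicit expiration check like this keeps the refresh behavior obvious, and the expensive credential request still only happens once per hour per provider.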
