boto · jonemo · May 9, 2023 · May 5, 2023 · May 9, 2023 · May 9, 2023
diff --git a/.changes/next-release/enhancement-enpoints-71343.json b/.changes/next-release/enhancement-enpoints-71343.json
@@ -0,0 +1,5 @@
+{
+  "type": "enhancement",
+  "category": "endpoints",
+  "description": "Fix cache implementation to reduce memory consumption."
+}
diff --git a/botocore/endpoint_provider.py b/botocore/endpoint_provider.py
@@ -24,7 +24,6 @@
 import logging
 import re
 from enum import Enum
-from functools import lru_cache
 from string import Formatter
 from typing import NamedTuple
 
@@ -36,6 +35,7 @@
     InvalidArnException,
     is_valid_ipv4_endpoint_url,
     is_valid_ipv6_endpoint_url,
+    lru_cache_weakref,
     normalize_url_path,
     percent_encode,
 )
@@ -708,7 +708,7 @@ class EndpointProvider:
     def __init__(self, ruleset_data, partition_data):
         self.ruleset = RuleSet(**ruleset_data, partitions=partition_data)
 
-    @lru_cache(maxsize=CACHE_SIZE)
+    @lru_cache_weakref(maxsize=CACHE_SIZE)
     def resolve_endpoint(self, **input_parameters):
         """Match input parameters to a rule.
 

diff --git a/botocore/utils.py b/botocore/utils.py
@@ -1425,6 +1425,34 @@ def _cache_guard(self, *args, **kwargs):
     return _cache_guard
 
 
+def lru_cache_weakref(*cache_args, **cache_kwargs):
+    """
+    Version of functools.lru_cache that stores a weak reference to ``self``.
+
+    Serves the same purpose as :py:func:`instance_cache` but uses Python's
+    functools implementation which offers ``max_size`` and ``typed`` properties.
+
+    lru_cache is a global cache even when used on a method. The cache's
+    reference to ``self`` will prevent garbace collection of the object. This
+    wrapper around functools.lru_cache replaces the reference to ``self`` with
+    a weak reference to not interfere with garbage collection.
+    """
+
+    def wrapper(func):
+        @functools.lru_cache(*cache_args, **cache_kwargs)
+        def func_with_weakref(weakref_to_self, *args, **kwargs):
+            return func(weakref_to_self(), *args, **kwargs)
+
+        @functools.wraps(func)
+        def inner(self, *args, **kwargs):
+            return func_with_weakref(weakref.ref(self), *args, **kwargs)
+
+        inner.cache_info = func_with_weakref.cache_info
+        return inner
+
+    return wrapper
+
+
 def switch_host_s3_accelerate(request, operation_name, **kwargs):
     """Switches the current s3 endpoint with an S3 Accelerate endpoint"""
 

diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
@@ -14,6 +14,7 @@
 import datetime
 import io
 import operator
+from sys import getrefcount
 
 import pytest
 from dateutil.tz import tzoffset, tzutc
@@ -78,6 +79,7 @@
     is_valid_ipv6_endpoint_url,
     is_valid_uri,
     lowercase_dict,
+    lru_cache_weakref,
     merge_dicts,
     normalize_url_path,
     parse_key_val_file,
@@ -3388,3 +3390,40 @@ def test_is_s3_accelerate_url(url, expected):
 def test_get_encoding_from_headers(headers, default, expected):
     charset = get_encoding_from_headers(HeadersDict(headers), default=default)
     assert charset == expected
+
+
+def test_lru_cache_weakref():
+    class ClassWithCachedMethod:
+        @lru_cache_weakref(maxsize=10)
+        def cached_fn(self, a, b):
+            return a + b
+
+    cls1 = ClassWithCachedMethod()
+    cls2 = ClassWithCachedMethod()
+
+    assert cls1.cached_fn.cache_info().currsize == 0
+    assert getrefcount(cls1) == 2
+    assert getrefcount(cls2) == 2
+    # "The count returned is generally one higher than you might expect, because
+    # it includes the (temporary) reference as an argument to getrefcount()."
+    # https://docs.python.org/3.8/library/sys.html#getrefcount
+
+    cls1.cached_fn(1, 1)
+    cls2.cached_fn(1, 1)
+
+    # The cache now has two entries, but the reference count remains the same as
+    # before.
+    assert cls1.cached_fn.cache_info().currsize == 2
+    assert getrefcount(cls1) == 2
+    assert getrefcount(cls2) == 2
+
+    # Deleting one of the objects does not interfere with the cache entries
+    # related to the other object.
+    del cls1
+    assert cls2.cached_fn.cache_info().currsize == 2
+    assert cls2.cached_fn.cache_info().hits == 0
+    assert cls2.cached_fn.cache_info().misses == 2
+    cls2.cached_fn(1, 1)
+    assert cls2.cached_fn.cache_info().currsize == 2
+    assert cls2.cached_fn.cache_info().hits == 1  # the call was a cache hit
+    assert cls2.cached_fn.cache_info().misses == 2