From 5c44af814857e1d652303d78fb52078f73747894 Mon Sep 17 00:00:00 2001 From: Jonas Neubert Date: Thu, 4 May 2023 23:04:12 -0600 Subject: [PATCH 1/3] lru_cache_weakref for resolve_endpoint() --- botocore/endpoint_provider.py | 4 ++-- botocore/utils.py | 28 +++++++++++++++++++++++++ tests/unit/test_utils.py | 39 +++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/botocore/endpoint_provider.py b/botocore/endpoint_provider.py index d4c1ae4fd4..be927719b6 100644 --- a/botocore/endpoint_provider.py +++ b/botocore/endpoint_provider.py @@ -24,7 +24,6 @@ import logging import re from enum import Enum -from functools import lru_cache from string import Formatter from typing import NamedTuple @@ -36,6 +35,7 @@ InvalidArnException, is_valid_ipv4_endpoint_url, is_valid_ipv6_endpoint_url, + lru_cache_weakref, normalize_url_path, percent_encode, ) @@ -708,7 +708,7 @@ class EndpointProvider: def __init__(self, ruleset_data, partition_data): self.ruleset = RuleSet(**ruleset_data, partitions=partition_data) - @lru_cache(maxsize=CACHE_SIZE) + @lru_cache_weakref(maxsize=CACHE_SIZE) def resolve_endpoint(self, **input_parameters): """Match input parameters to a rule. diff --git a/botocore/utils.py b/botocore/utils.py index fbc4963ccc..86c842774e 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -1425,6 +1425,34 @@ def _cache_guard(self, *args, **kwargs): return _cache_guard +def lru_cache_weakref(*cache_args, **cache_kwargs): + """ + Version of functools.lru_cache that stores a weak reference to ``self``. + + Serves the same purpose as :py:func:`instance_cache` but uses Python's + functools implementation which offers ``max_size`` and ``typed`` properties. + + lru_cache is a global cache even when used on a method. The cache's + reference to ``self`` will prevent garbace collection of the object. This + wrapper around functools.lru_cache replaces the reference to ``self`` with + a weak reference to not interfere with garbage collection. + """ + + def wrapper(func): + @functools.lru_cache(*cache_args, **cache_kwargs) + def func_with_weakref(weakref_to_self, *args, **kwargs): + return func(weakref_to_self(), *args, **kwargs) + + @functools.wraps(func) + def inner(self, *args, **kwargs): + return func_with_weakref(weakref.ref(self), *args, **kwargs) + + inner.cache_info = func_with_weakref.cache_info + return inner + + return wrapper + + def switch_host_s3_accelerate(request, operation_name, **kwargs): """Switches the current s3 endpoint with an S3 Accelerate endpoint""" diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ae253d0397..7cceaa8ec5 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -14,6 +14,7 @@ import datetime import io import operator +from sys import getrefcount import pytest from dateutil.tz import tzoffset, tzutc @@ -78,6 +79,7 @@ is_valid_ipv6_endpoint_url, is_valid_uri, lowercase_dict, + lru_cache_weakref, merge_dicts, normalize_url_path, parse_key_val_file, @@ -3388,3 +3390,40 @@ def test_is_s3_accelerate_url(url, expected): def test_get_encoding_from_headers(headers, default, expected): charset = get_encoding_from_headers(HeadersDict(headers), default=default) assert charset == expected + + +def test_lru_cache_weakref(): + class ClassWithCachedMethod: + @lru_cache_weakref(maxsize=10) + def cached_fn(self, a, b): + return a + b + + cls1 = ClassWithCachedMethod() + cls2 = ClassWithCachedMethod() + + assert cls1.cached_fn.cache_info().currsize == 0 + assert getrefcount(cls1) == 2 + assert getrefcount(cls2) == 2 + # "The count returned is generally one higher than you might expect, because + # it includes the (temporary) reference as an argument to getrefcount()." + # https://docs.python.org/3.8/library/sys.html#getrefcount + + cls1.cached_fn(1, 1) + cls2.cached_fn(1, 1) + + # The cache now has two entries, but the reference count remains the same as + # before. + assert cls1.cached_fn.cache_info().currsize == 2 + assert getrefcount(cls1) == 2 + assert getrefcount(cls2) == 2 + + # Deleting one of the objects does not interfere with the cache entries + # related to the other object. + del cls1 + assert cls2.cached_fn.cache_info().currsize == 2 + assert cls2.cached_fn.cache_info().hits == 0 + assert cls2.cached_fn.cache_info().misses == 2 + cls2.cached_fn(1, 1) + assert cls2.cached_fn.cache_info().currsize == 2 + assert cls2.cached_fn.cache_info().hits == 1 # the call was a cache hit + assert cls2.cached_fn.cache_info().misses == 2 From e7a2593403f98a318e0b9273cd596efd62bee64b Mon Sep 17 00:00:00 2001 From: Jonas Neubert Date: Tue, 9 May 2023 11:13:14 -0600 Subject: [PATCH 2/3] changelog --- .changes/next-release/enhancement-enpoints-71343.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changes/next-release/enhancement-enpoints-71343.json diff --git a/.changes/next-release/enhancement-enpoints-71343.json b/.changes/next-release/enhancement-enpoints-71343.json new file mode 100644 index 0000000000..57c72d1ceb --- /dev/null +++ b/.changes/next-release/enhancement-enpoints-71343.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "enpoints", + "description": "Fix cache implementation to reduce memory consumption." +} From 9b752894bc21d37771101a28791f0e474fa3b8ad Mon Sep 17 00:00:00 2001 From: Jonas Neubert Date: Tue, 9 May 2023 11:14:18 -0600 Subject: [PATCH 3/3] changelog --- .changes/next-release/enhancement-enpoints-71343.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changes/next-release/enhancement-enpoints-71343.json b/.changes/next-release/enhancement-enpoints-71343.json index 57c72d1ceb..e59ca0e5c3 100644 --- a/.changes/next-release/enhancement-enpoints-71343.json +++ b/.changes/next-release/enhancement-enpoints-71343.json @@ -1,5 +1,5 @@ { "type": "enhancement", - "category": "enpoints", + "category": "endpoints", "description": "Fix cache implementation to reduce memory consumption." }