From 3f4335729a7dc3bd457fd4c60a52e17cc954bdce Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Thu, 12 Dec 2024 13:58:35 -0600 Subject: [PATCH 1/2] Truncate (to a configurable limit) very large lock errors. Fixes #511. --- CHANGES.rst | 5 ++- src/relstorage/adapters/locker.py | 19 +++++---- src/relstorage/storage/tpc/__init__.py | 4 +- .../storage/tpc/temporary_storage.py | 4 ++ src/relstorage/storage/tpc/tests/test_vote.py | 19 +++++---- src/relstorage/storage/tpc/vote.py | 41 ++++++++++++++----- 6 files changed, 64 insertions(+), 28 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e9beb127..04cd56f0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,10 @@ 4.1.1 (unreleased) ================== -- Nothing changed yet. +- Make certain lock-related errors in very large transactions no + longer produce huge (unusable) error messages when logged or + printed. Now such messages are truncated. Previously, they were + allowed to grow without bounds. See :issue:`511` 4.1.0 (2024-10-11) diff --git a/src/relstorage/adapters/locker.py b/src/relstorage/adapters/locker.py index bca441a1..dc1fdb01 100644 --- a/src/relstorage/adapters/locker.py +++ b/src/relstorage/adapters/locker.py @@ -254,9 +254,10 @@ def _modify_commit_lock_kind(self, kind, exc): # pylint:disable=unused-argument return kind def reraise_commit_lock_error(self, cursor, lock_stmt, kind): - v = sys.exc_info()[1] - kind = self._modify_commit_lock_kind(kind, v) - if self.driver.exception_is_deadlock(v): + _current_exc_type, current_exc, current_tb = sys.exc_info() + + kind = self._modify_commit_lock_kind(kind, current_exc) + if self.driver.exception_is_deadlock(current_exc): kind = getattr(kind, 'DEADLOCK_VARIANT', UnableToLockRowsDeadlockError) try: @@ -270,13 +271,15 @@ def reraise_commit_lock_error(self, cursor, lock_stmt, kind): logger.debug("Failed to acquire commit lock:\n%s", debug_info) message = "Acquiring a lock during commit failed: %s%s" % ( - sys.exc_info()[1], + current_exc, '\n' + debug_info if debug_info else '(No debug info.)' ) val = kind(message) - val.__relstorage_cause__ = v - del v - raise val.with_traceback(sys.exc_info()[2]) + val.__relstorage_cause__ = current_exc + try: + raise val.with_traceback(current_tb) from current_exc + finally: + del current_exc, current_tb # MySQL allows aggregates in the top level to use FOR UPDATE, # but PostgreSQL does not, so we have to use the second form. @@ -342,6 +345,8 @@ def _get_commit_lock_debug_info(self, cursor, was_failure=False): that will be added to the exception message when a commit lock cannot be acquired. For example, it might list other connections that have conflicting locks. + + :rtype: str """ return '' diff --git a/src/relstorage/storage/tpc/__init__.py b/src/relstorage/storage/tpc/__init__.py index 6f6dc8c8..53d71d0c 100644 --- a/src/relstorage/storage/tpc/__init__.py +++ b/src/relstorage/storage/tpc/__init__.py @@ -133,8 +133,8 @@ def local_client(self): def store_connection(self): conn = self._storage._store_connection_pool.borrow() # Report on the connection we will use. - # https://github.com/zodb/relstorage/issues/460 - logger.info("Using store connection %s", conn) + # https://github.com/zodb/relstorage/issues/460 + logger.debug("Using store connection %s", conn) return conn @store_connection.aborter diff --git a/src/relstorage/storage/tpc/temporary_storage.py b/src/relstorage/storage/tpc/temporary_storage.py index 5576dd27..69160cb3 100644 --- a/src/relstorage/storage/tpc/temporary_storage.py +++ b/src/relstorage/storage/tpc/temporary_storage.py @@ -155,6 +155,10 @@ def __repr__(self): ) def __str__(self): + """ + The string of this object can get very, very long, if the transaction + modifies a lot of objects. + """ base = repr(self) if not self: return base diff --git a/src/relstorage/storage/tpc/tests/test_vote.py b/src/relstorage/storage/tpc/tests/test_vote.py index c46a22ce..ce9c927a 100644 --- a/src/relstorage/storage/tpc/tests/test_vote.py +++ b/src/relstorage/storage/tpc/tests/test_vote.py @@ -3,12 +3,9 @@ Tests for vote.py. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import unittest +from unittest.mock import patch as Patch from hamcrest import assert_that from nti.testing.matchers import verifiably_provides @@ -227,7 +224,7 @@ def _callFUT(self, ex, state, required_tids): add_details_to_lock_error(ex, state, required_tids) return ex - def _check(self, kind): + def _check(self, kind, truncated=False): ex = kind('MESSAGE') storage = MockStorage() storage.keep_history = False # pylint:disable=attribute-defined-outside-init @@ -237,9 +234,9 @@ def _check(self, kind): self._callFUT(ex, state, {1: 1}) s = str(ex) self.assertIn('readCurrent {oid: tid}', s) - self.assertIn('{1: 1}', s) - self.assertIn('Previous TID', s) - self.assertIn('42', s) + self.assertIn('{1: 1}' if not truncated else '{...', s) + self.assertIn('Previous TID' if not truncated else '<...', s) + self.assertIn('42' if not truncated else '', s) self.assertIn('MESSAGE', s) def test_add_details_to_UnableToAcquireCommitLockError(self): @@ -250,6 +247,12 @@ def test_add_details_to_UnableToLockRowsToModifyError(self): from relstorage.adapters.interfaces import UnableToLockRowsToModifyError self._check(UnableToLockRowsToModifyError) + def test_add_truncated_details_to_UnableToLockRowsToModifyError(self): + from relstorage.adapters.interfaces import UnableToLockRowsToModifyError + from .. import vote + with Patch.object(vote, 'DETAIL_TRUNCATION_LEN', new=1): + self._check(UnableToLockRowsToModifyError, truncated=True) + def test_add_details_to_UnableToLockRowsToReadCurrentError(self): from relstorage.adapters.interfaces import UnableToLockRowsToReadCurrentError self._check(UnableToLockRowsToReadCurrentError) diff --git a/src/relstorage/storage/tpc/vote.py b/src/relstorage/storage/tpc/vote.py index 820f2722..b92d07ac 100644 --- a/src/relstorage/storage/tpc/vote.py +++ b/src/relstorage/storage/tpc/vote.py @@ -18,11 +18,9 @@ temporary objects to the database, and moving them to their final locations, live here. """ -from __future__ import absolute_import -from __future__ import print_function import time -from logging import DEBUG +import logging from zope.interface import implementer @@ -37,6 +35,7 @@ from relstorage._util import do_log_duration_info from relstorage._util import TRACE from relstorage._util import METRIC_SAMPLE_RATE +from relstorage._util import get_positive_integer_from_environ from relstorage.options import COMMIT_EXIT_CRITICAL_SECTION_EARLY from relstorage.adapters.interfaces import UnableToAcquireLockError from ..interfaces import VoteReadConflictError @@ -46,9 +45,9 @@ from . import AbstractTPCStateDatabaseAvailable from .finish import Finish -LOG_LEVEL_TID = DEBUG +LOG_LEVEL_TID = logging.DEBUG -logger = __import__('logging').getLogger(__name__) +logger = logging.getLogger(__name__) perf_logger = logger.getChild('timing') class DatabaseLockedForTid(object): @@ -663,14 +662,36 @@ def loadSerial(self, oid, serial): assert bytes8_to_int64(serial) == tid return state +#: The maximum number of characters to allow in each portion +#: of lock detail errors. +DETAIL_TRUNCATION_LEN = get_positive_integer_from_environ( + 'RS_LOCK_ERROR_MAX_LEN', + # 2k seems a reasonable default. If you are actually getting + # truncated, then chances are the transaction is much larger + # than that + 2048, + logger=logger +) + def add_details_to_lock_error(ex, shared_state, required_tids): # type: (Exception, SharedState, required_tids) + + obj_msg = str(shared_state.temp_storage) if shared_state.has_temp_data() else 'None' + tid_msg = str(dict(required_tids)) # May be a BTree, which has no useful str/repr + + obj_msg = (obj_msg[:DETAIL_TRUNCATION_LEN] + '...' + if len(obj_msg) > DETAIL_TRUNCATION_LEN + else obj_msg) + tid_msg = (tid_msg[:DETAIL_TRUNCATION_LEN] + '...' + if len(tid_msg) > DETAIL_TRUNCATION_LEN + else tid_msg) + message = '\n'.join(( - 'Stored Objects', - str(shared_state.temp_storage) if shared_state.has_temp_data() else 'None', - 'readCurrent {oid: tid}', - str(dict(required_tids)) # May be a BTree, which has no - )) + 'Stored Objects', + obj_msg, + 'readCurrent {oid: tid}', + tid_msg, + )) if hasattr(ex, 'message'): # A ConflictError subclass *or* we're on Python 2. From 3bb954a67b8a642faf4bc66fad2c502197f34227 Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Thu, 12 Dec 2024 14:40:28 -0600 Subject: [PATCH 2/2] make-manylinux: Strip out the CCache stuff; it appears to be fragile. See https://github.com/zodb/relstorage/actions/runs/12303833192/job/34340261204 --- scripts/releases/make-manylinux | 62 ++++----------------------------- 1 file changed, 7 insertions(+), 55 deletions(-) diff --git a/scripts/releases/make-manylinux b/scripts/releases/make-manylinux index baf8deca..ea001a3d 100755 --- a/scripts/releases/make-manylinux +++ b/scripts/releases/make-manylinux @@ -8,10 +8,7 @@ export PYTHONDONTWRITEBYTECODE=1 # Use a fixed hash seed for reproducability export PYTHONHASHSEED=8675309 -# Disable tests that use external network resources; -# too often we get failures to resolve DNS names or failures -# to connect on AppVeyor. -export GEVENTTEST_USE_RESOURCES="-network" + export CI=1 export TRAVIS=true export GEVENT_MANYLINUX=1 @@ -21,49 +18,19 @@ export PIP_NO_PYTHON_VERSION_WARNING=1 export PIP_NO_WARN_SCRIPT_LOCATION=1 # Build configuration. - -export CC="ccache $(which cc)" -export CXX="ccache $(which c++)" -export LDCXXSHARED="ccache $(which c++) -shared" -export LDSHARED="ccache $(which cc) -shared" -export CCACHE_NOCPP2=true -export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_ctime,include_file_mtime -export CCACHE_NOHASHDIR=true -export CCACHE_BASEDIR="/RelStorage" export CFLAGS="-pipe -O3" export CXXFLAGS="-pipe -O3" export BUILD_LIBS=$HOME/.libs -# Share the ccache directory -export CCACHE_DIR="/ccache" -# Disable some warnings produced by libev especially and also some Cython generated code. -# Note that changing the value of these variables invalidates configure caches -SLOW_BUILD='' -# Some images/archs (specificaly 2014_aarch64) don't have ccache; -# This also seems to have vanished for manylinux_2010 x64/64 after November 30 -# 2020 when the OS went EOL and the package repos switched to the "vault" -NEED_CCACHE=1 -GEVENT_WARNFLAGS="-Wno-strict-aliasing -Wno-comment -Wno-unused-value -Wno-unused-but-set-variable -Wno-sign-compare -Wno-parentheses -Wno-unused-function -Wno-tautological-compare -Wno-strict-prototypes -Wno-return-type -Wno-misleading-indentation" -OPTIMIZATION_FLAGS="-pipe" -if [ -n "$GITHUB_ACTIONS" ]; then - if [ "$DOCKER_IMAGE" == "quay.io/pypa/manylinux2014_aarch64" ] || [ "$DOCKER_IMAGE" == "quay.io/pypa/manylinux2014_ppc64le" ] || [ "$DOCKER_IMAGE" == "quay.io/pypa/manylinux2014_s390x" ] || [ "$DOCKER_IMAGE" == "quay.io/pypa/musllinux_1_1_aarch64" ] ; then - # Compiling with -Ofast is a no-go because of the regression it causes (#1864). - # The default settings have -O3, and adding -Os doesn't help much. So maybe -O1 will. - echo "Compiling with -O1" - OPTIMIZATION_FLAGS="-pipe -O1" - SLOW_BUILD=1 - NEED_CCACHE=1 - fi -else - echo "Compiling with -O3" - OPTIMIZATION_FLAGS="-pipe -O3" - NEED_CCACHE=1 -fi +WARNFLAGS="-Wno-strict-aliasing -Wno-comment -Wno-unused-value -Wno-unused-but-set-variable -Wno-sign-compare -Wno-parentheses -Wno-unused-function -Wno-tautological-compare -Wno-return-type -Wno-misleading-indentation" +OPTIMIZATION_FLAGS="-pipe -O3" + -export CFLAGS="$OPTIMIZATION_FLAGS $GEVENT_WARNFLAGS" + +export CFLAGS="$OPTIMIZATION_FLAGS $WARNFLAGS" export CXXFLAGS="$CFLAGS" -if [ -d /RelStorage -a -d /opt/python ]; then +if [ -d /RelStorage ] && [ -d /opt/python ]; then # Running inside docker # Set a cache directory for pip. This was @@ -82,21 +49,6 @@ if [ -d /RelStorage -a -d /opt/python ]; then mkdir -p /cache/pip ls -ld /cache/pip - if [ -e /usr/bin/yum ]; then - # manylinux - - if [ -n "$NEED_CCACHE" ] ; then - # This provides access to ccache for the 2014 image; it is already installed in the - # s390x image. - echo Installing epel - rpm -Uvh https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-14.noarch.rpm || true - fi - yum -y install ccache || export CC=gcc CXX=c++ LDSHARED="gcc -shared" LDCXXSHARED="gcc -shared" - fi - if [ -e /sbin/apk ]; then - # musllinux - apk add ccache - fi # Ahh, overprotective security. Disable it. echo "Fixing git's paranoia"