Truncate (to a configurable limit) very large lock errors #512

Merged: 2 commits, Dec 12, 2024
5 changes: 4 additions & 1 deletion CHANGES.rst
@@ -5,7 +5,10 @@
4.1.1 (unreleased)
==================

- Nothing changed yet.
- Make certain lock-related errors in very large transactions no
longer produce huge (unusable) error messages when logged or
printed. Now such messages are truncated. Previously, they were
allowed to grow without bounds. See :issue:`511`.


4.1.0 (2024-10-11)
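The limit mentioned in this changelog entry is introduced later in the diff (see vote.py) and is read from the RS_LOCK_ERROR_MAX_LEN environment variable, defaulting to 2048 characters per portion of the error detail. A minimal sketch of raising it in a deployment, assuming the variable is set before any RelStorage module is imported:

    import os

    # Sketch only: raise the per-portion limit for lock-error details.
    # The value is read when relstorage.storage.tpc.vote is imported,
    # so it has to be set before RelStorage is loaded.
    os.environ['RS_LOCK_ERROR_MAX_LEN'] = '8192'

    # ... import and configure RelStorage only after this point ...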
62 changes: 7 additions & 55 deletions scripts/releases/make-manylinux
@@ -8,10 +8,7 @@ export PYTHONDONTWRITEBYTECODE=1

# Use a fixed hash seed for reproducibility
export PYTHONHASHSEED=8675309
# Disable tests that use external network resources;
# too often we get failures to resolve DNS names or failures
# to connect on AppVeyor.
export GEVENTTEST_USE_RESOURCES="-network"

export CI=1
export TRAVIS=true
export GEVENT_MANYLINUX=1
@@ -21,49 +18,19 @@ export PIP_NO_PYTHON_VERSION_WARNING=1
export PIP_NO_WARN_SCRIPT_LOCATION=1

# Build configuration.

export CC="ccache $(which cc)"
export CXX="ccache $(which c++)"
export LDCXXSHARED="ccache $(which c++) -shared"
export LDSHARED="ccache $(which cc) -shared"
export CCACHE_NOCPP2=true
export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_ctime,include_file_mtime
export CCACHE_NOHASHDIR=true
export CCACHE_BASEDIR="/RelStorage"
export CFLAGS="-pipe -O3"
export CXXFLAGS="-pipe -O3"
export BUILD_LIBS=$HOME/.libs
# Share the ccache directory
export CCACHE_DIR="/ccache"

# Disable some warnings produced by libev especially and also some Cython generated code.
# Note that changing the value of these variables invalidates configure caches
SLOW_BUILD=''
# Some images/archs (specifically 2014_aarch64) don't have ccache;
# This also seems to have vanished for manylinux_2010 x64/64 after November 30
# 2020 when the OS went EOL and the package repos switched to the "vault"
NEED_CCACHE=1
GEVENT_WARNFLAGS="-Wno-strict-aliasing -Wno-comment -Wno-unused-value -Wno-unused-but-set-variable -Wno-sign-compare -Wno-parentheses -Wno-unused-function -Wno-tautological-compare -Wno-strict-prototypes -Wno-return-type -Wno-misleading-indentation"
OPTIMIZATION_FLAGS="-pipe"
if [ -n "$GITHUB_ACTIONS" ]; then
if [ "$DOCKER_IMAGE" == "quay.io/pypa/manylinux2014_aarch64" ] || [ "$DOCKER_IMAGE" == "quay.io/pypa/manylinux2014_ppc64le" ] || [ "$DOCKER_IMAGE" == "quay.io/pypa/manylinux2014_s390x" ] || [ "$DOCKER_IMAGE" == "quay.io/pypa/musllinux_1_1_aarch64" ] ; then
# Compiling with -Ofast is a no-go because of the regression it causes (#1864).
# The default settings have -O3, and adding -Os doesn't help much. So maybe -O1 will.
echo "Compiling with -O1"
OPTIMIZATION_FLAGS="-pipe -O1"
SLOW_BUILD=1
NEED_CCACHE=1
fi
else
echo "Compiling with -O3"
OPTIMIZATION_FLAGS="-pipe -O3"
NEED_CCACHE=1
fi
WARNFLAGS="-Wno-strict-aliasing -Wno-comment -Wno-unused-value -Wno-unused-but-set-variable -Wno-sign-compare -Wno-parentheses -Wno-unused-function -Wno-tautological-compare -Wno-return-type -Wno-misleading-indentation"
OPTIMIZATION_FLAGS="-pipe -O3"


export CFLAGS="$OPTIMIZATION_FLAGS $GEVENT_WARNFLAGS"

export CFLAGS="$OPTIMIZATION_FLAGS $WARNFLAGS"
export CXXFLAGS="$CFLAGS"

if [ -d /RelStorage -a -d /opt/python ]; then
if [ -d /RelStorage ] && [ -d /opt/python ]; then
# Running inside docker

# Set a cache directory for pip. This was
@@ -82,21 +49,6 @@ if [ -d /RelStorage -a -d /opt/python ]; then
mkdir -p /cache/pip
ls -ld /cache/pip

if [ -e /usr/bin/yum ]; then
# manylinux

if [ -n "$NEED_CCACHE" ] ; then
# This provides access to ccache for the 2014 image; it is already installed in the
# s390x image.
echo Installing epel
rpm -Uvh https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-14.noarch.rpm || true
fi
yum -y install ccache || export CC=gcc CXX=c++ LDSHARED="gcc -shared" LDCXXSHARED="gcc -shared"
fi
if [ -e /sbin/apk ]; then
# musllinux
apk add ccache
fi

# Ahh, overprotective security. Disable it.
echo "Fixing git's paranoia"
19 changes: 12 additions & 7 deletions src/relstorage/adapters/locker.py
@@ -254,9 +254,10 @@ def _modify_commit_lock_kind(self, kind, exc): # pylint:disable=unused-argument
return kind

def reraise_commit_lock_error(self, cursor, lock_stmt, kind):
v = sys.exc_info()[1]
kind = self._modify_commit_lock_kind(kind, v)
if self.driver.exception_is_deadlock(v):
_current_exc_type, current_exc, current_tb = sys.exc_info()

kind = self._modify_commit_lock_kind(kind, current_exc)
if self.driver.exception_is_deadlock(current_exc):
kind = getattr(kind, 'DEADLOCK_VARIANT', UnableToLockRowsDeadlockError)

try:
@@ -270,13 +271,15 @@ def reraise_commit_lock_error(self, cursor, lock_stmt, kind):
logger.debug("Failed to acquire commit lock:\n%s", debug_info)

message = "Acquiring a lock during commit failed: %s%s" % (
sys.exc_info()[1],
current_exc,
'\n' + debug_info if debug_info else '(No debug info.)'
)
val = kind(message)
val.__relstorage_cause__ = v
del v
raise val.with_traceback(sys.exc_info()[2])
val.__relstorage_cause__ = current_exc
try:
raise val.with_traceback(current_tb) from current_exc
finally:
del current_exc, current_tb

# MySQL allows aggregates in the top level to use FOR UPDATE,
# but PostgreSQL does not, so we have to use the second form.
@@ -342,6 +345,8 @@ def _get_commit_lock_debug_info(self, cursor, was_failure=False):
that will be added to the exception message when a commit lock cannot
be acquired. For example, it might list other connections that
have conflicting locks.

:rtype: str
"""
return ''

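The updated reraise_commit_lock_error captures sys.exc_info() once, chains the new exception to the original via "raise ... from", and deletes the local references afterwards. A minimal standalone sketch of that pattern, using hypothetical names (LockError, wrap_and_reraise) purely for illustration:

    import sys

    class LockError(Exception):
        """Hypothetical stand-in for the adapter's lock-error classes."""

    def wrap_and_reraise(extra_detail):
        # Capture the in-flight exception once instead of re-reading
        # sys.exc_info() at every use site.
        _exc_type, current_exc, current_tb = sys.exc_info()
        message = "Acquiring a lock during commit failed: %s\n%s" % (
            current_exc, extra_detail)
        wrapped = LockError(message)
        try:
            # Keep the original traceback and chain the cause explicitly.
            raise wrapped.with_traceback(current_tb) from current_exc
        finally:
            # Drop the local references so the traceback/frame cycle
            # can be collected promptly.
            del current_exc, current_tb

    # Must be called from an 'except' block so sys.exc_info() is populated:
    try:
        raise RuntimeError("deadlock detected by the database driver")
    except RuntimeError:
        try:
            wrap_and_reraise("(No debug info.)")
        except LockError as e:
            print(e)            # the wrapped message
            print(e.__cause__)  # the original driver exception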
4 changes: 2 additions & 2 deletions src/relstorage/storage/tpc/__init__.py
@@ -133,8 +133,8 @@ def local_client(self):
def store_connection(self):
conn = self._storage._store_connection_pool.borrow()
# Report on the connection we will use.
# https://github.com/zodb/relstorage/issues/460
logger.info("Using store connection %s", conn)
# https://github.com/zodb/relstorage/issues/460
logger.debug("Using store connection %s", conn)
return conn

@store_connection.aborter
4 changes: 4 additions & 0 deletions src/relstorage/storage/tpc/temporary_storage.py
@@ -155,6 +155,10 @@ def __repr__(self):
)

def __str__(self):
"""
The string of this object can get very, very long, if the transaction
modifies a lot of objects.
"""
base = repr(self)
if not self:
return base
19 changes: 11 additions & 8 deletions src/relstorage/storage/tpc/tests/test_vote.py
@@ -3,12 +3,9 @@
Tests for vote.py.

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import unittest
from unittest.mock import patch as Patch

from hamcrest import assert_that
from nti.testing.matchers import verifiably_provides
@@ -227,7 +224,7 @@ def _callFUT(self, ex, state, required_tids):
add_details_to_lock_error(ex, state, required_tids)
return ex

def _check(self, kind):
def _check(self, kind, truncated=False):
ex = kind('MESSAGE')
storage = MockStorage()
storage.keep_history = False # pylint:disable=attribute-defined-outside-init
@@ -237,9 +234,9 @@ def _check(self, kind):
self._callFUT(ex, state, {1: 1})
s = str(ex)
self.assertIn('readCurrent {oid: tid}', s)
self.assertIn('{1: 1}', s)
self.assertIn('Previous TID', s)
self.assertIn('42', s)
self.assertIn('{1: 1}' if not truncated else '{...', s)
self.assertIn('Previous TID' if not truncated else '<...', s)
self.assertIn('42' if not truncated else '', s)
self.assertIn('MESSAGE', s)

def test_add_details_to_UnableToAcquireCommitLockError(self):
Expand All @@ -250,6 +247,12 @@ def test_add_details_to_UnableToLockRowsToModifyError(self):
from relstorage.adapters.interfaces import UnableToLockRowsToModifyError
self._check(UnableToLockRowsToModifyError)

def test_add_truncated_details_to_UnableToLockRowsToModifyError(self):
from relstorage.adapters.interfaces import UnableToLockRowsToModifyError
from .. import vote
with Patch.object(vote, 'DETAIL_TRUNCATION_LEN', new=1):
self._check(UnableToLockRowsToModifyError, truncated=True)

def test_add_details_to_UnableToLockRowsToReadCurrentError(self):
from relstorage.adapters.interfaces import UnableToLockRowsToReadCurrentError
self._check(UnableToLockRowsToReadCurrentError)
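The new truncation test shrinks the limit by patching the module-level DETAIL_TRUNCATION_LEN attribute rather than setting the environment variable, which is only read at import time. A small self-contained sketch of that technique, with stand-in names (fake_vote, truncate_detail) that are not part of the PR:

    import unittest
    from types import SimpleNamespace
    from unittest import mock

    # Stand-in namespace playing the role of relstorage.storage.tpc.vote.
    fake_vote = SimpleNamespace(DETAIL_TRUNCATION_LEN=2048)

    def truncate_detail(text):
        # Same shape as the truncation used in add_details_to_lock_error.
        limit = fake_vote.DETAIL_TRUNCATION_LEN
        return text[:limit] + '...' if len(text) > limit else text

    class TruncationTest(unittest.TestCase):
        def test_truncates_when_limit_is_tiny(self):
            # patch.object swaps the attribute and restores it on exit.
            with mock.patch.object(fake_vote, 'DETAIL_TRUNCATION_LEN', new=1):
                self.assertEqual(truncate_detail('{1: 1}'), '{...')

        def test_leaves_short_detail_alone(self):
            self.assertEqual(truncate_detail('{1: 1}'), '{1: 1}')

    if __name__ == '__main__':
        unittest.main()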
41 changes: 31 additions & 10 deletions src/relstorage/storage/tpc/vote.py
@@ -18,11 +18,9 @@
temporary objects to the database, and moving them to their final locations,
live here.
"""
from __future__ import absolute_import
from __future__ import print_function

import time
from logging import DEBUG
import logging

from zope.interface import implementer

@@ -37,6 +35,7 @@
from relstorage._util import do_log_duration_info
from relstorage._util import TRACE
from relstorage._util import METRIC_SAMPLE_RATE
from relstorage._util import get_positive_integer_from_environ
from relstorage.options import COMMIT_EXIT_CRITICAL_SECTION_EARLY
from relstorage.adapters.interfaces import UnableToAcquireLockError
from ..interfaces import VoteReadConflictError
@@ -46,9 +45,9 @@
from . import AbstractTPCStateDatabaseAvailable
from .finish import Finish

LOG_LEVEL_TID = DEBUG
LOG_LEVEL_TID = logging.DEBUG

logger = __import__('logging').getLogger(__name__)
logger = logging.getLogger(__name__)
perf_logger = logger.getChild('timing')

class DatabaseLockedForTid(object):
@@ -663,14 +662,36 @@ def loadSerial(self, oid, serial):
assert bytes8_to_int64(serial) == tid
return state

#: The maximum number of characters to allow in each portion
#: of lock detail errors.
DETAIL_TRUNCATION_LEN = get_positive_integer_from_environ(
'RS_LOCK_ERROR_MAX_LEN',
# 2k seems a reasonable default. If you are actually getting
# truncated, then chances are the transaction is much larger
# than that
2048,
logger=logger
)

def add_details_to_lock_error(ex, shared_state, required_tids):
# type: (Exception, SharedState, required_tids)

obj_msg = str(shared_state.temp_storage) if shared_state.has_temp_data() else 'None'
tid_msg = str(dict(required_tids)) # May be a BTree, which has no useful str/repr

obj_msg = (obj_msg[:DETAIL_TRUNCATION_LEN] + '...'
if len(obj_msg) > DETAIL_TRUNCATION_LEN
else obj_msg)
tid_msg = (tid_msg[:DETAIL_TRUNCATION_LEN] + '...'
if len(tid_msg) > DETAIL_TRUNCATION_LEN
else tid_msg)

message = '\n'.join((
'Stored Objects',
str(shared_state.temp_storage) if shared_state.has_temp_data() else 'None',
'readCurrent {oid: tid}',
str(dict(required_tids)) # May be a BTree, which has no
))
'Stored Objects',
obj_msg,
'readCurrent {oid: tid}',
tid_msg,
))

if hasattr(ex, 'message'):
# A ConflictError subclass *or* we're on Python 2.
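The two conditional expressions that trim obj_msg and tid_msg share the same shape; a minimal sketch of the equivalent logic factored into a helper (the _truncate name is illustrative and not part of this PR):

    DETAIL_TRUNCATION_LEN = 2048  # the PR's default per-portion limit

    def _truncate(text, limit=DETAIL_TRUNCATION_LEN):
        # Keep at most 'limit' characters and mark the cut with an ellipsis,
        # mirroring the inline expressions for obj_msg and tid_msg above.
        return text[:limit] + '...' if len(text) > limit else text

    # Example: a huge readCurrent {oid: tid} map collapses to a bounded string.
    required_tids = {oid: 1 for oid in range(100000)}
    tid_msg = _truncate(str(required_tids))
    assert len(tid_msg) <= DETAIL_TRUNCATION_LEN + 3
    assert tid_msg.endswith('...')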